feat(admin): add system observability dashboard with circuit breaker monitoring

Backend: expose circuit breaker status via new AdminObservabilityController
(health, circuit-breakers, redis endpoints). Frontend: new observability
feature in admin-client with auto-refreshing status cards.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-08 05:28:24 -08:00
parent 0d488ac68b
commit 85c78b0775
12 changed files with 374 additions and 2 deletions

View File

@ -11,6 +11,7 @@ import { ConversationsPage } from './features/conversations';
import { SettingsPage } from './features/settings';
import { TenantsPage } from './features/tenants';
import { McpPage } from './features/mcp';
import { ObservabilityPage } from './features/observability';
function App() {
return (
@ -37,6 +38,7 @@ function App() {
<Route path="conversations" element={<ConversationsPage />} />
<Route path="tenants" element={<TenantsPage />} />
<Route path="mcp" element={<McpPage />} />
<Route path="observability" element={<ObservabilityPage />} />
<Route path="settings" element={<SettingsPage />} />
</Route>

View File

@ -0,0 +1,6 @@
export {
useSystemHealth,
useCircuitBreakers,
useRedisStatus,
OBSERVABILITY_KEYS,
} from './useObservability';

View File

@ -0,0 +1,33 @@
import { useQuery } from '@tanstack/react-query';
import { observabilityApi } from '../infrastructure/observability.api';
/** Shared first segment of every observability query key. */
const OBSERVABILITY_ROOT = 'observability';

/**
 * React Query keys for the observability feature.
 *
 * Every key starts with the `all` prefix, so invalidating `all` also
 * invalidates the health, circuit-breaker and Redis queries.
 */
export const OBSERVABILITY_KEYS = {
  all: [OBSERVABILITY_ROOT] as const,
  health: [OBSERVABILITY_ROOT, 'health'] as const,
  circuitBreakers: [OBSERVABILITY_ROOT, 'circuit-breakers'] as const,
  redis: [OBSERVABILITY_ROOT, 'redis'] as const,
};
/**
 * Query hook for the aggregated system health report.
 *
 * The report is re-fetched every 30 seconds.
 */
export function useSystemHealth() {
  const fetchHealth = () => observabilityApi.getHealth();

  return useQuery({
    queryKey: OBSERVABILITY_KEYS.health,
    queryFn: fetchHealth,
    refetchInterval: 30_000,
  });
}
/**
 * Query hook for the list of circuit breaker statuses.
 *
 * The list is re-fetched every 10 seconds.
 */
export function useCircuitBreakers() {
  const fetchBreakers = () => observabilityApi.getCircuitBreakers();

  return useQuery({
    queryKey: OBSERVABILITY_KEYS.circuitBreakers,
    queryFn: fetchBreakers,
    refetchInterval: 10_000,
  });
}
/**
 * Query hook for the Redis availability report.
 *
 * The report is re-fetched every 30 seconds.
 */
export function useRedisStatus() {
  const fetchRedisStatus = () => observabilityApi.getRedisStatus();

  return useQuery({
    queryKey: OBSERVABILITY_KEYS.redis,
    queryFn: fetchRedisStatus,
    refetchInterval: 30_000,
  });
}

View File

@ -0,0 +1,3 @@
export { ObservabilityPage } from './presentation/pages/ObservabilityPage';
export * from './application';
export type { CircuitBreakerStatus, SystemHealth, RedisStatus } from './infrastructure';

View File

@ -0,0 +1,2 @@
export { observabilityApi } from './observability.api';
export type { CircuitBreakerStatus, SystemHealth, RedisStatus } from './observability.api';

View File

@ -0,0 +1,40 @@
import api from '../../../shared/utils/api';
/** Status of a single circuit breaker as returned by the observability API. */
export interface CircuitBreakerStatus {
  /** Name identifying the breaker (for example the protected service). */
  name: string;
  /** Current breaker state. */
  state: 'CLOSED' | 'OPEN' | 'HALF_OPEN';
  /** Failure count currently reported by the breaker. */
  failureCount: number;
  /** Limits the breaker is configured with. */
  config: {
    /** Failure count threshold of the breaker. */
    failureThreshold: number;
    /** Reset timeout, in milliseconds. */
    resetTimeoutMs: number;
  };
}
/** Aggregated health report returned by the observability health endpoint. */
export interface SystemHealth {
  /** Overall verdict for the system. */
  status: 'healthy' | 'degraded';
  /** Redis availability at the time of the report. */
  redis: { available: boolean };
  /** Status of every circuit breaker included in the report. */
  circuitBreakers: CircuitBreakerStatus[];
  /** Time the report was produced, as a date string. */
  timestamp: string;
}
/** Redis report returned by the observability Redis endpoint. */
export interface RedisStatus {
  /** Whether Redis is currently available. */
  available: boolean;
  /** Human-readable description of what Redis is used for. */
  purpose: string;
}
/**
 * HTTP client for the admin observability endpoints.
 *
 * Each endpoint answers with an envelope whose `data` field holds the
 * payload; every method returns that payload unwrapped.
 */
export const observabilityApi = {
  /** Fetches the aggregated system health report. */
  getHealth: async (): Promise<SystemHealth> => {
    const { data: envelope } = await api.get('/conversations/admin/observability/health');
    return envelope.data;
  },

  /** Fetches the status of every circuit breaker. */
  getCircuitBreakers: async (): Promise<CircuitBreakerStatus[]> => {
    const { data: envelope } = await api.get('/conversations/admin/observability/circuit-breakers');
    return envelope.data;
  },

  /** Fetches the Redis availability report. */
  getRedisStatus: async (): Promise<RedisStatus> => {
    const { data: envelope } = await api.get('/conversations/admin/observability/redis');
    return envelope.data;
  },
};

View File

@ -0,0 +1,155 @@
import { Card, Row, Col, Tag, Typography, Spin, Progress, Descriptions, Badge, Button } from 'antd';
import {
CheckCircleOutlined,
CloseCircleOutlined,
ExclamationCircleOutlined,
ReloadOutlined,
DatabaseOutlined,
CloudServerOutlined,
} from '@ant-design/icons';
import { useQueryClient } from '@tanstack/react-query';
import { useSystemHealth, useCircuitBreakers, OBSERVABILITY_KEYS } from '../../application';
import type { CircuitBreakerStatus } from '../../infrastructure';
const { Title, Text } = Typography;
/**
 * Display metadata (tag colour, icon and label) for each circuit breaker state.
 *
 * Keyed by the state union of `CircuitBreakerStatus` rather than `string`, so
 * the compiler reports a missing or misspelled state entry.
 */
const STATE_CONFIG: Record<
  CircuitBreakerStatus['state'],
  { color: string; icon: React.ReactNode; label: string }
> = {
  CLOSED: { color: 'success', icon: <CheckCircleOutlined />, label: '正常' },
  OPEN: { color: 'error', icon: <CloseCircleOutlined />, label: '熔断中' },
  HALF_OPEN: { color: 'warning', icon: <ExclamationCircleOutlined />, label: '探测恢复' },
};
/**
 * Card summarising one circuit breaker: its name, a state tag, a progress bar
 * of failures against the threshold, and the configured limits.
 *
 * @param breaker - Status of the breaker to render.
 */
function CircuitBreakerCard({ breaker }: { breaker: CircuitBreakerStatus }) {
  const { state, failureCount } = breaker;
  const { failureThreshold, resetTimeoutMs } = breaker.config;

  // Runtime data may carry a state this UI does not know; fall back to the
  // CLOSED presentation, as before.
  const display = STATE_CONFIG[state] ?? STATE_CONFIG.CLOSED;

  // A non-positive threshold would make the ratio NaN/Infinity, and a count
  // above the threshold would exceed 100%; keep the bar within 0-100.
  const ratio = failureThreshold > 0 ? failureCount / failureThreshold : 0;
  const failurePercent = Math.min(100, Math.max(0, Math.round(ratio * 100)));

  return (
    <Card
      title={
        <span>
          <CloudServerOutlined className="mr-2" />
          {breaker.name}
        </span>
      }
      extra={<Tag color={display.color} icon={display.icon}>{display.label}</Tag>}
    >
      <div className="mb-4">
        <Text type="secondary">失败次数 / 阈值</Text>
        <Progress
          percent={failurePercent}
          format={() => `${failureCount} / ${failureThreshold}`}
          status={state === 'OPEN' ? 'exception' : state === 'HALF_OPEN' ? 'active' : 'normal'}
        />
      </div>
      <Descriptions size="small" column={1}>
        <Descriptions.Item label="熔断阈值">{failureThreshold} 次</Descriptions.Item>
        <Descriptions.Item label="重置超时">{resetTimeoutMs / 1000}s</Descriptions.Item>
        <Descriptions.Item label="当前状态">
          <Badge
            status={state === 'CLOSED' ? 'success' : state === 'OPEN' ? 'error' : 'warning'}
            text={display.label}
          />
        </Descriptions.Item>
      </Descriptions>
    </Card>
  );
}
/**
 * Admin page showing overall system health, Redis availability and the state
 * of every circuit breaker.
 *
 * Data comes from the `useSystemHealth` and `useCircuitBreakers` query hooks.
 * The refresh button invalidates every observability query. While no health
 * report is available (first load or failed request) the summary cards show a
 * neutral placeholder instead of a "degraded" verdict, and failed requests are
 * reported explicitly.
 */
export function ObservabilityPage() {
  const queryClient = useQueryClient();
  const { data: health, isLoading: loadingHealth, isError: healthFailed } = useSystemHealth();
  const { data: breakers, isLoading: loadingBreakers, isError: breakersFailed } = useCircuitBreakers();

  const handleRefresh = () => {
    // Fire-and-forget: the hooks re-render once the refetch settles.
    void queryClient.invalidateQueries({ queryKey: OBSERVABILITY_KEYS.all });
  };

  const closedCount = health?.circuitBreakers.filter((cb) => cb.state === 'CLOSED').length ?? 0;
  const totalCount = health?.circuitBreakers.length ?? 0;

  // Neutral tag used while there is no health report to judge from.
  const unknownTag = (
    <Tag color="default" className="text-base px-4 py-1">
      -
    </Tag>
  );

  return (
    <div className="p-6">
      <div className="flex justify-between items-center mb-6">
        <Title level={4} className="!mb-0">系统监控</Title>
        <Button icon={<ReloadOutlined />} onClick={handleRefresh}>
          刷新
        </Button>
      </div>

      {(healthFailed || breakersFailed) && (
        <div className="mb-4">
          <Text type="danger">监控数据加载失败，请稍后重试</Text>
        </div>
      )}

      {/* Health Overview */}
      <Spin spinning={loadingHealth}>
        <Row gutter={[16, 16]} className="mb-6">
          <Col xs={24} sm={8}>
            <Card>
              <div className="text-center">
                <div className="text-lg mb-2">系统状态</div>
                {!health ? (
                  unknownTag
                ) : health.status === 'healthy' ? (
                  <Tag color="success" icon={<CheckCircleOutlined />} className="text-base px-4 py-1">
                    健康
                  </Tag>
                ) : (
                  <Tag color="error" icon={<ExclamationCircleOutlined />} className="text-base px-4 py-1">
                    降级
                  </Tag>
                )}
              </div>
            </Card>
          </Col>
          <Col xs={24} sm={8}>
            <Card>
              <div className="text-center">
                <div className="text-lg mb-2">
                  <DatabaseOutlined className="mr-1" />
                  Redis
                </div>
                {!health ? (
                  unknownTag
                ) : health.redis.available ? (
                  <Tag color="success" icon={<CheckCircleOutlined />} className="text-base px-4 py-1">
                    可用
                  </Tag>
                ) : (
                  <Tag color="default" icon={<CloseCircleOutlined />} className="text-base px-4 py-1">
                    不可用
                  </Tag>
                )}
              </div>
            </Card>
          </Col>
          <Col xs={24} sm={8}>
            <Card>
              <div className="text-center">
                <div className="text-lg mb-2">熔断器</div>
                <Text>
                  {closedCount}
                  {' / '}
                  {totalCount}
                  {' '}
                  正常
                </Text>
              </div>
            </Card>
          </Col>
        </Row>
      </Spin>

      {/* Circuit Breaker Details */}
      <Title level={5} className="mb-4">熔断器详情</Title>
      <Spin spinning={loadingBreakers}>
        <Row gutter={[16, 16]}>
          {breakers?.map((breaker) => (
            <Col xs={24} md={12} key={breaker.name}>
              <CircuitBreakerCard breaker={breaker} />
            </Col>
          ))}
          {breakers?.length === 0 && (
            <Col span={24}>
              <Card>
                <Text type="secondary">暂无熔断器</Text>
              </Card>
            </Col>
          )}
        </Row>
      </Spin>

      {/* Auto-refresh indicator; the timestamp shown comes from the health report */}
      <div className="mt-4 text-right">
        <Text type="secondary" className="text-xs">
          熔断器状态每 10 秒自动刷新 | 健康检查时间: {health?.timestamp ? new Date(health.timestamp).toLocaleTimeString() : '-'}
        </Text>
      </div>
    </div>
  );
}

View File

@ -18,6 +18,7 @@ import {
MessageOutlined,
ClusterOutlined,
ApiOutlined,
MonitorOutlined,
} from '@ant-design/icons';
import { useAuth } from '../hooks/useAuth';
@ -82,6 +83,11 @@ const menuItems: MenuProps['items'] = [
icon: <ApiOutlined />,
label: 'MCP 服务器',
},
{
key: '/observability',
icon: <MonitorOutlined />,
label: '系统监控',
},
{
key: '/settings',
icon: <SettingOutlined />,

View File

@ -0,0 +1,99 @@
import {
Controller,
Get,
Headers,
UnauthorizedException,
} from '@nestjs/common';
import * as jwt from 'jsonwebtoken';
import { KnowledgeClientService } from '../../infrastructure/knowledge/knowledge-client.service';
import { PaymentClientService } from '../../infrastructure/payment/payment-client.service';
import { RedisClientService } from '../../infrastructure/cache/redis-client.service';
/** Claims this controller expects in a verified admin JWT. */
interface AdminPayload {
  /** Admin account identifier. */
  id: string;
  /** Admin login name. */
  username: string;
  /** Role name carried by the token. */
  role: string;
}
/**
 * Admin observability API: read-only endpoints reporting circuit breaker
 * status and Redis availability.
 *
 * Every endpoint requires a JWT in the `Authorization` header.
 */
@Controller('conversations/admin/observability')
export class AdminObservabilityController {
  constructor(
    private readonly knowledgeClient: KnowledgeClientService,
    private readonly paymentClient: PaymentClientService,
    private readonly redisClient: RedisClientService,
  ) {}

  /**
   * Verifies the bearer token from the `Authorization` header.
   *
   * NOTE(review): only the signature and payload shape are checked; the
   * `role` claim is not inspected. Confirm whether a role check is required.
   *
   * @param authorization - Raw `Authorization` header value (may be absent).
   * @returns The decoded token payload.
   * @throws UnauthorizedException when the token is missing or invalid.
   * @throws Error when `JWT_SECRET` is not configured on the server.
   */
  private verifyAdmin(authorization: string): AdminPayload {
    const token = authorization?.replace('Bearer ', '');
    if (!token) {
      throw new UnauthorizedException('Missing token');
    }

    // Fail closed: never fall back to a well-known default secret, otherwise
    // anyone could forge a token that passes verification.
    const secret = process.env.JWT_SECRET;
    if (!secret) {
      throw new Error('JWT_SECRET is not configured');
    }

    let decoded: unknown;
    try {
      decoded = jwt.verify(token, secret);
    } catch {
      throw new UnauthorizedException('Invalid token');
    }

    // jwt.verify may return a bare string payload; only object payloads can
    // carry the admin claims.
    if (typeof decoded !== 'object' || decoded === null) {
      throw new UnauthorizedException('Invalid token');
    }
    return decoded as AdminPayload;
  }

  /** Collects the current status of every monitored circuit breaker. */
  private collectCircuitBreakers() {
    return [
      this.knowledgeClient.getCircuitBreakerStatus(),
      this.paymentClient.getCircuitBreakerStatus(),
    ];
  }

  /**
   * Aggregated health report: `healthy` only when every circuit breaker is
   * CLOSED and Redis is available, otherwise `degraded`.
   */
  @Get('health')
  getHealth(@Headers('authorization') auth: string) {
    this.verifyAdmin(auth);

    const circuitBreakers = this.collectCircuitBreakers();
    const allHealthy = circuitBreakers.every((cb) => cb.state === 'CLOSED');
    const redisAvailable = this.redisClient.isAvailable();

    return {
      success: true,
      data: {
        status: allHealthy && redisAvailable ? 'healthy' : 'degraded',
        redis: { available: redisAvailable },
        circuitBreakers,
        timestamp: new Date().toISOString(),
      },
    };
  }

  /** Status of every monitored circuit breaker. */
  @Get('circuit-breakers')
  getCircuitBreakers(@Headers('authorization') auth: string) {
    this.verifyAdmin(auth);

    return {
      success: true,
      data: this.collectCircuitBreakers(),
    };
  }

  /** Redis availability together with a description of what Redis is used for. */
  @Get('redis')
  getRedisStatus(@Headers('authorization') auth: string) {
    this.verifyAdmin(auth);

    return {
      success: true,
      data: {
        available: this.redisClient.isAvailable(),
        purpose: 'Agent loop checkpoint persistence',
      },
    };
  }
}

View File

@ -16,11 +16,13 @@ import { InternalConversationController } from '../adapters/inbound/internal.con
import { AdminConversationController } from '../adapters/inbound/admin-conversation.controller';
import { AdminMcpController } from '../adapters/inbound/admin-mcp.controller';
import { AdminEvaluationRuleController } from '../adapters/inbound/admin-evaluation-rule.controller';
import { AdminObservabilityController } from '../adapters/inbound/admin-observability.controller';
import { ConversationGateway } from '../adapters/inbound/conversation.gateway';
import { PaymentModule } from '../infrastructure/payment/payment.module';
@Module({
imports: [TypeOrmModule.forFeature([ConversationORM, MessageORM, TokenUsageORM, AgentExecutionORM])],
controllers: [ConversationController, InternalConversationController, AdminMcpController, AdminEvaluationRuleController, AdminConversationController],
imports: [TypeOrmModule.forFeature([ConversationORM, MessageORM, TokenUsageORM, AgentExecutionORM]), PaymentModule],
controllers: [ConversationController, InternalConversationController, AdminMcpController, AdminEvaluationRuleController, AdminConversationController, AdminObservabilityController],
providers: [
ConversationService,
ConversationGateway,

View File

@ -282,4 +282,16 @@ export class KnowledgeClientService implements OnModuleInit {
if (memories.length === 0) return '';
return memories.map(m => `[${m.memoryType}] (重要度:${m.importance}) ${m.content}`).join('\n');
}
/**
 * Snapshot of this client's circuit breaker, exposed for the admin API.
 *
 * NOTE(review): the `config` values are literals in this method; confirm they
 * match the settings the breaker is actually constructed with.
 *
 * @returns The breaker name, its current state and failure count, and the
 * configured limits.
 */
getCircuitBreakerStatus() {
  const breaker = this.circuitBreaker;
  const state = breaker.getState();
  const failureCount = breaker.getFailureCount();

  return {
    name: 'knowledge-service',
    state,
    failureCount,
    config: { failureThreshold: 5, resetTimeoutMs: 60_000 },
  };
}
}

View File

@ -210,4 +210,16 @@ export class PaymentClientService implements OnModuleInit {
}>;
return data.success ? data.data : null;
}
/**
 * Snapshot of this client's circuit breaker, exposed for the admin API.
 *
 * NOTE(review): the `config` values are literals in this method; confirm they
 * match the settings the breaker is actually constructed with.
 *
 * @returns The breaker name, its current state and failure count, and the
 * configured limits.
 */
getCircuitBreakerStatus() {
  const breaker = this.circuitBreaker;
  const state = breaker.getState();
  const failureCount = breaker.getFailureCount();

  return {
    name: 'payment-service',
    state,
    failureCount,
    config: { failureThreshold: 3, resetTimeoutMs: 30_000 },
  };
}
}