feat(admin): add system observability dashboard with circuit breaker monitoring
Backend: expose circuit breaker status via new AdminObservabilityController (health, circuit-breakers, redis endpoints). Frontend: new observability feature in admin-client with auto-refreshing status cards. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0d488ac68b
commit
85c78b0775
|
|
@ -11,6 +11,7 @@ import { ConversationsPage } from './features/conversations';
|
|||
import { SettingsPage } from './features/settings';
|
||||
import { TenantsPage } from './features/tenants';
|
||||
import { McpPage } from './features/mcp';
|
||||
import { ObservabilityPage } from './features/observability';
|
||||
|
||||
function App() {
|
||||
return (
|
||||
|
|
@ -37,6 +38,7 @@ function App() {
|
|||
<Route path="conversations" element={<ConversationsPage />} />
|
||||
<Route path="tenants" element={<TenantsPage />} />
|
||||
<Route path="mcp" element={<McpPage />} />
|
||||
<Route path="observability" element={<ObservabilityPage />} />
|
||||
<Route path="settings" element={<SettingsPage />} />
|
||||
</Route>
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,6 @@
|
|||
export {
|
||||
useSystemHealth,
|
||||
useCircuitBreakers,
|
||||
useRedisStatus,
|
||||
OBSERVABILITY_KEYS,
|
||||
} from './useObservability';
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
import { useQuery } from '@tanstack/react-query';
|
||||
import { observabilityApi } from '../infrastructure/observability.api';
|
||||
|
||||
/** Root segment shared by every observability query key. */
const OBSERVABILITY_SCOPE = 'observability';

/**
 * React Query cache keys for the observability feature.
 *
 * Every specific key begins with the same segment as `all`, so `all` can be
 * used as a common prefix when targeting the whole feature's queries.
 */
export const OBSERVABILITY_KEYS = {
  all: [OBSERVABILITY_SCOPE] as const,
  health: [OBSERVABILITY_SCOPE, 'health'] as const,
  circuitBreakers: [OBSERVABILITY_SCOPE, 'circuit-breakers'] as const,
  redis: [OBSERVABILITY_SCOPE, 'redis'] as const,
};
|
||||
|
||||
/**
 * Query hook for the system-health endpoint.
 *
 * Results are cached under `OBSERVABILITY_KEYS.health` and re-fetched every
 * 30 seconds.
 *
 * @returns The React Query result wrapping `observabilityApi.getHealth()`.
 */
export function useSystemHealth() {
  const pollEveryMs = 30 * 1000;
  const loadHealth = () => observabilityApi.getHealth();

  return useQuery({
    queryKey: OBSERVABILITY_KEYS.health,
    queryFn: loadHealth,
    refetchInterval: pollEveryMs,
  });
}
|
||||
|
||||
/**
 * Query hook for the circuit-breaker list.
 *
 * Results are cached under `OBSERVABILITY_KEYS.circuitBreakers` and
 * re-fetched every 10 seconds.
 *
 * @returns The React Query result wrapping `observabilityApi.getCircuitBreakers()`.
 */
export function useCircuitBreakers() {
  const pollEveryMs = 10 * 1000;
  const loadBreakers = () => observabilityApi.getCircuitBreakers();

  return useQuery({
    queryKey: OBSERVABILITY_KEYS.circuitBreakers,
    queryFn: loadBreakers,
    refetchInterval: pollEveryMs,
  });
}
|
||||
|
||||
/**
 * Query hook for the Redis status endpoint.
 *
 * Results are cached under `OBSERVABILITY_KEYS.redis` and re-fetched every
 * 30 seconds.
 *
 * @returns The React Query result wrapping `observabilityApi.getRedisStatus()`.
 */
export function useRedisStatus() {
  const pollEveryMs = 30 * 1000;
  const loadRedisStatus = () => observabilityApi.getRedisStatus();

  return useQuery({
    queryKey: OBSERVABILITY_KEYS.redis,
    queryFn: loadRedisStatus,
    refetchInterval: pollEveryMs,
  });
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
export { ObservabilityPage } from './presentation/pages/ObservabilityPage';
|
||||
export * from './application';
|
||||
export type { CircuitBreakerStatus, SystemHealth, RedisStatus } from './infrastructure';
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
export { observabilityApi } from './observability.api';
|
||||
export type { CircuitBreakerStatus, SystemHealth, RedisStatus } from './observability.api';
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import api from '../../../shared/utils/api';
|
||||
|
||||
/** Status of a single circuit breaker as returned by the observability API. */
export interface CircuitBreakerStatus {
  /** Identifier of the breaker. */
  name: string;
  /** Current breaker state. */
  state: 'CLOSED' | 'OPEN' | 'HALF_OPEN';
  /** Failure count reported for the breaker. */
  failureCount: number;
  /** Settings the breaker is reported to run with. */
  config: {
    /** Failure count at which the breaker trips. */
    failureThreshold: number;
    /** Reset timeout, in milliseconds. */
    resetTimeoutMs: number;
  };
}
|
||||
|
||||
/** Payload of the observability health endpoint. */
export interface SystemHealth {
  /** Overall verdict for the system. */
  status: 'healthy' | 'degraded';
  /** Redis availability flag. */
  redis: { available: boolean };
  /** Status of each reported circuit breaker. */
  circuitBreakers: CircuitBreakerStatus[];
  /** Timestamp string attached to the response. */
  timestamp: string;
}
|
||||
|
||||
/** Payload of the observability Redis endpoint. */
export interface RedisStatus {
  /** Whether Redis is reported as available. */
  available: boolean;
  /** Free-text description of what Redis is used for. */
  purpose: string;
}
|
||||
|
||||
/**
 * Issues a GET request to `url` and returns the `data` field of the response
 * body (the body itself is read from `response.data`).
 */
async function fetchObservabilityPayload<T>(url: string): Promise<T> {
  const { data: body } = await api.get(url);
  return body.data;
}

/** HTTP client for the admin observability endpoints. */
export const observabilityApi = {
  /** Fetches the combined system-health payload. */
  getHealth: (): Promise<SystemHealth> =>
    fetchObservabilityPayload<SystemHealth>('/conversations/admin/observability/health'),

  /** Fetches the status of every reported circuit breaker. */
  getCircuitBreakers: (): Promise<CircuitBreakerStatus[]> =>
    fetchObservabilityPayload<CircuitBreakerStatus[]>('/conversations/admin/observability/circuit-breakers'),

  /** Fetches the Redis status payload. */
  getRedisStatus: (): Promise<RedisStatus> =>
    fetchObservabilityPayload<RedisStatus>('/conversations/admin/observability/redis'),
};
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
import { Card, Row, Col, Tag, Typography, Spin, Progress, Descriptions, Badge, Button } from 'antd';
|
||||
import {
|
||||
CheckCircleOutlined,
|
||||
CloseCircleOutlined,
|
||||
ExclamationCircleOutlined,
|
||||
ReloadOutlined,
|
||||
DatabaseOutlined,
|
||||
CloudServerOutlined,
|
||||
} from '@ant-design/icons';
|
||||
import { useQueryClient } from '@tanstack/react-query';
|
||||
import { useSystemHealth, useCircuitBreakers, OBSERVABILITY_KEYS } from '../../application';
|
||||
import type { CircuitBreakerStatus } from '../../infrastructure';
|
||||
|
||||
const { Title, Text } = Typography;
|
||||
|
||||
/** Presentation attributes (tag colour, icon, label) for one breaker state. */
interface BreakerStateDisplay {
  color: string;
  icon: React.ReactNode;
  label: string;
}

/** Display settings for each circuit-breaker state, keyed by the state string. */
const STATE_CONFIG: Record<string, BreakerStateDisplay> = {
  CLOSED: {
    color: 'success',
    icon: <CheckCircleOutlined />,
    label: '正常',
  },
  OPEN: {
    color: 'error',
    icon: <CloseCircleOutlined />,
    label: '熔断中',
  },
  HALF_OPEN: {
    color: 'warning',
    icon: <ExclamationCircleOutlined />,
    label: '探测恢复',
  },
};
|
||||
|
||||
/**
 * Card describing a single circuit breaker: a state tag in the header, a
 * progress bar of failures versus threshold, and the breaker's settings.
 *
 * @param breaker - Status object for the breaker to render.
 */
function CircuitBreakerCard({ breaker }: { breaker: CircuitBreakerStatus }) {
  // Unrecognised states fall back to the CLOSED presentation.
  const config = STATE_CONFIG[breaker.state] || STATE_CONFIG.CLOSED;
  const { failureCount } = breaker;
  const { failureThreshold, resetTimeoutMs } = breaker.config;

  // A threshold of 0 would turn the ratio into NaN/Infinity, and a count above
  // the threshold would exceed 100%; keep the bar value finite and in 0-100.
  const ratio = failureThreshold > 0 ? failureCount / failureThreshold : 0;
  const failurePercent = Number.isFinite(ratio)
    ? Math.min(100, Math.max(0, Math.round(ratio * 100)))
    : 0;

  const progressStatus =
    breaker.state === 'OPEN' ? 'exception' : breaker.state === 'HALF_OPEN' ? 'active' : 'normal';
  const badgeStatus =
    breaker.state === 'CLOSED' ? 'success' : breaker.state === 'OPEN' ? 'error' : 'warning';

  return (
    <Card
      title={
        <span>
          <CloudServerOutlined className="mr-2" />
          {breaker.name}
        </span>
      }
      extra={<Tag color={config.color} icon={config.icon}>{config.label}</Tag>}
    >
      <div className="mb-4">
        <Text type="secondary">失败次数 / 阈值</Text>
        <Progress
          percent={failurePercent}
          format={() => `${failureCount} / ${failureThreshold}`}
          status={progressStatus}
        />
      </div>
      <Descriptions size="small" column={1}>
        <Descriptions.Item label="熔断阈值">{failureThreshold} 次连续失败</Descriptions.Item>
        <Descriptions.Item label="重置超时">{resetTimeoutMs / 1000}s</Descriptions.Item>
        <Descriptions.Item label="当前状态">
          <Badge status={badgeStatus} text={config.label} />
        </Descriptions.Item>
      </Descriptions>
    </Card>
  );
}
|
||||
|
||||
/**
 * Admin system-monitoring page.
 *
 * Renders three overview cards (overall status, Redis, closed-breaker count)
 * from the health query and one detail card per breaker from the
 * circuit-breaker query. Both queries poll on their own interval, and the
 * refresh button invalidates every observability query.
 *
 * While health data is absent (first load, or a failed request with no cached
 * data) the overview cards show a neutral "unknown" tag rather than reporting
 * the system as degraded or Redis as disconnected.
 */
export function ObservabilityPage() {
  const queryClient = useQueryClient();
  const { data: health, isLoading: loadingHealth, isError: healthFailed } = useSystemHealth();
  const { data: breakers, isLoading: loadingBreakers, isError: breakersFailed } = useCircuitBreakers();

  const handleRefresh = () => {
    // Targets every query whose key starts with the shared observability prefix.
    void queryClient.invalidateQueries({ queryKey: OBSERVABILITY_KEYS.all });
  };

  const tagClassName = 'text-base px-4 py-1';

  // Neutral placeholder used when no health payload is available yet.
  const unknownTag = (
    <Tag color="default" icon={<ExclamationCircleOutlined />} className={tagClassName}>
      未知
    </Tag>
  );

  let systemTag = unknownTag;
  let redisTag = unknownTag;
  if (health) {
    systemTag =
      health.status === 'healthy' ? (
        <Tag color="success" icon={<CheckCircleOutlined />} className={tagClassName}>
          健康
        </Tag>
      ) : (
        <Tag color="error" icon={<ExclamationCircleOutlined />} className={tagClassName}>
          降级
        </Tag>
      );
    redisTag = health.redis.available ? (
      <Tag color="success" icon={<CheckCircleOutlined />} className={tagClassName}>
        已连接
      </Tag>
    ) : (
      <Tag color="default" icon={<CloseCircleOutlined />} className={tagClassName}>
        未连接
      </Tag>
    );
  }

  const closedCount = health?.circuitBreakers.filter((cb) => cb.state === 'CLOSED').length ?? 0;
  const totalCount = health?.circuitBreakers.length ?? 0;
  const lastUpdated = health?.timestamp ? new Date(health.timestamp).toLocaleTimeString() : '-';

  return (
    <div className="p-6">
      <div className="flex justify-between items-center mb-6">
        <Title level={4} className="!mb-0">系统监控</Title>
        <Button icon={<ReloadOutlined />} onClick={handleRefresh}>
          刷新
        </Button>
      </div>

      {/* Surface request failures instead of silently showing stale or empty data */}
      {(healthFailed || breakersFailed) && (
        <div className="mb-4">
          <Text type="danger">监控数据加载失败，请点击刷新重试</Text>
        </div>
      )}

      {/* Health Overview */}
      <Spin spinning={loadingHealth}>
        <Row gutter={[16, 16]} className="mb-6">
          <Col xs={24} sm={8}>
            <Card>
              <div className="text-center">
                <div className="text-lg mb-2">系统状态</div>
                {systemTag}
              </div>
            </Card>
          </Col>
          <Col xs={24} sm={8}>
            <Card>
              <div className="text-center">
                <div className="text-lg mb-2">
                  <DatabaseOutlined className="mr-1" />
                  Redis
                </div>
                {redisTag}
              </div>
            </Card>
          </Col>
          <Col xs={24} sm={8}>
            <Card>
              <div className="text-center">
                <div className="text-lg mb-2">熔断器</div>
                <Text>
                  {closedCount}
                  {' / '}
                  {totalCount}
                  {' '}
                  正常
                </Text>
              </div>
            </Card>
          </Col>
        </Row>
      </Spin>

      {/* Circuit Breaker Details */}
      <Title level={5} className="mb-4">熔断器状态</Title>
      <Spin spinning={loadingBreakers}>
        <Row gutter={[16, 16]}>
          {breakers?.map((breaker) => (
            <Col xs={24} md={12} key={breaker.name}>
              <CircuitBreakerCard breaker={breaker} />
            </Col>
          ))}
          {breakers?.length === 0 && (
            <Col span={24}>
              <Card>
                <Text type="secondary">暂无熔断器数据</Text>
              </Card>
            </Col>
          )}
        </Row>
      </Spin>

      {/* Auto-refresh indicator: the timestamp comes from the health query (30 s poll) */}
      <div className="mt-4 text-right">
        <Text type="secondary" className="text-xs">
          熔断器每 10 秒、健康状态每 30 秒自动刷新 | 最后更新: {lastUpdated}
        </Text>
      </div>
    </div>
  );
}
|
||||
|
|
@ -18,6 +18,7 @@ import {
|
|||
MessageOutlined,
|
||||
ClusterOutlined,
|
||||
ApiOutlined,
|
||||
MonitorOutlined,
|
||||
} from '@ant-design/icons';
|
||||
import { useAuth } from '../hooks/useAuth';
|
||||
|
||||
|
|
@ -82,6 +83,11 @@ const menuItems: MenuProps['items'] = [
|
|||
icon: <ApiOutlined />,
|
||||
label: 'MCP 服务器',
|
||||
},
|
||||
{
|
||||
key: '/observability',
|
||||
icon: <MonitorOutlined />,
|
||||
label: '系统监控',
|
||||
},
|
||||
{
|
||||
key: '/settings',
|
||||
icon: <SettingOutlined />,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,99 @@
|
|||
import {
|
||||
Controller,
|
||||
Get,
|
||||
Headers,
|
||||
UnauthorizedException,
|
||||
} from '@nestjs/common';
|
||||
import * as jwt from 'jsonwebtoken';
|
||||
import { KnowledgeClientService } from '../../infrastructure/knowledge/knowledge-client.service';
|
||||
import { PaymentClientService } from '../../infrastructure/payment/payment-client.service';
|
||||
import { RedisClientService } from '../../infrastructure/cache/redis-client.service';
|
||||
|
||||
/** Shape the decoded admin JWT payload is cast to. */
interface AdminPayload {
  /** Identifier carried in the token. */
  id: string;
  /** Username carried in the token. */
  username: string;
  /** Role string carried in the token. */
  role: string;
}
|
||||
|
||||
/**
 * Admin observability API — system health, circuit breaker status and Redis
 * status.
 *
 * Every route verifies the JWT supplied in the `Authorization` header before
 * returning data.
 */
@Controller('conversations/admin/observability')
export class AdminObservabilityController {
  constructor(
    private readonly knowledgeClient: KnowledgeClientService,
    private readonly paymentClient: PaymentClientService,
    private readonly redisClient: RedisClientService,
  ) {}

  /**
   * Verifies the JWT from the `Authorization` header and returns its payload.
   *
   * The token is verified with `process.env.JWT_SECRET`. There is deliberately
   * no built-in fallback secret: a well-known default would let anyone forge
   * a valid token on a deployment where the variable is unset.
   *
   * NOTE(review): the payload's `role` is not inspected here — confirm whether
   * these routes should additionally require a specific role.
   *
   * @param authorization - Raw `Authorization` header value; an optional
   *   `Bearer ` prefix (any letter case) is stripped.
   * @returns The decoded token payload.
   * @throws UnauthorizedException when the token is missing or fails verification.
   * @throws Error when `JWT_SECRET` is not configured.
   */
  private verifyAdmin(authorization: string): AdminPayload {
    // Anchor the prefix match so only a leading scheme is removed.
    const token = authorization?.replace(/^Bearer\s+/i, '').trim();
    if (!token) {
      throw new UnauthorizedException('Missing token');
    }

    const secret = process.env.JWT_SECRET;
    if (!secret) {
      // Fail closed on misconfiguration rather than verifying with a guessable key.
      throw new Error('JWT_SECRET is not configured; refusing to verify admin tokens');
    }

    try {
      const payload = jwt.verify(token, secret);
      if (typeof payload === 'string') {
        // A non-object payload cannot carry the admin claims.
        throw new Error('Unexpected string payload');
      }
      return payload as AdminPayload;
    } catch {
      throw new UnauthorizedException('Invalid token');
    }
  }

  /** Collects the circuit breaker status of each downstream client. */
  private collectCircuitBreakers() {
    return [
      this.knowledgeClient.getCircuitBreakerStatus(),
      this.paymentClient.getCircuitBreakerStatus(),
    ];
  }

  /**
   * Combined health check.
   *
   * Reports `healthy` only when every circuit breaker is `CLOSED` and Redis is
   * available; otherwise `degraded`.
   */
  @Get('health')
  getHealth(@Headers('authorization') auth: string) {
    this.verifyAdmin(auth);

    const circuitBreakers = this.collectCircuitBreakers();
    const allHealthy = circuitBreakers.every((cb) => cb.state === 'CLOSED');
    const redisAvailable = this.redisClient.isAvailable();

    return {
      success: true,
      data: {
        status: allHealthy && redisAvailable ? 'healthy' : 'degraded',
        redis: { available: redisAvailable },
        circuitBreakers,
        timestamp: new Date().toISOString(),
      },
    };
  }

  /**
   * Detailed circuit breaker status.
   */
  @Get('circuit-breakers')
  getCircuitBreakers(@Headers('authorization') auth: string) {
    this.verifyAdmin(auth);

    return {
      success: true,
      data: this.collectCircuitBreakers(),
    };
  }

  /**
   * Redis connection status.
   */
  @Get('redis')
  getRedisStatus(@Headers('authorization') auth: string) {
    this.verifyAdmin(auth);

    return {
      success: true,
      data: {
        available: this.redisClient.isAvailable(),
        purpose: 'Agent loop checkpoint persistence',
      },
    };
  }
}
|
||||
|
|
@ -16,11 +16,13 @@ import { InternalConversationController } from '../adapters/inbound/internal.con
|
|||
import { AdminConversationController } from '../adapters/inbound/admin-conversation.controller';
|
||||
import { AdminMcpController } from '../adapters/inbound/admin-mcp.controller';
|
||||
import { AdminEvaluationRuleController } from '../adapters/inbound/admin-evaluation-rule.controller';
|
||||
import { AdminObservabilityController } from '../adapters/inbound/admin-observability.controller';
|
||||
import { ConversationGateway } from '../adapters/inbound/conversation.gateway';
|
||||
import { PaymentModule } from '../infrastructure/payment/payment.module';
|
||||
|
||||
@Module({
|
||||
imports: [TypeOrmModule.forFeature([ConversationORM, MessageORM, TokenUsageORM, AgentExecutionORM])],
|
||||
controllers: [ConversationController, InternalConversationController, AdminMcpController, AdminEvaluationRuleController, AdminConversationController],
|
||||
imports: [TypeOrmModule.forFeature([ConversationORM, MessageORM, TokenUsageORM, AgentExecutionORM]), PaymentModule],
|
||||
controllers: [ConversationController, InternalConversationController, AdminMcpController, AdminEvaluationRuleController, AdminConversationController, AdminObservabilityController],
|
||||
providers: [
|
||||
ConversationService,
|
||||
ConversationGateway,
|
||||
|
|
|
|||
|
|
@ -282,4 +282,16 @@ export class KnowledgeClientService implements OnModuleInit {
|
|||
if (memories.length === 0) return '';
|
||||
return memories.map(m => `[${m.memoryType}] (重要度:${m.importance}) ${m.content}`).join('\n');
|
||||
}
|
||||
|
||||
/**
 * Returns a snapshot of this client's circuit breaker for the admin API.
 *
 * State and failure count are read from `this.circuitBreaker`.
 * NOTE(review): the `config` values are literals in this method — confirm they
 * match the settings the breaker is actually constructed with.
 */
getCircuitBreakerStatus() {
  const { circuitBreaker } = this;
  const state = circuitBreaker.getState();
  const failureCount = circuitBreaker.getFailureCount();

  return {
    name: 'knowledge-service',
    state,
    failureCount,
    config: {
      failureThreshold: 5,
      resetTimeoutMs: 60_000,
    },
  };
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -210,4 +210,16 @@ export class PaymentClientService implements OnModuleInit {
|
|||
}>;
|
||||
return data.success ? data.data : null;
|
||||
}
|
||||
|
||||
/**
 * Returns a snapshot of this client's circuit breaker for the admin API.
 *
 * State and failure count are read from `this.circuitBreaker`.
 * NOTE(review): the `config` values are literals in this method — confirm they
 * match the settings the breaker is actually constructed with.
 */
getCircuitBreakerStatus() {
  const { circuitBreaker } = this;
  const state = circuitBreaker.getState();
  const failureCount = circuitBreaker.getFailureCount();

  return {
    name: 'payment-service',
    state,
    failureCount,
    config: {
      failureThreshold: 3,
      resetTimeoutMs: 30_000,
    },
  };
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue