feat(admin): add system observability dashboard with circuit breaker monitoring
Backend: expose circuit breaker status via new AdminObservabilityController (health, circuit-breakers, redis endpoints). Frontend: new observability feature in admin-client with auto-refreshing status cards. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0d488ac68b
commit
85c78b0775
|
|
@ -11,6 +11,7 @@ import { ConversationsPage } from './features/conversations';
|
||||||
import { SettingsPage } from './features/settings';
|
import { SettingsPage } from './features/settings';
|
||||||
import { TenantsPage } from './features/tenants';
|
import { TenantsPage } from './features/tenants';
|
||||||
import { McpPage } from './features/mcp';
|
import { McpPage } from './features/mcp';
|
||||||
|
import { ObservabilityPage } from './features/observability';
|
||||||
|
|
||||||
function App() {
|
function App() {
|
||||||
return (
|
return (
|
||||||
|
|
@ -37,6 +38,7 @@ function App() {
|
||||||
<Route path="conversations" element={<ConversationsPage />} />
|
<Route path="conversations" element={<ConversationsPage />} />
|
||||||
<Route path="tenants" element={<TenantsPage />} />
|
<Route path="tenants" element={<TenantsPage />} />
|
||||||
<Route path="mcp" element={<McpPage />} />
|
<Route path="mcp" element={<McpPage />} />
|
||||||
|
<Route path="observability" element={<ObservabilityPage />} />
|
||||||
<Route path="settings" element={<SettingsPage />} />
|
<Route path="settings" element={<SettingsPage />} />
|
||||||
</Route>
|
</Route>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
export {
|
||||||
|
useSystemHealth,
|
||||||
|
useCircuitBreakers,
|
||||||
|
useRedisStatus,
|
||||||
|
OBSERVABILITY_KEYS,
|
||||||
|
} from './useObservability';
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
import { useQuery } from '@tanstack/react-query';
|
||||||
|
import { observabilityApi } from '../infrastructure/observability.api';
|
||||||
|
|
||||||
|
/** Root segment shared by every observability query key. */
const OBSERVABILITY_ROOT = 'observability';

/**
 * React Query cache keys for the observability feature.
 *
 * Every specific key is built from the same root segment as `all`, so a
 * prefix-based invalidation of `all` always covers `health`,
 * `circuitBreakers` and `redis` and the prefix cannot drift between entries.
 */
export const OBSERVABILITY_KEYS = {
  all: [OBSERVABILITY_ROOT] as const,
  health: [OBSERVABILITY_ROOT, 'health'] as const,
  circuitBreakers: [OBSERVABILITY_ROOT, 'circuit-breakers'] as const,
  redis: [OBSERVABILITY_ROOT, 'redis'] as const,
};
|
||||||
|
|
||||||
|
/**
 * Query hook for the system health snapshot.
 *
 * Calls `observabilityApi.getHealth()`, caches the result under
 * `OBSERVABILITY_KEYS.health`, and re-fetches it every 30 seconds.
 */
export function useSystemHealth() {
  const fetchHealth = () => observabilityApi.getHealth();

  return useQuery({
    queryKey: OBSERVABILITY_KEYS.health,
    queryFn: fetchHealth,
    refetchInterval: 30_000,
  });
}
|
||||||
|
|
||||||
|
/**
 * Query hook for the list of circuit breaker statuses.
 *
 * Calls `observabilityApi.getCircuitBreakers()`, caches the result under
 * `OBSERVABILITY_KEYS.circuitBreakers`, and re-fetches it every 10 seconds.
 */
export function useCircuitBreakers() {
  const fetchCircuitBreakers = () => observabilityApi.getCircuitBreakers();

  return useQuery({
    queryKey: OBSERVABILITY_KEYS.circuitBreakers,
    queryFn: fetchCircuitBreakers,
    refetchInterval: 10_000,
  });
}
|
||||||
|
|
||||||
|
/**
 * Query hook for the Redis status.
 *
 * Calls `observabilityApi.getRedisStatus()`, caches the result under
 * `OBSERVABILITY_KEYS.redis`, and re-fetches it every 30 seconds.
 */
export function useRedisStatus() {
  const fetchRedisStatus = () => observabilityApi.getRedisStatus();

  return useQuery({
    queryKey: OBSERVABILITY_KEYS.redis,
    queryFn: fetchRedisStatus,
    refetchInterval: 30_000,
  });
}
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
export { ObservabilityPage } from './presentation/pages/ObservabilityPage';
|
||||||
|
export * from './application';
|
||||||
|
export type { CircuitBreakerStatus, SystemHealth, RedisStatus } from './infrastructure';
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
export { observabilityApi } from './observability.api';
|
||||||
|
export type { CircuitBreakerStatus, SystemHealth, RedisStatus } from './observability.api';
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
import api from '../../../shared/utils/api';
|
||||||
|
|
||||||
|
/** Status of a single circuit breaker as returned by the observability API. */
export interface CircuitBreakerStatus {
  /** Name identifying the breaker. */
  name: string;
  /** Current breaker state. */
  state:
    | 'CLOSED'
    | 'OPEN'
    | 'HALF_OPEN';
  /** Failure counter reported for the breaker. */
  failureCount: number;
  /** Breaker settings reported alongside the state. */
  config: {
    /** Failure count at which the breaker trips. */
    failureThreshold: number;
    /** Reset timeout, in milliseconds. */
    resetTimeoutMs: number;
  };
}
|
||||||
|
|
||||||
|
/** Payload of the observability health endpoint. */
export interface SystemHealth {
  /** Overall verdict reported by the backend. */
  status:
    | 'healthy'
    | 'degraded';
  /** Redis availability flag. */
  redis: {
    available: boolean;
  };
  /** Status of every circuit breaker included in the health report. */
  circuitBreakers: CircuitBreakerStatus[];
  /** Timestamp string of the report; the page parses it with `new Date(...)`. */
  timestamp: string;
}
|
||||||
|
|
||||||
|
/** Payload of the observability Redis endpoint. */
export interface RedisStatus {
  /** Whether Redis is reported as available. */
  available: boolean;
  /** Free-text description of what Redis is used for. */
  purpose: string;
}
|
||||||
|
|
||||||
|
/** Path prefix shared by all admin observability endpoints. */
const OBSERVABILITY_BASE_PATH = '/conversations/admin/observability';

/**
 * Issues a GET against an observability endpoint and returns the `data`
 * field of the response body (i.e. `response.data.data`).
 *
 * @param endpoint Path segment appended to the base path, e.g. `/health`.
 */
async function fetchObservability<T>(endpoint: string): Promise<T> {
  const response = await api.get(`${OBSERVABILITY_BASE_PATH}${endpoint}`);
  return response.data.data;
}

/** Client for the admin observability endpoints. */
export const observabilityApi = {
  /** Fetches the system health snapshot. */
  getHealth: (): Promise<SystemHealth> => fetchObservability<SystemHealth>('/health'),

  /** Fetches the status of every circuit breaker. */
  getCircuitBreakers: (): Promise<CircuitBreakerStatus[]> =>
    fetchObservability<CircuitBreakerStatus[]>('/circuit-breakers'),

  /** Fetches the Redis status. */
  getRedisStatus: (): Promise<RedisStatus> => fetchObservability<RedisStatus>('/redis'),
};
|
||||||
|
|
@ -0,0 +1,155 @@
|
||||||
|
import { Card, Row, Col, Tag, Typography, Spin, Progress, Descriptions, Badge, Button } from 'antd';
|
||||||
|
import {
|
||||||
|
CheckCircleOutlined,
|
||||||
|
CloseCircleOutlined,
|
||||||
|
ExclamationCircleOutlined,
|
||||||
|
ReloadOutlined,
|
||||||
|
DatabaseOutlined,
|
||||||
|
CloudServerOutlined,
|
||||||
|
} from '@ant-design/icons';
|
||||||
|
import { useQueryClient } from '@tanstack/react-query';
|
||||||
|
import { useSystemHealth, useCircuitBreakers, OBSERVABILITY_KEYS } from '../../application';
|
||||||
|
import type { CircuitBreakerStatus } from '../../infrastructure';
|
||||||
|
|
||||||
|
const { Title, Text } = Typography;
|
||||||
|
|
||||||
|
/**
 * Presentation settings (tag color, icon and label) for each circuit breaker state.
 *
 * Keyed by the exact state union of `CircuitBreakerStatus`, so adding a new
 * state to the type without adding an entry here is a compile error.
 */
const STATE_CONFIG: Record<
  CircuitBreakerStatus['state'],
  { color: string; icon: React.ReactNode; label: string }
> = {
  CLOSED: { color: 'success', icon: <CheckCircleOutlined />, label: '正常' },
  OPEN: { color: 'error', icon: <CloseCircleOutlined />, label: '熔断中' },
  HALF_OPEN: { color: 'warning', icon: <ExclamationCircleOutlined />, label: '探测恢复' },
};
|
||||||
|
|
||||||
|
/**
 * Card showing one circuit breaker: its name, a state tag, a
 * failures-vs-threshold progress bar and its configuration.
 *
 * @param breaker Status object of the breaker to render.
 */
function CircuitBreakerCard({ breaker }: { breaker: CircuitBreakerStatus }) {
  // Fall back to the CLOSED presentation if the backend ever sends a state
  // that has no entry in STATE_CONFIG.
  const config = STATE_CONFIG[breaker.state] ?? STATE_CONFIG.CLOSED;
  const { failureCount } = breaker;
  const { failureThreshold, resetTimeoutMs } = breaker.config;

  // A zero (or negative) threshold would turn the ratio into NaN/Infinity,
  // and a count above the threshold would exceed 100%; keep it in 0..100.
  const failurePercent =
    failureThreshold > 0
      ? Math.min(100, Math.max(0, Math.round((failureCount / failureThreshold) * 100)))
      : 0;

  const progressStatus =
    breaker.state === 'OPEN' ? 'exception' : breaker.state === 'HALF_OPEN' ? 'active' : 'normal';
  const badgeStatus =
    breaker.state === 'CLOSED' ? 'success' : breaker.state === 'OPEN' ? 'error' : 'warning';

  return (
    <Card
      title={
        <span>
          <CloudServerOutlined className="mr-2" />
          {breaker.name}
        </span>
      }
      extra={<Tag color={config.color} icon={config.icon}>{config.label}</Tag>}
    >
      <div className="mb-4">
        <Text type="secondary">失败次数 / 阈值</Text>
        <Progress
          percent={failurePercent}
          format={() => `${failureCount} / ${failureThreshold}`}
          status={progressStatus}
        />
      </div>
      <Descriptions size="small" column={1}>
        <Descriptions.Item label="熔断阈值">{failureThreshold} 次连续失败</Descriptions.Item>
        <Descriptions.Item label="重置超时">{resetTimeoutMs / 1000}s</Descriptions.Item>
        <Descriptions.Item label="当前状态">
          <Badge status={badgeStatus} text={config.label} />
        </Descriptions.Item>
      </Descriptions>
    </Card>
  );
}
|
||||||
|
|
||||||
|
/**
 * Neutral tag rendered while no health snapshot is available
 * (initial load, or the health request failed).
 */
function UnknownStatusTag() {
  return (
    <Tag color="default" icon={<ExclamationCircleOutlined />} className="text-base px-4 py-1">
      未知
    </Tag>
  );
}

/**
 * Admin page with a health overview (system status, Redis, breaker count)
 * and one detail card per circuit breaker.
 *
 * The overview is driven by `useSystemHealth` and the detail cards by
 * `useCircuitBreakers`; the refresh button invalidates every observability
 * query at once.
 */
export function ObservabilityPage() {
  const queryClient = useQueryClient();
  const { data: health, isLoading: loadingHealth } = useSystemHealth();
  const { data: breakers, isLoading: loadingBreakers } = useCircuitBreakers();

  const handleRefresh = () => {
    // Fire-and-forget: the hooks re-render the page once the refetch settles.
    void queryClient.invalidateQueries({ queryKey: OBSERVABILITY_KEYS.all });
  };

  const healthBreakers = health?.circuitBreakers ?? [];
  const closedCount = healthBreakers.filter((cb) => cb.state === 'CLOSED').length;
  const lastUpdated = health?.timestamp ? new Date(health.timestamp).toLocaleTimeString() : '-';

  return (
    <div className="p-6">
      <div className="flex justify-between items-center mb-6">
        <Title level={4} className="!mb-0">系统监控</Title>
        <Button icon={<ReloadOutlined />} onClick={handleRefresh}>
          刷新
        </Button>
      </div>

      {/* Health Overview */}
      <Spin spinning={loadingHealth}>
        <Row gutter={[16, 16]} className="mb-6">
          <Col xs={24} sm={8}>
            <Card>
              <div className="text-center">
                <div className="text-lg mb-2">系统状态</div>
                {/* Without data we must not claim "degraded": show a neutral tag instead. */}
                {!health ? (
                  <UnknownStatusTag />
                ) : health.status === 'healthy' ? (
                  <Tag color="success" icon={<CheckCircleOutlined />} className="text-base px-4 py-1">
                    健康
                  </Tag>
                ) : (
                  <Tag color="error" icon={<ExclamationCircleOutlined />} className="text-base px-4 py-1">
                    降级
                  </Tag>
                )}
              </div>
            </Card>
          </Col>
          <Col xs={24} sm={8}>
            <Card>
              <div className="text-center">
                <div className="text-lg mb-2">
                  <DatabaseOutlined className="mr-1" />
                  Redis
                </div>
                {/* Same rule as above: no data means "unknown", not "disconnected". */}
                {!health ? (
                  <UnknownStatusTag />
                ) : health.redis.available ? (
                  <Tag color="success" icon={<CheckCircleOutlined />} className="text-base px-4 py-1">
                    已连接
                  </Tag>
                ) : (
                  <Tag color="default" icon={<CloseCircleOutlined />} className="text-base px-4 py-1">
                    未连接
                  </Tag>
                )}
              </div>
            </Card>
          </Col>
          <Col xs={24} sm={8}>
            <Card>
              <div className="text-center">
                <div className="text-lg mb-2">熔断器</div>
                <Text>
                  {closedCount}
                  {' / '}
                  {healthBreakers.length}
                  {' '}
                  正常
                </Text>
              </div>
            </Card>
          </Col>
        </Row>
      </Spin>

      {/* Circuit Breaker Details */}
      <Title level={5} className="mb-4">熔断器状态</Title>
      <Spin spinning={loadingBreakers}>
        <Row gutter={[16, 16]}>
          {breakers?.map((breaker) => (
            <Col xs={24} md={12} key={breaker.name}>
              <CircuitBreakerCard breaker={breaker} />
            </Col>
          ))}
          {breakers?.length === 0 && (
            <Col span={24}>
              <Card>
                <Text type="secondary">暂无熔断器数据</Text>
              </Card>
            </Col>
          )}
        </Row>
      </Spin>

      {/* Auto-refresh indicator: breakers poll every 10s, health (and its timestamp) every 30s */}
      <div className="mt-4 text-right">
        <Text type="secondary" className="text-xs">
          熔断器每 10 秒、健康状态每 30 秒自动刷新 | 最后更新: {lastUpdated}
        </Text>
      </div>
    </div>
  );
}
|
||||||
|
|
@ -18,6 +18,7 @@ import {
|
||||||
MessageOutlined,
|
MessageOutlined,
|
||||||
ClusterOutlined,
|
ClusterOutlined,
|
||||||
ApiOutlined,
|
ApiOutlined,
|
||||||
|
MonitorOutlined,
|
||||||
} from '@ant-design/icons';
|
} from '@ant-design/icons';
|
||||||
import { useAuth } from '../hooks/useAuth';
|
import { useAuth } from '../hooks/useAuth';
|
||||||
|
|
||||||
|
|
@ -82,6 +83,11 @@ const menuItems: MenuProps['items'] = [
|
||||||
icon: <ApiOutlined />,
|
icon: <ApiOutlined />,
|
||||||
label: 'MCP 服务器',
|
label: 'MCP 服务器',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
key: '/observability',
|
||||||
|
icon: <MonitorOutlined />,
|
||||||
|
label: '系统监控',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
key: '/settings',
|
key: '/settings',
|
||||||
icon: <SettingOutlined />,
|
icon: <SettingOutlined />,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,99 @@
|
||||||
|
import {
|
||||||
|
Controller,
|
||||||
|
Get,
|
||||||
|
Headers,
|
||||||
|
UnauthorizedException,
|
||||||
|
} from '@nestjs/common';
|
||||||
|
import * as jwt from 'jsonwebtoken';
|
||||||
|
import { KnowledgeClientService } from '../../infrastructure/knowledge/knowledge-client.service';
|
||||||
|
import { PaymentClientService } from '../../infrastructure/payment/payment-client.service';
|
||||||
|
import { RedisClientService } from '../../infrastructure/cache/redis-client.service';
|
||||||
|
|
||||||
|
/** Claims the controller expects to find in a verified admin JWT. */
interface AdminPayload {
  /** Identifier claim. */
  id: string;
  /** Username claim. */
  username: string;
  /** Role claim. */
  role: string;
}
|
||||||
|
|
||||||
|
/**
 * Admin observability API: system health, circuit breaker status and Redis status.
 *
 * Every endpoint reads the `Authorization` header and rejects the request
 * with `UnauthorizedException` unless it carries a JWT that verifies against
 * the configured secret.
 */
@Controller('conversations/admin/observability')
export class AdminObservabilityController {
  constructor(
    private readonly knowledgeClient: KnowledgeClientService,
    private readonly paymentClient: PaymentClientService,
    private readonly redisClient: RedisClientService,
  ) {}

  /**
   * Returns the secret used to verify admin tokens.
   *
   * The hard-coded development fallback is a publicly known string, so it is
   * refused in production: a missing `JWT_SECRET` there must fail loudly
   * instead of letting anyone forge a valid token.
   *
   * @throws Error when `JWT_SECRET` is unset and `NODE_ENV` is `production`.
   */
  private resolveJwtSecret(): string {
    const secret = process.env.JWT_SECRET;
    if (secret) {
      return secret;
    }
    if (process.env.NODE_ENV === 'production') {
      throw new Error('JWT_SECRET must be configured in production');
    }
    return 'your-jwt-secret-key';
  }

  /**
   * Verifies the bearer token from the `Authorization` header.
   *
   * @param authorization Raw header value; may be absent.
   * @returns The decoded token payload.
   * @throws UnauthorizedException when the token is missing, fails
   *   verification, or does not decode to an object payload.
   */
  private verifyAdmin(authorization: string | undefined): AdminPayload {
    // Strip the scheme only when it is the header's prefix (case-insensitive),
    // rather than removing the first "Bearer " found anywhere in the value.
    const token = authorization?.replace(/^Bearer\s+/i, '').trim();
    if (!token) {
      throw new UnauthorizedException('Missing token');
    }

    // Resolved outside the try block so a configuration error surfaces as a
    // server error instead of being reported as an invalid token.
    const secret = this.resolveJwtSecret();

    let payload: unknown;
    try {
      payload = jwt.verify(token, secret);
    } catch {
      throw new UnauthorizedException('Invalid token');
    }

    // jwt.verify can return a plain string for non-JSON payloads; that is
    // never a usable AdminPayload.
    if (typeof payload !== 'object' || payload === null) {
      throw new UnauthorizedException('Invalid token');
    }

    // NOTE(review): the `role` claim is not checked here, so any token signed
    // with this secret is accepted. Confirm which role values should be
    // allowed and enforce them.
    return payload as AdminPayload;
  }

  /** Collects the current status of every circuit breaker exposed by this controller. */
  private collectCircuitBreakers() {
    return [
      this.knowledgeClient.getCircuitBreakerStatus(),
      this.paymentClient.getCircuitBreakerStatus(),
    ];
  }

  /**
   * Aggregated health check.
   *
   * Reports `healthy` only when every circuit breaker is `CLOSED` and Redis
   * is available; otherwise `degraded`.
   */
  @Get('health')
  getHealth(@Headers('authorization') auth: string) {
    this.verifyAdmin(auth);

    const circuitBreakers = this.collectCircuitBreakers();
    const allHealthy = circuitBreakers.every(cb => cb.state === 'CLOSED');
    const redisAvailable = this.redisClient.isAvailable();

    return {
      success: true,
      data: {
        status: allHealthy && redisAvailable ? 'healthy' : 'degraded',
        redis: { available: redisAvailable },
        circuitBreakers,
        timestamp: new Date().toISOString(),
      },
    };
  }

  /**
   * Detailed circuit breaker status.
   */
  @Get('circuit-breakers')
  getCircuitBreakers(@Headers('authorization') auth: string) {
    this.verifyAdmin(auth);

    return {
      success: true,
      data: this.collectCircuitBreakers(),
    };
  }

  /**
   * Redis connection status.
   */
  @Get('redis')
  getRedisStatus(@Headers('authorization') auth: string) {
    this.verifyAdmin(auth);

    return {
      success: true,
      data: {
        available: this.redisClient.isAvailable(),
        purpose: 'Agent loop checkpoint persistence',
      },
    };
  }
}
|
||||||
|
|
@ -16,11 +16,13 @@ import { InternalConversationController } from '../adapters/inbound/internal.con
|
||||||
import { AdminConversationController } from '../adapters/inbound/admin-conversation.controller';
|
import { AdminConversationController } from '../adapters/inbound/admin-conversation.controller';
|
||||||
import { AdminMcpController } from '../adapters/inbound/admin-mcp.controller';
|
import { AdminMcpController } from '../adapters/inbound/admin-mcp.controller';
|
||||||
import { AdminEvaluationRuleController } from '../adapters/inbound/admin-evaluation-rule.controller';
|
import { AdminEvaluationRuleController } from '../adapters/inbound/admin-evaluation-rule.controller';
|
||||||
|
import { AdminObservabilityController } from '../adapters/inbound/admin-observability.controller';
|
||||||
import { ConversationGateway } from '../adapters/inbound/conversation.gateway';
|
import { ConversationGateway } from '../adapters/inbound/conversation.gateway';
|
||||||
|
import { PaymentModule } from '../infrastructure/payment/payment.module';
|
||||||
|
|
||||||
@Module({
|
@Module({
|
||||||
imports: [TypeOrmModule.forFeature([ConversationORM, MessageORM, TokenUsageORM, AgentExecutionORM])],
|
imports: [TypeOrmModule.forFeature([ConversationORM, MessageORM, TokenUsageORM, AgentExecutionORM]), PaymentModule],
|
||||||
controllers: [ConversationController, InternalConversationController, AdminMcpController, AdminEvaluationRuleController, AdminConversationController],
|
controllers: [ConversationController, InternalConversationController, AdminMcpController, AdminEvaluationRuleController, AdminConversationController, AdminObservabilityController],
|
||||||
providers: [
|
providers: [
|
||||||
ConversationService,
|
ConversationService,
|
||||||
ConversationGateway,
|
ConversationGateway,
|
||||||
|
|
|
||||||
|
|
@ -282,4 +282,16 @@ export class KnowledgeClientService implements OnModuleInit {
|
||||||
if (memories.length === 0) return '';
|
if (memories.length === 0) return '';
|
||||||
return memories.map(m => `[${m.memoryType}] (重要度:${m.importance}) ${m.content}`).join('\n');
|
return memories.map(m => `[${m.memoryType}] (重要度:${m.importance}) ${m.content}`).join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the circuit breaker status, for use by the Admin API.
 */
getCircuitBreakerStatus() {
  const breaker = this.circuitBreaker;
  const state = breaker.getState();
  const failureCount = breaker.getFailureCount();

  return {
    name: 'knowledge-service',
    state,
    failureCount,
    // NOTE(review): these values are literals here, not read from the breaker;
    // confirm they match how `this.circuitBreaker` is actually configured.
    config: { failureThreshold: 5, resetTimeoutMs: 60_000 },
  };
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -210,4 +210,16 @@ export class PaymentClientService implements OnModuleInit {
|
||||||
}>;
|
}>;
|
||||||
return data.success ? data.data : null;
|
return data.success ? data.data : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the circuit breaker status, for use by the Admin API.
 */
getCircuitBreakerStatus() {
  const breaker = this.circuitBreaker;
  const state = breaker.getState();
  const failureCount = breaker.getFailureCount();

  return {
    name: 'payment-service',
    state,
    failureCount,
    // NOTE(review): these values are literals here, not read from the breaker;
    // confirm they match how `this.circuitBreaker` is actually configured.
    config: { failureThreshold: 3, resetTimeoutMs: 30_000 },
  };
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue