diff --git a/backend/api-gateway/grafana/provisioning/dashboards/presence-dashboard.json b/backend/api-gateway/grafana/provisioning/dashboards/presence-dashboard.json new file mode 100644 index 00000000..b0c3d2be --- /dev/null +++ b/backend/api-gateway/grafana/provisioning/dashboards/presence-dashboard.json @@ -0,0 +1,1124 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "RWA 用户活跃度监控面板 - DAU、实时在线人数、事件统计", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 100, + "panels": [], + "title": "📊 核心指标概览", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 100 + }, + { + "color": "red", + "value": 500 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "presence_online_users_total", + "legendFormat": "在线人数", + "refId": "A" + } + ], + "title": "🟢 实时在线人数", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": 
{ + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1000 + }, + { + "color": "red", + "value": 5000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "presence_dau_total", + "legendFormat": "DAU", + "refId": "A" + } + ], + "title": "📅 今日 DAU", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(presence_heartbeat_total[1h]))", + "legendFormat": "心跳数/小时", + "refId": "A" + } + ], + "title": "💓 心跳数 (1h)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(presence_events_received_total[1h]))", + "legendFormat": "事件数/小时", + "refId": "A" + } + ], + "title": "📨 事件数 (1h)", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 101, + "panels": [], + "title": "📈 趋势图表", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 5, + "options": { + "legend": { + "calcs": ["mean", "max", "last"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + 
"pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "presence_online_users_total", + "legendFormat": "在线人数", + "refId": "A" + } + ], + "title": "🟢 实时在线人数趋势", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 6, + "options": { + "legend": { + "calcs": ["mean", "max", "last"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "rate(presence_heartbeat_total[5m])", + "legendFormat": "心跳速率", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "rate(presence_events_received_total[5m])", + "legendFormat": "事件速率", + "refId": "B" + } + ], + "title": "💓 心跳 & 事件速率", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 102, + "panels": [], + 
"title": "📊 事件分布", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 7, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value", "percent"] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "topk(10, sum by (event_name) (increase(presence_events_received_total[24h])))", + "legendFormat": "{{event_name}}", + "refId": "A" + } + ], + "title": "📊 事件类型分布 (24h)", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 8, + "y": 
16 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum by (event_name) (increase(presence_events_received_total[1h]))", + "legendFormat": "{{event_name}}", + "refId": "A" + } + ], + "title": "📈 事件类型趋势 (按小时)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 103, + "panels": [], + "title": "⚡ 性能指标", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 0.1 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 9, + "options": { + "legend": { + "calcs": ["mean", "max", "p99"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.50, sum(rate(presence_heartbeat_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "P50", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.95, sum(rate(presence_heartbeat_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "P95", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.99, sum(rate(presence_heartbeat_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "P99", + "refId": "C" + } + ], + "title": "💓 心跳处理延迟", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 0.5 + }, + { + "color": "red", + "value": 2 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 10, + "options": { + "legend": { + "calcs": ["mean", "max", "p99"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.50, sum(rate(presence_event_batch_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "P50", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.95, sum(rate(presence_event_batch_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "P95", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.99, sum(rate(presence_event_batch_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "P99", + "refId": "C" + } + ], + "title": "📨 事件批处理延迟", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 104, + "panels": [], + "title": "🖥️ 服务资源", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 11, + "options": { + "legend": { + "calcs": ["mean", "max", "last"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, 
+ "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "presence_process_resident_memory_bytes", + "legendFormat": "RSS 内存", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "presence_nodejs_heap_size_used_bytes", + "legendFormat": "Heap Used", + "refId": "B" + } + ], + "title": "🖥️ 内存使用", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 12, + "options": { + "legend": { + "calcs": ["mean", "max", "last"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "rate(presence_process_cpu_seconds_total[1m])", + "legendFormat": "CPU 使用率", + "refId": "A" + } + ], + "title": "⚙️ CPU 使用率", + "type": "timeseries" + } + ], + "refresh": "10s", + 
"schemaVersion": 38, + "style": "dark", + "tags": ["rwa", "presence", "dau", "telemetry"], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "数据源", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "Asia/Shanghai", + "title": "RWA 用户活跃度监控", + "uid": "rwa-presence-dashboard", + "version": 1, + "weekStart": "monday" +} diff --git a/backend/api-gateway/prometheus.yml b/backend/api-gateway/prometheus.yml index 3a0b3fcb..73df5265 100644 --- a/backend/api-gateway/prometheus.yml +++ b/backend/api-gateway/prometheus.yml @@ -1,5 +1,5 @@ # ============================================================================= -# Prometheus 配置 - Kong API Gateway 监控 +# Prometheus 配置 - Kong API Gateway + RWA Services 监控 # ============================================================================= global: @@ -17,3 +17,21 @@ scrape_configs: - job_name: 'prometheus' static_configs: - targets: ['localhost:9090'] + + # ========================================================================== + # RWA Presence Service - 用户活跃度与在线状态监控 + # ========================================================================== + - job_name: 'presence-service' + static_configs: + # 生产环境: 使用内网 IP 或 Docker 网络名称 + # - targets: ['presence-service:3011'] + # 开发环境: 使用 host.docker.internal 访问宿主机服务 + - targets: ['host.docker.internal:3011'] + metrics_path: /api/v1/metrics + scrape_interval: 15s + scrape_timeout: 10s + # 添加标签便于区分 + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: 'presence-service' diff --git a/backend/services/presence-service/package.json b/backend/services/presence-service/package.json index 1a20886a..9b522437 100644 --- 
a/backend/services/presence-service/package.json +++ b/backend/services/presence-service/package.json @@ -42,6 +42,7 @@ "date-fns-tz": "^2.0.0", "ioredis": "^5.3.2", "kafkajs": "^2.2.4", + "prom-client": "^15.1.0", "reflect-metadata": "^0.1.13", "rxjs": "^7.8.1", "uuid": "^9.0.1" diff --git a/backend/services/presence-service/src/application/commands/record-events/record-events.handler.ts b/backend/services/presence-service/src/application/commands/record-events/record-events.handler.ts index 3c4ea236..88774cff 100644 --- a/backend/services/presence-service/src/application/commands/record-events/record-events.handler.ts +++ b/backend/services/presence-service/src/application/commands/record-events/record-events.handler.ts @@ -11,6 +11,7 @@ import { } from '../../../domain/repositories/event-log.repository.interface'; import { RedisService } from '../../../infrastructure/redis/redis.service'; import { EventPublisherService } from '../../../infrastructure/kafka/event-publisher.service'; +import { MetricsService } from '../../../infrastructure/metrics/metrics.service'; import { SessionStartedEvent } from '../../../domain/events/session-started.event'; import { formatToDateKey } from '../../../shared/utils/timezone.util'; @@ -28,9 +29,11 @@ export class RecordEventsHandler implements ICommandHandler private readonly eventLogRepository: IEventLogRepository, private readonly redisService: RedisService, private readonly eventPublisher: EventPublisherService, + private readonly metricsService: MetricsService, ) {} async execute(command: RecordEventsCommand): Promise { + const startTime = Date.now(); const { events } = command; const errors: string[] = []; const validLogs: EventLog[] = []; @@ -78,6 +81,13 @@ export class RecordEventsHandler implements ICommandHandler } } + // 5. 
记录 Prometheus 指标 + for (const log of validLogs) { + this.metricsService.recordEvent(log.eventName.value); + } + const durationSeconds = (Date.now() - startTime) / 1000; + this.metricsService.recordEventBatchDuration(durationSeconds); + return { accepted: validLogs.length, failed: events.length - validLogs.length, diff --git a/backend/services/presence-service/src/application/commands/record-heartbeat/record-heartbeat.handler.ts b/backend/services/presence-service/src/application/commands/record-heartbeat/record-heartbeat.handler.ts index dc942b35..7670703f 100644 --- a/backend/services/presence-service/src/application/commands/record-heartbeat/record-heartbeat.handler.ts +++ b/backend/services/presence-service/src/application/commands/record-heartbeat/record-heartbeat.handler.ts @@ -3,6 +3,7 @@ import { Injectable } from '@nestjs/common'; import { RecordHeartbeatCommand } from './record-heartbeat.command'; import { PresenceRedisRepository } from '../../../infrastructure/redis/presence-redis.repository'; import { EventPublisherService } from '../../../infrastructure/kafka/event-publisher.service'; +import { MetricsService } from '../../../infrastructure/metrics/metrics.service'; import { HeartbeatReceivedEvent } from '../../../domain/events/heartbeat-received.event'; export interface RecordHeartbeatResult { @@ -16,9 +17,11 @@ export class RecordHeartbeatHandler implements ICommandHandler { + const startTime = Date.now(); const { userId, installId, appVersion, clientTs } = command; const now = Math.floor(Date.now() / 1000); @@ -31,6 +34,10 @@ export class RecordHeartbeatHandler implements ICommandHandler { + try { + const now = Math.floor(Date.now() / 1000); + const threshold = now - this.presenceWindowSeconds; + const count = await this.presenceRedisRepository.countOnlineUsers(threshold); + + this.metricsService.setOnlineUsers(count); + this.logger.debug(`Online users metric updated: ${count}`); + } catch (error) { + this.logger.error('Failed to collect online users 
metric', error);
+    }
+  }
+
+  /**
+   * Refreshes the DAU gauge every 5 minutes. NOTE(review): confirm that format() and findByDay() agree on the time zone that defines "today".
+   */
+  @Cron(CronExpression.EVERY_5_MINUTES)
+  async collectDauMetric(): Promise<void> {
+    try {
+      const today = new Date();
+      const dateStr = format(today, 'yyyy-MM-dd');
+
+      const stats = await this.dauRepository.findByDay(today);
+      if (stats) {
+        this.metricsService.setDau(dateStr, stats.dauCount);
+        this.logger.debug(`DAU metric updated: ${dateStr} = ${stats.dauCount}`);
+      }
+    } catch (error) {
+      this.logger.error('Failed to collect DAU metric', error);
+    }
+  }
+
+  /**
+   * Collects both metrics once at startup instead of waiting for the first scheduled run.
+   */
+  async onModuleInit(): Promise<void> {
+    await this.collectOnlineUsersMetric();
+    await this.collectDauMetric();
+  }
+}
diff --git a/backend/services/presence-service/src/infrastructure/metrics/metrics.controller.ts b/backend/services/presence-service/src/infrastructure/metrics/metrics.controller.ts
new file mode 100644
index 00000000..a6f759b6
--- /dev/null
+++ b/backend/services/presence-service/src/infrastructure/metrics/metrics.controller.ts
@@ -0,0 +1,24 @@
+import { Controller, Get, Header, Res } from '@nestjs/common';
+import { ApiTags, ApiOperation, ApiExcludeEndpoint } from '@nestjs/swagger';
+import { Response } from 'express';
+import { MetricsService } from './metrics.service';
+
+/** Exposes the service's Prometheus metrics over HTTP at the `metrics` route. */
+@ApiTags('Metrics')
+@Controller('metrics')
+export class MetricsController {
+  constructor(private readonly metricsService: MetricsService) {}
+
+  /**
+   * Writes the current metrics to the response, using the content type
+   * reported by MetricsService.
+   */
+  @Get()
+  @ApiExcludeEndpoint() // keep this endpoint out of the Swagger docs
+  @ApiOperation({ summary: 'Prometheus 指标端点' })
+  async getMetrics(@Res() res: Response): Promise<void> {
+    const metrics = await this.metricsService.getMetrics();
+    res.header('Content-Type', this.metricsService.getContentType());
+    res.send(metrics);
+  }
+}
diff --git a/backend/services/presence-service/src/infrastructure/metrics/metrics.module.ts b/backend/services/presence-service/src/infrastructure/metrics/metrics.module.ts
new file mode 100644
index 00000000..271b9389
--- /dev/null
+++ 
b/backend/services/presence-service/src/infrastructure/metrics/metrics.module.ts
@@ -0,0 +1,18 @@
+import { Module, Global } from '@nestjs/common';
+import { MetricsService } from './metrics.service';
+import { MetricsController } from './metrics.controller';
+import { MetricsCollectorService } from './metrics-collector.service';
+
+/**
+ * Global module that wires up the Prometheus metrics controller, the
+ * MetricsService registry wrapper and the scheduled collector.
+ * Only MetricsService is exported, so other modules can inject it
+ * without importing this module explicitly.
+ */
+@Global()
+@Module({
+  controllers: [MetricsController],
+  providers: [MetricsService, MetricsCollectorService],
+  exports: [MetricsService],
+})
+export class MetricsModule {}
diff --git a/backend/services/presence-service/src/infrastructure/metrics/metrics.service.ts b/backend/services/presence-service/src/infrastructure/metrics/metrics.service.ts
new file mode 100644
index 00000000..54172405
--- /dev/null
+++ b/backend/services/presence-service/src/infrastructure/metrics/metrics.service.ts
@@ -0,0 +1,187 @@
+import { Injectable, OnModuleInit } from '@nestjs/common';
+import { Registry, Gauge, Counter, Histogram, collectDefaultMetrics } from 'prom-client';
+
+/** Label value reported when a heartbeat carries an empty app version. */
+const UNKNOWN_APP_VERSION = 'unknown';
+
+/**
+ * Owns a dedicated prom-client registry and every metric the presence
+ * service exposes through it.
+ */
+@Injectable()
+export class MetricsService implements OnModuleInit {
+  private readonly registry: Registry;
+
+  // ============ Gauges (current values) ============
+
+  /** Number of users currently online. */
+  public readonly onlineUsersGauge: Gauge<string>;
+
+  /** Daily active users, labelled with the day the value belongs to. */
+  public readonly dauGauge: Gauge<string>;
+
+  /** Number of events pending in the queue. */
+  public readonly eventQueueSizeGauge: Gauge<string>;
+
+  // ============ Counters (cumulative values) ============
+
+  /** Heartbeats received, labelled by app version. */
+  public readonly heartbeatTotal: Counter<string>;
+
+  /** Telemetry events received, labelled by event name. */
+  public readonly eventsReceivedTotal: Counter<string>;
+
+  /** Session-start events received. */
+  public readonly sessionStartTotal: Counter<string>;
+
+  /** Session-end events received. */
+  public readonly sessionEndTotal: Counter<string>;
+
+  // ============ Histograms (distributions) ============
+
+  /** Heartbeat processing time in seconds. */
+  public readonly heartbeatDuration: Histogram<string>;
+
+  /** Event batch processing time in seconds. */
+  public readonly eventBatchDuration: Histogram<string>;
+
+  constructor() {
+    this.registry = new Registry();
+
+    // Default process metrics (CPU, memory, GC, ...) under the same prefix.
+    collectDefaultMetrics({ register: this.registry, prefix: 'presence_' });
+
+    // ============ Gauges ============
+
+    this.onlineUsersGauge = new Gauge({
+      name: 'presence_online_users_total',
+      help: 'Current number of online users',
+      registers: [this.registry],
+    });
+
+    this.dauGauge = new Gauge({
+      name: 'presence_dau_total',
+      help: 'Daily Active Users count',
+      labelNames: ['date'],
+      registers: [this.registry],
+    });
+
+    this.eventQueueSizeGauge = new Gauge({
+      name: 'presence_event_queue_size',
+      help: 'Number of events pending in queue',
+      registers: [this.registry],
+    });
+
+    // ============ Counters ============
+
+    this.heartbeatTotal = new Counter({
+      name: 'presence_heartbeat_total',
+      help: 'Total number of heartbeats received',
+      labelNames: ['app_version'],
+      registers: [this.registry],
+    });
+
+    this.eventsReceivedTotal = new Counter({
+      name: 'presence_events_received_total',
+      help: 'Total number of telemetry events received',
+      labelNames: ['event_name'],
+      registers: [this.registry],
+    });
+
+    this.sessionStartTotal = new Counter({
+      name: 'presence_session_start_total',
+      help: 'Total number of session starts (for DAU)',
+      registers: [this.registry],
+    });
+
+    this.sessionEndTotal = new Counter({
+      name: 'presence_session_end_total',
+      help: 'Total number of session ends',
+      registers: [this.registry],
+    });
+
+    // ============ Histograms ============
+
+    this.heartbeatDuration = new Histogram({
+      name: 'presence_heartbeat_duration_seconds',
+      help: 'Heartbeat processing duration in seconds',
+      buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1],
+      registers: [this.registry],
+    });
+
+    this.eventBatchDuration = new Histogram({
+      name: 'presence_event_batch_duration_seconds',
+      help: 'Event batch processing duration in seconds',
+      buckets: [0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5],
+      registers: [this.registry],
+    });
+  }
+
+  /** Lifecycle hook; intentionally a no-op, kept as a place to seed initial values. */
+  onModuleInit(): void {}
+
+  /** Returns every registered metric serialized by the registry. */
+  async getMetrics(): Promise<string> {
+    return this.registry.metrics();
+  }
+
+  /** Returns the Content-Type value that matches the output of getMetrics(). */
+  getContentType(): string {
+    return this.registry.contentType;
+  }
+
+  // ============ Convenience helpers ============
+
+  /** Sets the current online user count. */
+  setOnlineUsers(count: number): void {
+    this.onlineUsersGauge.set(count);
+  }
+
+  /**
+   * Publishes the DAU count for the given day.
+   *
+   * The gauge is reset first so that only one `date` series is exported at a
+   * time. Without the reset, a long-running process keeps exporting one stale
+   * series per earlier day, and a query on `presence_dau_total` without a
+   * label matcher returns all of them.
+   *
+   * @param date - Label value identifying the day (format chosen by the caller).
+   * @param count - DAU value to expose.
+   */
+  setDau(date: string, count: number): void {
+    this.dauGauge.reset();
+    this.dauGauge.labels(date).set(count);
+  }
+
+  /**
+   * Counts one heartbeat and records its processing time.
+   *
+   * @param appVersion - Client app version; an empty value is reported as "unknown".
+   * @param durationSeconds - Processing time in seconds.
+   */
+  recordHeartbeat(appVersion: string, durationSeconds: number): void {
+    // Prometheus treats an empty label value like a missing label, so use an
+    // explicit placeholder instead.
+    this.heartbeatTotal.labels(appVersion || UNKNOWN_APP_VERSION).inc();
+    this.heartbeatDuration.observe(durationSeconds);
+  }
+
+  /**
+   * Counts one telemetry event by name. `app_session_start` and
+   * `app_session_end` additionally bump the matching session counter.
+   */
+  recordEvent(eventName: string): void {
+    this.eventsReceivedTotal.labels(eventName).inc();
+
+    if (eventName === 'app_session_start') {
+      this.sessionStartTotal.inc();
+    } else if (eventName === 'app_session_end') {
+      this.sessionEndTotal.inc();
+    }
+  }
+
+  /** Records the processing time of one event batch, in seconds. */
+  recordEventBatchDuration(durationSeconds: number): void {
+    this.eventBatchDuration.observe(durationSeconds);
+  }
+}