# =============================================================================
# RWA Infrastructure - observability and service-governance stack
# =============================================================================
#
# Modular design; each component is gated by a Compose profile and can be
# enabled on demand:
#   - consul:  service discovery and configuration center
#   - jaeger:  distributed tracing
#   - loki:    log aggregation
#   - grafana: unified dashboards
#
# Usage:
#   ./deploy.sh up              # start all components
#   ./deploy.sh up consul       # start Consul only
#   ./deploy.sh up jaeger loki  # start the listed components
#   ./deploy.sh down            # stop all components
#
# =============================================================================

services:
  # ===========================================================================
  # Consul - service discovery and configuration center
  # ===========================================================================
  # Features:
  #   - service registration and discovery
  #   - health checks
  #   - KV configuration store
  #   - multi-datacenter support
  # ===========================================================================
  consul:
    image: docker.io/hashicorp/consul:1.18
    container_name: rwa-consul
    # Single-node server (bootstrap-expect=1) with the web UI enabled and the
    # client interfaces bound to all addresses inside the container.
    command: agent -server -bootstrap-expect=1 -ui -client=0.0.0.0 -datacenter=rwa-dc1
    environment:
      TZ: Asia/Shanghai
      CONSUL_BIND_INTERFACE: eth0
    ports:
      - "${CONSUL_HTTP_PORT:-8500}:8500"      # HTTP API + UI
      - "${CONSUL_DNS_PORT:-8600}:8600/udp"   # DNS
      # NOTE(review): only TCP is published for the Serf ports below; Serf
      # gossip also uses UDP. Add "/udp" mappings if agents outside this
      # Docker network ever need to join - confirm against the deployment.
      - "8301:8301"                           # Serf LAN
      - "8302:8302"                           # Serf WAN
    volumes:
      - consul_data:/consul/data
      - ./consul/config:/consul/config:ro
    healthcheck:
      test: ["CMD", "consul", "members"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - rwa-infra
    profiles:
      - consul
      - full

  # ===========================================================================
  # Jaeger - distributed tracing
  # ===========================================================================
  # Features:
  #   - request tracing
  #   - performance bottleneck analysis
  #   - service dependency visualization
  #   - error localization
  # ===========================================================================
  jaeger:
    image: docker.io/jaegertracing/all-in-one:1.54
    container_name: rwa-jaeger
    environment:
      # Environment values are quoted so they always reach the container as
      # strings, regardless of how the YAML parser types bare true/false or a
      # scalar that starts with ":".
      TZ: Asia/Shanghai
      COLLECTOR_ZIPKIN_HOST_PORT: ":9411"
      COLLECTOR_OTLP_ENABLED: "true"
      SPAN_STORAGE_TYPE: badger
      # Non-ephemeral Badger storage; data lives under /badger, which is
      # backed by the jaeger_data volume below.
      BADGER_EPHEMERAL: "false"
      BADGER_DIRECTORY_VALUE: /badger/data
      BADGER_DIRECTORY_KEY: /badger/key
    ports:
      - "${JAEGER_UI_PORT:-16686}:16686"  # UI
      - "6831:6831/udp"                   # Thrift compact (agent)
      - "6832:6832/udp"                   # Thrift binary (agent)
      - "4317:4317"                       # OTLP gRPC
      - "4318:4318"                       # OTLP HTTP
      - "14250:14250"                     # gRPC (collector)
      - "14268:14268"                     # HTTP (collector)
      - "9411:9411"                       # Zipkin compatible
    volumes:
      - jaeger_data:/badger
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:16686 || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - rwa-infra
    profiles:
      - jaeger
      - tracing
      - full

  # ===========================================================================
  # Loki - log aggregation system
  # ===========================================================================
  # Features:
  #   - log collection and storage
  #   - log queries (LogQL)
  #   - deep Grafana integration
  #   - low resource footprint
  # ===========================================================================
  loki:
    image: docker.io/grafana/loki:2.9.4
    container_name: rwa-loki
    # Quoted because the value starts with "-".
    command: '-config.file=/etc/loki/loki-config.yml'
    ports:
      - "${LOKI_PORT:-3100}:3100"
    volumes:
      - ./loki/loki-config.yml:/etc/loki/loki-config.yml:ro
      - loki_data:/loki
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:3100/ready || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - rwa-infra
    profiles:
      - loki
      - logging
      - full

  # ===========================================================================
  # Promtail - log collection agent
  # ===========================================================================
  # Features:
  #   - collects Docker container logs
  #   - labels log streams
  #   - pushes to Loki
  # ===========================================================================
  promtail:
    image: docker.io/grafana/promtail:2.9.4
    container_name: rwa-promtail
    # Quoted because the value starts with "-".
    command: '-config.file=/etc/promtail/promtail-config.yml'
    volumes:
      - ./loki/promtail-config.yml:/etc/promtail/promtail-config.yml:ro
      # Host Docker log directory and socket, both mounted read-only.
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      # Named volume mounted at /tmp.
      # NOTE(review): presumably holds the positions file - confirm the path
      # in promtail-config.yml.
      - promtail_positions:/tmp
    depends_on:
      # Start only after Loki's healthcheck passes.
      loki:
        condition: service_healthy
    restart: unless-stopped
    networks:
      - rwa-infra
    profiles:
      - loki
      - logging
      - full

  # ===========================================================================
  # Grafana - unified visualization platform
  # ===========================================================================
  # Features:
  #   - multiple data sources (Prometheus, Loki, Jaeger)
  #   - custom dashboards
  #   - alert management
  #   - team collaboration
  # ===========================================================================
  grafana:
    image: docker.io/grafana/grafana:10.3.1
    container_name: rwa-grafana
    environment:
      - TZ=Asia/Shanghai
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
      # NOTE(review): the fallback password is weak and lives in the
      # repository; set GRAFANA_ADMIN_PASSWORD in any shared environment.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123}
      - GF_USERS_ALLOW_SIGN_UP=false
      # Server settings
      - GF_SERVER_ROOT_URL=${GRAFANA_ROOT_URL:-http://localhost:3030}
      - GF_SERVER_SERVE_FROM_SUB_PATH=false
      # Security settings
      - GF_SECURITY_ALLOW_EMBEDDING=true
      - GF_SECURITY_COOKIE_SAMESITE=lax
      # Feature toggles
      - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor tempoSearch tempoBackendSearch
      # Log level
      - GF_LOG_LEVEL=${GRAFANA_LOG_LEVEL:-info}
    ports:
      # Host port defaults to 3030; Grafana listens on 3000 in the container.
      - "${GRAFANA_PORT:-3030}:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:3000/api/health || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - rwa-infra
    profiles:
      - grafana
      - full

  # ===========================================================================
  # Prometheus - metrics collection (optional; skip if api-gateway already
  # provides one)
  # ===========================================================================
  prometheus:
    image: docker.io/prom/prometheus:v2.49.1
    container_name: rwa-prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=15d'
      # NOTE(review): the lifecycle and admin APIs allow reload/shutdown and
      # TSDB deletion over HTTP, and port 9090 is published on the host.
      # Restrict network access or drop these flags outside development.
      - '--web.enable-lifecycle'
      - '--web.enable-admin-api'
    ports:
      - "${PROMETHEUS_PORT:-9090}:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/rules:/etc/prometheus/rules:ro
      - prometheus_data:/prometheus
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:9090/-/healthy || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - rwa-infra
    profiles:
      - prometheus
      - metrics
      - full

# =============================================================================
# Volumes - persistent storage
# =============================================================================
volumes:
  consul_data:
    driver: local
  jaeger_data:
    driver: local
  loki_data:
    driver: local
  promtail_positions:
    driver: local
  grafana_data:
    driver: local
  prometheus_data:
    driver: local

# =============================================================================
# Networks
# =============================================================================
networks:
  rwa-infra:
    driver: bridge
    # Fixed network name (not prefixed with the Compose project name).
    name: rwa-infra