# File: rwadurian/backend/infrastructure/docker-compose.yml

# =============================================================================
# RWA Infrastructure - 可观测性与服务治理基础设施
# =============================================================================
#
# 模块化设计,可按需启用:
# - consul: 服务发现与配置中心
# - jaeger: 分布式链路追踪
# - loki: 日志聚合
# - grafana: 统一可视化仪表盘
#
# 使用方法:
# ./deploy.sh up # 启动所有组件
# ./deploy.sh up consul # 只启动 Consul
# ./deploy.sh up jaeger loki # 启动指定组件
# ./deploy.sh down # 停止所有组件
#
# =============================================================================
services:
# ===========================================================================
# Consul - service discovery and configuration center
# ===========================================================================
# Features:
#   - Service registration and discovery
#   - Health checks
#   - KV configuration store
#   - Multi-datacenter support
# ===========================================================================
  consul:
    image: docker.io/hashicorp/consul:1.18
    container_name: rwa-consul
    # Single server agent (bootstrap-expect=1) with the web UI enabled.
    # -client=0.0.0.0 lets the HTTP/DNS listeners accept connections that
    # arrive through the published ports.
    command: agent -server -bootstrap-expect=1 -ui -client=0.0.0.0 -datacenter=rwa-dc1
    environment:
      TZ: Asia/Shanghai
      CONSUL_BIND_INTERFACE: eth0
    ports:
      - "${CONSUL_HTTP_PORT:-8500}:8500"  # HTTP API + UI
      # Consul answers DNS on UDP and TCP (TCP carries truncated responses),
      # so both protocols are published on the same host port.
      - "${CONSUL_DNS_PORT:-8600}:8600/udp"  # DNS (UDP)
      - "${CONSUL_DNS_PORT:-8600}:8600/tcp"  # DNS (TCP)
      # Serf gossip uses TCP and UDP on the same port; publishing TCP alone
      # leaves UDP probes from agents outside this network unanswered.
      - "8301:8301"  # Serf LAN (TCP)
      - "8301:8301/udp"  # Serf LAN (UDP)
      - "8302:8302"  # Serf WAN (TCP)
      - "8302:8302/udp"  # Serf WAN (UDP)
    volumes:
      - consul_data:/consul/data
      - ./consul/config:/consul/config:ro
    healthcheck:
      # Succeeds once the local agent can list cluster members.
      test: ["CMD", "consul", "members"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - rwa-infra
    profiles:
      - consul
      - full
# ===========================================================================
# Jaeger - distributed tracing
# ===========================================================================
# Features:
#   - Request trace collection
#   - Performance bottleneck analysis
#   - Service dependency visualization
#   - Error localization
# ===========================================================================
  jaeger:
    image: docker.io/jaegertracing/all-in-one:1.54
    container_name: rwa-jaeger
    environment:
      TZ: Asia/Shanghai
      # Environment values must be strings: a bare true/false is parsed as a
      # YAML boolean (rejected by docker-compose v1), and a value starting
      # with ':' is an indicator character, so all three are quoted.
      COLLECTOR_ZIPKIN_HOST_PORT: ":9411"
      COLLECTOR_OTLP_ENABLED: "true"
      # Badger storage written to the mounted volume rather than kept ephemeral.
      SPAN_STORAGE_TYPE: badger
      BADGER_EPHEMERAL: "false"
      BADGER_DIRECTORY_VALUE: /badger/data
      BADGER_DIRECTORY_KEY: /badger/key
    ports:
      - "${JAEGER_UI_PORT:-16686}:16686"  # UI
      - "6831:6831/udp"  # Thrift compact (agent)
      - "6832:6832/udp"  # Thrift binary (agent)
      - "4317:4317"  # OTLP gRPC
      - "4318:4318"  # OTLP HTTP
      - "14250:14250"  # gRPC (collector)
      - "14268:14268"  # HTTP (collector)
      - "9411:9411"  # Zipkin compatible
    volumes:
      # NOTE(review): confirm the container user can write to this volume;
      # the image's runtime user is not visible from this file.
      - jaeger_data:/badger
    healthcheck:
      # Probes the UI port from inside the container.
      test: ["CMD-SHELL", "wget -q --spider http://localhost:16686 || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - rwa-infra
    profiles:
      - jaeger
      - tracing
      - full
# ===========================================================================
# Loki - log aggregation system
# ===========================================================================
# Features:
#   - Log ingestion and storage
#   - Log querying (LogQL)
#   - Tight integration with Grafana
#   - Low resource footprint
# ===========================================================================
  loki:
    container_name: rwa-loki
    image: docker.io/grafana/loki:2.9.4
    restart: unless-stopped
    profiles: [loki, logging, full]
    networks: [rwa-infra]
    # Single argument pointing Loki at the read-only config mounted below.
    command:
      - "-config.file=/etc/loki/loki-config.yml"
    ports:
      - "${LOKI_PORT:-3100}:3100"
    volumes:
      - ./loki/loki-config.yml:/etc/loki/loki-config.yml:ro
      - loki_data:/loki
    healthcheck:
      # String form is run through the container shell (same as CMD-SHELL).
      test: 'wget -q --spider http://localhost:3100/ready || exit 1'
      retries: 5
      timeout: 5s
      interval: 10s
# ===========================================================================
# Promtail - log shipping agent
# ===========================================================================
# Features:
#   - Collects Docker container logs
#   - Attaches labels to log streams
#   - Pushes entries to Loki
# ===========================================================================
  promtail:
    container_name: rwa-promtail
    image: docker.io/grafana/promtail:2.9.4
    restart: unless-stopped
    profiles: [loki, logging, full]
    networks: [rwa-infra]
    # Start only after the loki service reports healthy.
    depends_on:
      loki:
        condition: service_healthy
    command:
      - "-config.file=/etc/promtail/promtail-config.yml"
    volumes:
      # Agent configuration (read-only).
      - ./loki/promtail-config.yml:/etc/promtail/promtail-config.yml:ro
      # Host Docker log directory and socket, both mounted read-only.
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      # Named volume mounted over /tmp.
      # NOTE(review): presumably holds the positions file - confirm against
      # promtail-config.yml.
      - promtail_positions:/tmp
# ===========================================================================
# Grafana - unified visualization platform
# ===========================================================================
# Features:
#   - Multiple data sources (Prometheus, Loki, Jaeger)
#   - Custom dashboards
#   - Alert management
#   - Team collaboration
# ===========================================================================
  grafana:
    container_name: rwa-grafana
    image: docker.io/grafana/grafana:10.3.1
    restart: unless-stopped
    profiles: [grafana, full]
    networks: [rwa-infra]
    environment:
      TZ: "Asia/Shanghai"
      # Admin account, overridable from the shell environment / .env file.
      # NOTE(review): the fallback password is a weak, guessable default -
      # set GRAFANA_ADMIN_PASSWORD for anything beyond local development.
      GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER:-admin}"
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-admin123}"
      GF_USERS_ALLOW_SIGN_UP: "false"
      # Server settings
      GF_SERVER_ROOT_URL: "${GRAFANA_ROOT_URL:-http://localhost:3030}"
      GF_SERVER_SERVE_FROM_SUB_PATH: "false"
      # Security settings
      GF_SECURITY_ALLOW_EMBEDDING: "true"
      GF_SECURITY_COOKIE_SAMESITE: "lax"
      # Feature toggles
      GF_FEATURE_TOGGLES_ENABLE: "traceqlEditor tempoSearch tempoBackendSearch"
      # Log level
      GF_LOG_LEVEL: "${GRAFANA_LOG_LEVEL:-info}"
    ports:
      # Host port defaults to 3030; Grafana listens on 3000 in the container.
      - "${GRAFANA_PORT:-3030}:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
    healthcheck:
      # String form is run through the container shell (same as CMD-SHELL).
      test: 'wget -q --spider http://localhost:3000/api/health || exit 1'
      retries: 5
      timeout: 5s
      interval: 10s
# ===========================================================================
# Prometheus - metrics collection (optional; skip if api-gateway already
# provides one)
# ===========================================================================
  prometheus:
    container_name: rwa-prometheus
    image: docker.io/prom/prometheus:v2.49.1
    restart: unless-stopped
    profiles: [prometheus, metrics, full]
    networks: [rwa-infra]
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      # Keep 15 days of samples.
      - "--storage.tsdb.retention.time=15d"
      # NOTE(review): the lifecycle and admin HTTP APIs are enabled while the
      # port below is published to the host - confirm that this exposure is
      # intended.
      - "--web.enable-lifecycle"
      - "--web.enable-admin-api"
    ports:
      - "${PROMETHEUS_PORT:-9090}:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/rules:/etc/prometheus/rules:ro
      - prometheus_data:/prometheus
    healthcheck:
      # String form is run through the container shell (same as CMD-SHELL).
      test: 'wget -q --spider http://localhost:9090/-/healthy || exit 1'
      retries: 5
      timeout: 5s
      interval: 10s
# =============================================================================
# Volumes - persistent storage (listed alphabetically)
# =============================================================================
volumes:
  # Mounted at /consul/data by the consul service.
  consul_data:
    driver: local
  # Mounted at /var/lib/grafana by the grafana service.
  grafana_data:
    driver: local
  # Mounted at /badger by the jaeger service.
  jaeger_data:
    driver: local
  # Mounted at /loki by the loki service.
  loki_data:
    driver: local
  # Mounted at /prometheus by the prometheus service.
  prometheus_data:
    driver: local
  # Mounted at /tmp by the promtail service.
  promtail_positions:
    driver: local
# =============================================================================
# Networks
# =============================================================================
networks:
  # Bridge network joined by every service in this file; it is given an
  # explicit name rather than a generated one.
  rwa-infra:
    name: rwa-infra
    driver: bridge