rwadurian/backend/services/reporting-service/docs/DEPLOYMENT.md

21 KiB

部署指南

1. 部署架构

1.1 生产环境架构

                         ┌─────────────────┐
                         │  Load Balancer  │
                         │   (Nginx/ALB)   │
                         └────────┬────────┘
                                  │
              ┌───────────────────┼───────────────────┐
              │                   │                   │
              ▼                   ▼                   ▼
    ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
    │   Reporting     │ │   Reporting     │ │   Reporting     │
    │   Service #1    │ │   Service #2    │ │   Service #3    │
    └────────┬────────┘ └────────┬────────┘ └────────┬────────┘
             │                   │                   │
             └───────────────────┼───────────────────┘
                                 │
              ┌──────────────────┼──────────────────┐
              │                  │                  │
              ▼                  ▼                  ▼
    ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
    │   PostgreSQL    │ │     Redis       │ │   S3/MinIO      │
    │   (Primary)     │ │    Cluster      │ │   (Exports)     │
    └─────────────────┘ └─────────────────┘ └─────────────────┘

1.2 Kubernetes 部署架构

┌─────────────────────────────────────────────────────────────┐
│                     Kubernetes Cluster                       │
│  ┌─────────────────────────────────────────────────────┐   │
│  │                    Namespace: rwadurian              │   │
│  │  ┌─────────────┐  ┌─────────────┐  ┌────────────┐  │   │
│  │  │  Ingress    │  │   Service   │  │ ConfigMap  │  │   │
│  │  │  Controller │  │  (ClusterIP)│  │  & Secrets │  │   │
│  │  └──────┬──────┘  └──────┬──────┘  └────────────┘  │   │
│  │         │                │                          │   │
│  │         ▼                ▼                          │   │
│  │  ┌─────────────────────────────────────────────┐   │   │
│  │  │              Deployment                      │   │   │
│  │  │  ┌─────────┐ ┌─────────┐ ┌─────────┐       │   │   │
│  │  │  │  Pod 1  │ │  Pod 2  │ │  Pod 3  │       │   │   │
│  │  │  └─────────┘ └─────────┘ └─────────┘       │   │   │
│  │  └─────────────────────────────────────────────┘   │   │
│  └─────────────────────────────────────────────────────┘   │
└─────────────────────────────────────────────────────────────┘

2. Docker 部署

2.1 生产 Dockerfile

# Dockerfile
#
# Two-stage build for the reporting service:
#   1. builder    - installs ALL dependencies, generates the Prisma client,
#                   compiles the app, then strips devDependencies.
#   2. production - minimal runtime image running as a non-root user.

# ---------- Build stage ----------
FROM node:20-alpine AS builder

WORKDIR /app

# Install build dependencies
RUN apk add --no-cache openssl openssl-dev

# Copy dependency manifests first so the install layer is cached
# until package*.json or the Prisma schema changes
COPY package*.json ./
COPY prisma ./prisma/

# Install the full dependency tree: `npm run build` (and usually the Prisma
# CLI) needs devDependencies, so a production-only install here would break
# the build.
RUN npm ci

# Generate Prisma Client
RUN npx prisma generate

# Copy source code
# NOTE(review): make sure .dockerignore excludes node_modules and dist,
# otherwise this COPY overwrites the freshly installed dependencies.
COPY . .

# Build the application
RUN npm run build

# Drop devDependencies so only runtime packages are copied to the final image
# (`--omit=dev` replaces the deprecated `--only=production`).
# NOTE(review): if the migration Job runs `npx prisma migrate deploy` from this
# image, the `prisma` CLI must be listed under "dependencies" - confirm.
RUN npm prune --omit=dev && npm cache clean --force

# ---------- Production stage ----------
FROM node:20-alpine AS production

WORKDIR /app

# Install runtime dependencies (dumb-init is used as the entrypoint below)
RUN apk add --no-cache openssl dumb-init

# Create a non-root user/group (uid/gid 1001)
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nestjs -u 1001

# Copy only the required artifacts from the build stage
COPY --from=builder --chown=nestjs:nodejs /app/dist ./dist
COPY --from=builder --chown=nestjs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nestjs:nodejs /app/prisma ./prisma
COPY --from=builder --chown=nestjs:nodejs /app/package*.json ./

# Switch to the non-root user
USER nestjs

# Expose the HTTP port
EXPOSE 3000

# Health check against the service's health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
  CMD wget --no-verbose --tries=1 --spider http://localhost:3000/api/v1/health || exit 1

# Start through dumb-init
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
CMD ["node", "dist/main.js"]

2.2 Docker Compose (生产)

# docker-compose.yml
#
# Production stack: the reporting service plus its PostgreSQL and Redis
# dependencies on a private bridge network.
#
# Variables that must be provided (shell environment or .env file), as
# referenced below: DB_USER, DB_PASSWORD, DB_NAME, JWT_SECRET.
#
# NOTE(review): the top-level `version` key is reported as obsolete by
# Docker Compose v2 - confirm whether it can be dropped for your tooling.
version: '3.8'

services:
  reporting-service:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: reporting-service
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      NODE_ENV: production
      PORT: 3000
      # Host names `postgres` and `redis` are the service names defined below
      DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}?schema=public
      REDIS_HOST: redis
      REDIS_PORT: 6379
      JWT_SECRET: ${JWT_SECRET}
    # Start only after both dependencies pass their healthchecks
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - rwadurian-network
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 512M
        reservations:
          cpus: '0.5'
          memory: 256M

  postgres:
    image: postgres:15-alpine
    container_name: reporting-postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD: ${DB_PASSWORD}
      POSTGRES_DB: ${DB_NAME}
    volumes:
      # Named volume so database files survive container re-creation
      - postgres-data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${DB_USER}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - rwadurian-network

  redis:
    image: redis:7-alpine
    container_name: reporting-redis
    restart: unless-stopped
    # Append-only file persistence, stored in the redis-data volume
    command: redis-server --appendonly yes
    volumes:
      - redis-data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - rwadurian-network

volumes:
  postgres-data:
  redis-data:

networks:
  rwadurian-network:
    driver: bridge

2.3 构建和运行

# 构建镜像
docker build -t reporting-service:latest .

# 运行容器
docker compose up -d

# 查看日志
docker compose logs -f reporting-service

# 检查健康状态
docker compose ps

3. Kubernetes 部署

3.1 Namespace

# k8s/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: rwadurian
  labels:
    app.kubernetes.io/name: rwadurian

3.2 ConfigMap

# k8s/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: reporting-service-config
  namespace: rwadurian
data:
  NODE_ENV: "production"
  PORT: "3000"
  REDIS_HOST: "redis-service"
  REDIS_PORT: "6379"

3.3 Secret

# k8s/secret.yaml
#
# Sensitive settings injected into the reporting-service pods via `envFrom`.
# WARNING: the values below are placeholders. Replace them before applying
# and do not commit real credentials to version control.
apiVersion: v1
kind: Secret
metadata:
  name: reporting-service-secret
  namespace: rwadurian
type: Opaque
# `stringData` takes plain-text values; the API server stores them base64-encoded
stringData:
  DATABASE_URL: "postgresql://user:password@postgres-service:5432/rwadurian_reporting?schema=public"
  JWT_SECRET: "your-super-secret-jwt-key"

3.4 Deployment

# k8s/deployment.yaml
#
# Runs three replicas of the reporting service. Configuration comes from the
# reporting-service-config ConfigMap and the reporting-service-secret Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: reporting-service
  namespace: rwadurian
  labels:
    app: reporting-service
spec:
  # NOTE(review): the HPA in k8s/hpa.yaml targets this Deployment and will
  # override this replica count once it is active.
  replicas: 3
  selector:
    matchLabels:
      app: reporting-service
  template:
    metadata:
      labels:
        app: reporting-service
    spec:
      containers:
        - name: reporting-service
          # NOTE(review): the CI pipelines push to a registry-qualified,
          # SHA-tagged image and patch it in with `kubectl set image`; this
          # unqualified `latest` reference is only the initial value - confirm.
          image: reporting-service:latest
          imagePullPolicy: Always
          ports:
            - containerPort: 3000
              name: http
          # Every key of the ConfigMap and Secret becomes an environment variable
          envFrom:
            - configMapRef:
                name: reporting-service-config
            - secretRef:
                name: reporting-service-secret
          resources:
            requests:
              cpu: "250m"
              memory: "256Mi"
            limits:
              cpu: "1000m"
              memory: "512Mi"
          # Liveness: the container is restarted after 3 consecutive failures
          livenessProbe:
            httpGet:
              path: /api/v1/health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Readiness: the pod is removed from Service endpoints while failing
          readinessProbe:
            httpGet:
              path: /api/v1/health/ready
              port: http
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          lifecycle:
            # Delay shutdown by 10s before the container receives SIGTERM;
            # presumably to let load balancers drain in-flight traffic.
            preStop:
              exec:
                command: ["/bin/sh", "-c", "sleep 10"]
      # Must exceed the preStop sleep so the app still has time to exit cleanly
      terminationGracePeriodSeconds: 30

3.5 Service

# k8s/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: reporting-service
  namespace: rwadurian
  labels:
    app: reporting-service
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 3000
      protocol: TCP
      name: http
  selector:
    app: reporting-service

3.6 Ingress

# k8s/ingress.yaml
#
# Exposes the reporting-service Service at https://reporting.rwadurian.com
# through the NGINX ingress controller, terminating TLS with the certificate
# stored in the reporting-tls-secret Secret.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: reporting-service-ingress
  namespace: rwadurian
  annotations:
    # Redirect plain HTTP to HTTPS
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    # Maximum accepted request body size
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
spec:
  # Replaces the deprecated `kubernetes.io/ingress.class` annotation.
  # Requires an IngressClass named `nginx` to exist in the cluster.
  ingressClassName: nginx
  tls:
    - hosts:
        - reporting.rwadurian.com
      secretName: reporting-tls-secret
  rules:
    - host: reporting.rwadurian.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: reporting-service
                port:
                  number: 80

3.7 HPA (Horizontal Pod Autoscaler)

# k8s/hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: reporting-service-hpa
  namespace: rwadurian
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: reporting-service
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80

3.8 部署命令

# 创建命名空间
kubectl apply -f k8s/namespace.yaml

# 应用配置
kubectl apply -f k8s/configmap.yaml
kubectl apply -f k8s/secret.yaml

# 部署应用
kubectl apply -f k8s/deployment.yaml
kubectl apply -f k8s/service.yaml
kubectl apply -f k8s/ingress.yaml
kubectl apply -f k8s/hpa.yaml

# 检查状态
kubectl get pods -n rwadurian
kubectl get svc -n rwadurian
kubectl get ingress -n rwadurian

# 查看日志
kubectl logs -f deployment/reporting-service -n rwadurian

# 扩缩容
kubectl scale deployment reporting-service --replicas=5 -n rwadurian

4. 数据库迁移

4.1 使用 Prisma Migrate

# 创建迁移
npx prisma migrate dev --name init

# 应用迁移 (生产环境)
npx prisma migrate deploy

# 重置数据库 (开发环境)
npx prisma migrate reset

4.2 迁移脚本

#!/bin/bash
# scripts/migrate.sh
#
# Waits until PostgreSQL accepts connections, then applies pending Prisma
# migrations.
#
# Required environment:
#   DB_HOST  database host name
#   DB_USER  database user passed to pg_isready
# Optional environment:
#   DB_PORT  database port (default: 5432)

set -euo pipefail

# Fail fast with a clear message instead of calling pg_isready with empty args
: "${DB_HOST:?DB_HOST must be set}"
: "${DB_USER:?DB_USER must be set}"
DB_PORT="${DB_PORT:-5432}"

echo "Running database migrations..."

# Wait for the database to become ready
until pg_isready -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER"; do
  echo "Waiting for database..."
  sleep 2
done

# Apply migrations
npx prisma migrate deploy

echo "Migrations completed successfully!"

4.3 Kubernetes Job

# k8s/migration-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
  name: reporting-service-migration
  namespace: rwadurian
spec:
  ttlSecondsAfterFinished: 100
  template:
    spec:
      containers:
        - name: migration
          image: reporting-service:latest
          command: ["npx", "prisma", "migrate", "deploy"]
          envFrom:
            - secretRef:
                name: reporting-service-secret
      restartPolicy: Never
  backoffLimit: 3

5. CI/CD 流程

5.1 GitHub Actions

# .github/workflows/deploy.yml
#
# Pipeline: test -> build & push image to GHCR -> roll out to Kubernetes.
# The deploy job runs only for pushes to `main`.
name: Deploy

on:
  push:
    branches: [main]
    tags: ['v*']

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}/reporting-service

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'
      - run: npm ci
      - run: npx prisma generate
      - run: npm test

  build:
    needs: test
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      image-tag: ${{ steps.meta.outputs.tags }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # The sha tag must match what the deploy job references
          # (`main-<full commit sha>`):
          #   - format=long emits the full 40-char SHA (the default is 7 chars)
          #   - it is enabled only on main; on tag pushes the branch prefix
          #     would be empty and yield an invalid tag starting with "-"
          tags: |
            type=ref,event=branch
            type=ref,event=tag
            type=sha,prefix=main-,format=long,enable=${{ github.ref == 'refs/heads/main' }}

      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

  deploy:
    needs: build
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v4

      - name: Set up kubectl
        uses: azure/setup-kubectl@v3

      - name: Configure kubeconfig
        # Pass the secret through the environment instead of interpolating it
        # into the script text; KUBE_CONFIG holds a base64-encoded kubeconfig.
        env:
          KUBE_CONFIG: ${{ secrets.KUBE_CONFIG }}
        run: |
          mkdir -p ~/.kube
          printf '%s' "$KUBE_CONFIG" | base64 -d > ~/.kube/config
          chmod 600 ~/.kube/config

      - name: Update deployment
        run: |
          kubectl set image deployment/reporting-service \
            reporting-service=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:main-${{ github.sha }} \
            -n rwadurian
          kubectl rollout status deployment/reporting-service -n rwadurian

5.2 GitLab CI/CD

# .gitlab-ci.yml
#
# Pipeline: test -> build & push image -> roll out to Kubernetes.
# Images are tagged with the commit SHA; deploy runs only on `main`.
stages:
  - test
  - build
  - deploy

variables:
  DOCKER_IMAGE: $CI_REGISTRY_IMAGE/reporting-service

test:
  stage: test
  image: node:20-alpine
  script:
    - npm ci
    - npx prisma generate
    - npm test

build:
  stage: build
  image: docker:24
  services:
    - docker:24-dind
  script:
    # Feed the password on stdin so it does not appear in the process
    # arguments or trigger Docker's insecure `-p` warning
    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
    - docker build -t "$DOCKER_IMAGE:$CI_COMMIT_SHA" .
    - docker push "$DOCKER_IMAGE:$CI_COMMIT_SHA"
  only:
    - main
    - tags

deploy:
  stage: deploy
  # The image's default entrypoint is `kubectl`; clear it so the runner can
  # start a shell to execute the `script` lines.
  image:
    name: bitnami/kubectl:latest
    entrypoint: [""]
  script:
    - kubectl set image deployment/reporting-service "reporting-service=$DOCKER_IMAGE:$CI_COMMIT_SHA" -n rwadurian
    - kubectl rollout status deployment/reporting-service -n rwadurian
  only:
    - main
  environment:
    name: production

6. 监控和日志

6.1 Prometheus 指标

# k8s/servicemonitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: reporting-service
  namespace: rwadurian
spec:
  selector:
    matchLabels:
      app: reporting-service
  endpoints:
    - port: http
      path: /metrics
      interval: 30s

6.2 日志配置

// src/main.ts
import { WinstonModule } from 'nest-winston';
import * as winston from 'winston';

const app = await NestFactory.create(AppModule, {
  logger: WinstonModule.createLogger({
    transports: [
      new winston.transports.Console({
        format: winston.format.combine(
          winston.format.timestamp(),
          winston.format.json(),
        ),
      }),
    ],
  }),
});

6.3 Grafana 仪表板

关键指标:

  • 请求速率 (requests/sec)
  • 响应时间 (p50, p95, p99)
  • 错误率
  • CPU/内存使用率
  • 数据库连接池状态
  • Redis 缓存命中率

7. 安全配置

7.1 网络策略

# k8s/network-policy.yaml
#
# Restricts traffic for the reporting-service pods:
#   ingress - only from the ingress-nginx namespace, on port 3000
#   egress  - only to cluster DNS, PostgreSQL (5432) and Redis (6379)
#
# NOTE(review): Prometheus scraping (/metrics) and S3/MinIO export uploads are
# not covered by these rules; add matching rules if they are needed - confirm.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: reporting-service-network-policy
  namespace: rwadurian
spec:
  podSelector:
    matchLabels:
      app: reporting-service
  policyTypes:
    - Ingress
    - Egress
  ingress:
    - from:
        - namespaceSelector:
            matchLabels:
              # Label set automatically by Kubernetes on every namespace, so
              # the namespace does not need to be labelled by hand
              kubernetes.io/metadata.name: ingress-nginx
      ports:
        - protocol: TCP
          port: 3000
  egress:
    # Allow DNS lookups; without this rule the pods cannot resolve
    # `postgres-service` / `redis-service` once egress is restricted.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    - to:
        - podSelector:
            matchLabels:
              app: postgres
      ports:
        - protocol: TCP
          port: 5432
    - to:
        - podSelector:
            matchLabels:
              app: redis
      ports:
        - protocol: TCP
          port: 6379

7.2 Pod 安全上下文 (securityContext)

# k8s/pod-security.yaml
apiVersion: v1
kind: Pod
metadata:
  name: reporting-service
spec:
  securityContext:
    runAsNonRoot: true
    runAsUser: 1001
    fsGroup: 1001
  containers:
    - name: reporting-service
      securityContext:
        allowPrivilegeEscalation: false
        readOnlyRootFilesystem: true
        capabilities:
          drop:
            - ALL

8. 备份和恢复

8.1 数据库备份

#!/bin/bash
# scripts/backup.sh
#
# Dumps the reporting database to a gzip-compressed SQL file, uploads it to
# S3, and deletes local dumps older than 7 days.
#
# Required environment:
#   DB_HOST, DB_USER, DB_NAME  connection parameters for pg_dump
# Optional environment:
#   BACKUP_DIR                 local dump directory (default: /backups)

# pipefail makes a failing pg_dump abort the script instead of silently
# producing (and uploading) an empty archive.
set -euo pipefail

: "${DB_HOST:?DB_HOST must be set}"
: "${DB_USER:?DB_USER must be set}"
: "${DB_NAME:?DB_NAME must be set}"

BACKUP_DIR="${BACKUP_DIR:-/backups}"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="$BACKUP_DIR/reporting_${TIMESTAMP}.sql.gz"

mkdir -p -- "$BACKUP_DIR"

# Remove a partially written dump if anything below fails
trap 'rm -f -- "$BACKUP_FILE"' ERR

pg_dump -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" | gzip > "$BACKUP_FILE"

# Upload to S3
aws s3 cp "$BACKUP_FILE" s3://rwadurian-backups/reporting/

# The dump is complete and uploaded; keep it even if cleanup fails
trap - ERR

# Remove old local backups (keep 7 days)
find "$BACKUP_DIR" -name "*.sql.gz" -mtime +7 -delete

8.2 Kubernetes CronJob

# k8s/backup-cronjob.yaml
#
# Runs /scripts/backup.sh on a schedule. The script is mounted from the
# `backup-scripts` ConfigMap.
#
# NOTE(review): scripts/backup.sh reads DB_HOST, DB_USER and DB_NAME, but
# reporting-service-secret (k8s/secret.yaml) only defines DATABASE_URL and
# JWT_SECRET - confirm where those variables come from.
# NOTE(review): the script writes to /backups and calls `aws s3 cp`; no volume
# is mounted at /backups here, and it should be confirmed that the image
# provides the AWS CLI and credentials.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: reporting-db-backup
  namespace: rwadurian
spec:
  schedule: "0 2 * * *"  # every day at 02:00
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: backup
              image: postgres:15-alpine
              command: ["/scripts/backup.sh"]
              envFrom:
                - secretRef:
                    name: reporting-service-secret
              volumeMounts:
                - name: backup-scripts
                  mountPath: /scripts
          volumes:
            - name: backup-scripts
              configMap:
                name: backup-scripts
                # Mount the script files as executable
                defaultMode: 0755
          restartPolicy: OnFailure

9. 故障排除

9.1 常见问题

Q: Pod 启动失败

# 查看 Pod 状态
kubectl describe pod <pod-name> -n rwadurian

# 查看日志
kubectl logs <pod-name> -n rwadurian --previous

Q: 数据库连接失败

# 检查网络策略
kubectl get networkpolicy -n rwadurian

# 测试连接
kubectl exec -it <pod-name> -n rwadurian -- nc -zv postgres-service 5432

Q: 内存不足

# 查看资源使用
kubectl top pod -n rwadurian

# 增加资源限制
kubectl patch deployment reporting-service -n rwadurian \
  -p '{"spec":{"template":{"spec":{"containers":[{"name":"reporting-service","resources":{"limits":{"memory":"1Gi"}}}]}}}}'

9.2 回滚

# 查看部署历史
kubectl rollout history deployment/reporting-service -n rwadurian

# 回滚到上一版本
kubectl rollout undo deployment/reporting-service -n rwadurian

# 回滚到指定版本
kubectl rollout undo deployment/reporting-service --to-revision=2 -n rwadurian