Deployment Guide
1. Deployment Architecture
1.1 Production Environment Architecture
                    ┌─────────────────┐
                    │  Load Balancer  │
                    │   (Nginx/ALB)   │
                    └────────┬────────┘
                             │
         ┌───────────────────┼───────────────────┐
         │                   │                   │
         ▼                   ▼                   ▼
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│    Reporting    │ │    Reporting    │ │    Reporting    │
│   Service #1    │ │   Service #2    │ │   Service #3    │
└────────┬────────┘ └────────┬────────┘ └────────┬────────┘
         │                   │                   │
         └───────────────────┼───────────────────┘
                             │
         ┌───────────────────┼───────────────────┐
         │                   │                   │
         ▼                   ▼                   ▼
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│   PostgreSQL    │ │      Redis      │ │    S3/MinIO     │
│    (Primary)    │ │     Cluster     │ │    (Exports)    │
└─────────────────┘ └─────────────────┘ └─────────────────┘
1.2 Kubernetes Deployment Architecture
┌─────────────────────────────────────────────────────────────┐
│                     Kubernetes Cluster                      │
│   ┌─────────────────────────────────────────────────────┐   │
│   │                Namespace: rwadurian                 │   │
│   │  ┌─────────────┐  ┌─────────────┐  ┌────────────┐   │   │
│   │  │   Ingress   │  │   Service   │  │ ConfigMap  │   │   │
│   │  │ Controller  │  │ (ClusterIP) │  │ & Secrets  │   │   │
│   │  └──────┬──────┘  └──────┬──────┘  └────────────┘   │   │
│   │         │                │                          │   │
│   │         ▼                ▼                          │   │
│   │  ┌─────────────────────────────────────────────┐    │   │
│   │  │                 Deployment                  │    │   │
│   │  │   ┌─────────┐  ┌─────────┐  ┌─────────┐     │    │   │
│   │  │   │  Pod 1  │  │  Pod 2  │  │  Pod 3  │     │    │   │
│   │  │   └─────────┘  └─────────┘  └─────────┘     │    │   │
│   │  └─────────────────────────────────────────────┘    │   │
│   └─────────────────────────────────────────────────────┘   │
└─────────────────────────────────────────────────────────────┘
2. Docker Deployment
2.1 Production Dockerfile
# Dockerfile
# Build stage
FROM node:20-alpine AS builder
WORKDIR /app

# Install build dependencies
RUN apk add --no-cache openssl openssl-dev

# Copy dependency manifests
COPY package*.json ./
COPY prisma ./prisma/

# Install all dependencies (the build itself needs dev dependencies such as the Nest CLI)
RUN npm ci && npm cache clean --force

# Generate Prisma Client
RUN npx prisma generate

# Copy source code
COPY . .

# Build the application
RUN npm run build

# Drop dev dependencies for the runtime image, then regenerate the Prisma client
# (pruning can remove the generated client, so regenerate afterwards)
RUN npm prune --omit=dev && npx prisma generate

# Production stage
FROM node:20-alpine AS production
WORKDIR /app

# Install runtime dependencies
RUN apk add --no-cache openssl dumb-init

# Create a non-root user
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nestjs -u 1001

# Copy the required files from the build stage
COPY --from=builder --chown=nestjs:nodejs /app/dist ./dist
COPY --from=builder --chown=nestjs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nestjs:nodejs /app/prisma ./prisma
COPY --from=builder --chown=nestjs:nodejs /app/package*.json ./

# Switch to the non-root user
USER nestjs

# Expose the port
EXPOSE 3000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
  CMD wget --no-verbose --tries=1 --spider http://localhost:3000/api/v1/health || exit 1

# Start via dumb-init for proper signal handling
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
CMD ["node", "dist/main.js"]
2.2 Docker Compose (Production)
# docker-compose.yml
version: '3.8'

services:
  reporting-service:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: reporting-service
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      NODE_ENV: production
      PORT: 3000
      DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}?schema=public
      REDIS_HOST: redis
      REDIS_PORT: 6379
      JWT_SECRET: ${JWT_SECRET}
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - rwadurian-network
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 512M
        reservations:
          cpus: '0.5'
          memory: 256M

  postgres:
    image: postgres:15-alpine
    container_name: reporting-postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD: ${DB_PASSWORD}
      POSTGRES_DB: ${DB_NAME}
    volumes:
      - postgres-data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${DB_USER}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - rwadurian-network

  redis:
    image: redis:7-alpine
    container_name: reporting-redis
    restart: unless-stopped
    command: redis-server --appendonly yes
    volumes:
      - redis-data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - rwadurian-network

volumes:
  postgres-data:
  redis-data:

networks:
  rwadurian-network:
    driver: bridge
2.3 Build and Run
# Build the image
docker build -t reporting-service:latest .
# Start the stack
docker compose up -d
# Tail logs
docker compose logs -f reporting-service
# Check health status
docker compose ps
3. Kubernetes Deployment
3.1 Namespace
# k8s/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: rwadurian
  labels:
    app.kubernetes.io/name: rwadurian
3.2 ConfigMap
# k8s/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: reporting-service-config
  namespace: rwadurian
data:
  NODE_ENV: "production"
  PORT: "3000"
  REDIS_HOST: "redis-service"
  REDIS_PORT: "6379"
3.3 Secret
# k8s/secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: reporting-service-secret
  namespace: rwadurian
type: Opaque
stringData:
  DATABASE_URL: "postgresql://user:password@postgres-service:5432/rwadurian_reporting?schema=public"
  JWT_SECRET: "your-super-secret-jwt-key"
3.4 Deployment
# k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: reporting-service
  namespace: rwadurian
  labels:
    app: reporting-service
spec:
  replicas: 3
  selector:
    matchLabels:
      app: reporting-service
  template:
    metadata:
      labels:
        app: reporting-service
    spec:
      containers:
        - name: reporting-service
          image: reporting-service:latest
          imagePullPolicy: Always
          ports:
            - containerPort: 3000
              name: http
          envFrom:
            - configMapRef:
                name: reporting-service-config
            - secretRef:
                name: reporting-service-secret
          resources:
            requests:
              cpu: "250m"
              memory: "256Mi"
            limits:
              cpu: "1000m"
              memory: "512Mi"
          livenessProbe:
            httpGet:
              path: /api/v1/health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /api/v1/health/ready
              port: http
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh", "-c", "sleep 10"]
      terminationGracePeriodSeconds: 30
3.5 Service
# k8s/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: reporting-service
  namespace: rwadurian
  labels:
    app: reporting-service
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 3000
      protocol: TCP
      name: http
  selector:
    app: reporting-service
3.6 Ingress
# k8s/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: reporting-service-ingress
  namespace: rwadurian
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - reporting.rwadurian.com
      secretName: reporting-tls-secret
  rules:
    - host: reporting.rwadurian.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: reporting-service
                port:
                  number: 80
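The Ingress above expects a TLS secret named reporting-tls-secret to already exist in the rwadurian namespace. If the cluster runs cert-manager (an assumption, not something this guide sets up), a Certificate along the following lines can issue and renew it automatically; the letsencrypt-prod ClusterIssuer name is hypothetical and must match whatever issuer your cluster actually has.

# k8s/certificate.yaml (sketch, requires cert-manager)
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: reporting-tls
  namespace: rwadurian
spec:
  secretName: reporting-tls-secret
  dnsNames:
    - reporting.rwadurian.com
  issuerRef:
    name: letsencrypt-prod   # hypothetical ClusterIssuer name
    kind: ClusterIssuer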
3.7 HPA (Horizontal Pod Autoscaler)
# k8s/hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: reporting-service-hpa
  namespace: rwadurian
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: reporting-service
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
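With the HPA scaling between 2 and 10 replicas, it is also worth guarding against voluntary disruptions (node drains, cluster upgrades) taking the service below a usable replica count. A minimal PodDisruptionBudget sketch, not part of the manifests above:

# k8s/pdb.yaml (sketch)
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: reporting-service-pdb
  namespace: rwadurian
spec:
  minAvailable: 1
  selector:
    matchLabels:
      app: reporting-service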
3.8 Deployment Commands
# Create the namespace
kubectl apply -f k8s/namespace.yaml
# Apply configuration
kubectl apply -f k8s/configmap.yaml
kubectl apply -f k8s/secret.yaml
# Deploy the application
kubectl apply -f k8s/deployment.yaml
kubectl apply -f k8s/service.yaml
kubectl apply -f k8s/ingress.yaml
kubectl apply -f k8s/hpa.yaml
# Check status
kubectl get pods -n rwadurian
kubectl get svc -n rwadurian
kubectl get ingress -n rwadurian
# View logs
kubectl logs -f deployment/reporting-service -n rwadurian
# Scale manually
kubectl scale deployment reporting-service --replicas=5 -n rwadurian
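If you would rather apply everything in one step, the same manifests can be listed in a kustomization.yaml and applied with kubectl apply -k k8s/. A minimal sketch, assuming the file names used above:

# k8s/kustomization.yaml (sketch)
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - configmap.yaml
  - secret.yaml
  - deployment.yaml
  - service.yaml
  - ingress.yaml
  - hpa.yaml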
4. Database Migrations
4.1 Using Prisma Migrate
# Create a migration
npx prisma migrate dev --name init
# Apply migrations (production)
npx prisma migrate deploy
# Reset the database (development only)
npx prisma migrate reset
4.2 迁移脚本
#!/bin/bash
# scripts/migrate.sh
set -e
echo "Running database migrations..."
# 等待数据库就绪
until pg_isready -h $DB_HOST -p $DB_PORT -U $DB_USER; do
echo "Waiting for database..."
sleep 2
done
# 运行迁移
npx prisma migrate deploy
echo "Migrations completed successfully!"
4.3 Kubernetes Job
# k8s/migration-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
  name: reporting-service-migration
  namespace: rwadurian
spec:
  ttlSecondsAfterFinished: 100
  backoffLimit: 3
  template:
    spec:
      containers:
        - name: migration
          image: reporting-service:latest
          command: ["npx", "prisma", "migrate", "deploy"]
          envFrom:
            - secretRef:
                name: reporting-service-secret
      restartPolicy: Never
5. CI/CD Pipeline
5.1 GitHub Actions
# .github/workflows/deploy.yml
name: Deploy

on:
  push:
    branches: [main]
    tags: ['v*']

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}/reporting-service

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'
      - run: npm ci
      - run: npx prisma generate
      - run: npm test

  build:
    needs: test
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      image-tag: ${{ steps.meta.outputs.tags }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # format=long makes the sha tag use the full SHA so it matches github.sha in the deploy job
          tags: |
            type=ref,event=branch
            type=ref,event=tag
            type=sha,prefix={{branch}}-,format=long
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

  deploy:
    needs: build
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v4
      - name: Set up kubectl
        uses: azure/setup-kubectl@v3
      - name: Configure kubeconfig
        run: |
          mkdir -p ~/.kube
          echo "${{ secrets.KUBE_CONFIG }}" | base64 -d > ~/.kube/config
      - name: Update deployment
        run: |
          kubectl set image deployment/reporting-service \
            reporting-service=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:main-${{ github.sha }} \
            -n rwadurian
          kubectl rollout status deployment/reporting-service -n rwadurian
5.2 GitLab CI/CD
# .gitlab-ci.yml
stages:
  - test
  - build
  - deploy

variables:
  DOCKER_IMAGE: $CI_REGISTRY_IMAGE/reporting-service

test:
  stage: test
  image: node:20-alpine
  script:
    - npm ci
    - npx prisma generate
    - npm test

build:
  stage: build
  image: docker:24
  services:
    - docker:24-dind
  script:
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
    - docker build -t $DOCKER_IMAGE:$CI_COMMIT_SHA .
    - docker push $DOCKER_IMAGE:$CI_COMMIT_SHA
  only:
    - main
    - tags

deploy:
  stage: deploy
  image: bitnami/kubectl:latest
  script:
    - kubectl set image deployment/reporting-service reporting-service=$DOCKER_IMAGE:$CI_COMMIT_SHA -n rwadurian
    - kubectl rollout status deployment/reporting-service -n rwadurian
  only:
    - main
  environment:
    name: production
6. Monitoring and Logging
6.1 Prometheus Metrics
# k8s/servicemonitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: reporting-service
  namespace: rwadurian
spec:
  selector:
    matchLabels:
      app: reporting-service
  endpoints:
    - port: http
      path: /metrics
      interval: 30s
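A ServiceMonitor is only picked up if the Prometheus Operator is installed and the object matches Prometheus's serviceMonitorSelector. With the common kube-prometheus-stack defaults (an assumption about your monitoring setup), that means adding the Helm release label to the metadata above, for example:

# amend the ServiceMonitor metadata above if Prometheus selects by a release label (assumption)
metadata:
  name: reporting-service
  namespace: rwadurian
  labels:
    release: kube-prometheus-stack   # hypothetical; must match the serviceMonitorSelector in your cluster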
6.2 日志配置
// src/main.ts
import { WinstonModule } from 'nest-winston';
import * as winston from 'winston';
const app = await NestFactory.create(AppModule, {
logger: WinstonModule.createLogger({
transports: [
new winston.transports.Console({
format: winston.format.combine(
winston.format.timestamp(),
winston.format.json(),
),
}),
],
}),
});
6.3 Grafana Dashboards
Key metrics:
- Request rate (requests/sec)
- Response time (p50, p95, p99)
- Error rate
- CPU / memory usage
- Database connection pool status
- Redis cache hit rate
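Dashboards are usually paired with alerts on the same signals. A minimal PrometheusRule sketch for two of the metrics above; the metric names (http_requests_total, http_request_duration_seconds_bucket) are assumptions that depend on which Prometheus client the service uses and must be adjusted to whatever /metrics actually exposes:

# k8s/prometheus-rules.yaml (sketch; metric names are assumptions)
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: reporting-service-alerts
  namespace: rwadurian
spec:
  groups:
    - name: reporting-service
      rules:
        - alert: ReportingServiceHighErrorRate
          expr: |
            sum(rate(http_requests_total{job="reporting-service", status=~"5.."}[5m]))
              / sum(rate(http_requests_total{job="reporting-service"}[5m])) > 0.05
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "Error rate above 5% for 10 minutes"
        - alert: ReportingServiceSlowP95
          expr: |
            histogram_quantile(0.95,
              sum(rate(http_request_duration_seconds_bucket{job="reporting-service"}[5m])) by (le)) > 1
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "p95 latency above 1s for 10 minutes"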
7. Security Configuration
7.1 Network Policies
# k8s/network-policy.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: reporting-service-network-policy
  namespace: rwadurian
spec:
  podSelector:
    matchLabels:
      app: reporting-service
  policyTypes:
    - Ingress
    - Egress
  ingress:
    - from:
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
      ports:
        - protocol: TCP
          port: 3000
  egress:
    - to:
        - podSelector:
            matchLabels:
              app: postgres
      ports:
        - protocol: TCP
          port: 5432
    - to:
        - podSelector:
            matchLabels:
              app: redis
      ports:
        - protocol: TCP
          port: 6379
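Note that once Egress appears in policyTypes, everything not explicitly allowed is blocked, including DNS, so the pods may fail to resolve postgres-service and redis-service at all. A commonly needed extra egress rule (a sketch; the selectors assume the default CoreDNS labels and the kube-system namespace):

  # additional egress rule allowing DNS to CoreDNS (append under egress: above)
  - to:
      - namespaceSelector:
          matchLabels:
            kubernetes.io/metadata.name: kube-system
        podSelector:
          matchLabels:
            k8s-app: kube-dns
    ports:
      - protocol: UDP
        port: 53
      - protocol: TCP
        port: 53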
7.2 Pod Security Context
# k8s/pod-security.yaml
apiVersion: v1
kind: Pod
metadata:
  name: reporting-service
spec:
  securityContext:
    runAsNonRoot: true
    runAsUser: 1001
    fsGroup: 1001
  containers:
    - name: reporting-service
      securityContext:
        allowPrivilegeEscalation: false
        readOnlyRootFilesystem: true
        capabilities:
          drop:
            - ALL
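With readOnlyRootFilesystem: true, anything the service writes at runtime (temporary export files, for example) needs a writable volume. A sketch of the usual workaround, assuming the application only writes under /tmp:

      # writable scratch space for a read-only root filesystem (add to the container and pod spec above)
      volumeMounts:
        - name: tmp
          mountPath: /tmp
  volumes:
    - name: tmp
      emptyDir: {}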
8. Backup and Recovery
8.1 Database Backups
#!/bin/bash
# scripts/backup.sh
BACKUP_DIR="/backups"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="$BACKUP_DIR/reporting_${TIMESTAMP}.sql.gz"

pg_dump -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" | gzip > "$BACKUP_FILE"

# Upload to S3
aws s3 cp "$BACKUP_FILE" s3://rwadurian-backups/reporting/

# Clean up old backups (keep 7 days)
find "$BACKUP_DIR" -name "*.sql.gz" -mtime +7 -delete
8.2 Kubernetes CronJob
# k8s/backup-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: reporting-db-backup
  namespace: rwadurian
spec:
  schedule: "0 2 * * *"  # every day at 02:00
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: backup
              image: postgres:15-alpine
              command: ["/scripts/backup.sh"]
              envFrom:
                - secretRef:
                    name: reporting-service-secret
              volumeMounts:
                - name: backup-scripts
                  mountPath: /scripts
          volumes:
            - name: backup-scripts
              configMap:
                name: backup-scripts
                defaultMode: 0755
          restartPolicy: OnFailure
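The CronJob mounts a ConfigMap named backup-scripts that is not defined elsewhere in this guide. It can be created with kubectl create configmap backup-scripts --from-file=scripts/backup.sh -n rwadurian, or declared as a manifest; a sketch mirroring the script in 8.1 is below. Note also that postgres:15-alpine does not ship the aws CLI, so the S3 upload step needs either a custom image or a different upload mechanism.

# k8s/backup-scripts-configmap.yaml (sketch)
apiVersion: v1
kind: ConfigMap
metadata:
  name: backup-scripts
  namespace: rwadurian
data:
  backup.sh: |
    #!/bin/sh
    set -e
    BACKUP_FILE="/tmp/reporting_$(date +%Y%m%d_%H%M%S).sql.gz"
    pg_dump -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME" | gzip > "$BACKUP_FILE"
    aws s3 cp "$BACKUP_FILE" s3://rwadurian-backups/reporting/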
9. Troubleshooting
9.1 Common Issues
Q: Pods fail to start
# Inspect the pod
kubectl describe pod <pod-name> -n rwadurian
# View logs from the previous container instance
kubectl logs <pod-name> -n rwadurian --previous
Q: Database connection failures
# Check network policies
kubectl get networkpolicy -n rwadurian
# Test connectivity from inside a pod
kubectl exec -it <pod-name> -n rwadurian -- nc -zv postgres-service 5432
Q: Out of memory
# Check resource usage
kubectl top pod -n rwadurian
# Raise the memory limit
kubectl patch deployment reporting-service -n rwadurian \
  -p '{"spec":{"template":{"spec":{"containers":[{"name":"reporting-service","resources":{"limits":{"memory":"1Gi"}}}]}}}}'
9.2 Rollback
# View rollout history
kubectl rollout history deployment/reporting-service -n rwadurian
# Roll back to the previous revision
kubectl rollout undo deployment/reporting-service -n rwadurian
# Roll back to a specific revision
kubectl rollout undo deployment/reporting-service --to-revision=2 -n rwadurian