MPC Party Service 部署文档
概述
本文档描述 MPC Party Service 的部署架构、部署流程和运维指南。
部署架构
生产环境架构
┌─────────────────┐
│ Load Balancer │
│ (Nginx/ALB) │
└────────┬────────┘
│
┌────────────────────────┼────────────────────────┐
│ │ │
┌───────▼───────┐ ┌────────▼───────┐ ┌────────▼───────┐
│ MPC Service │ │ MPC Service │ │ MPC Service │
│ Node 1 │ │ Node 2 │ │ Node 3 │
│ (Party 1) │ │ (Party 2) │ │ (Party 3) │
└───────┬───────┘ └────────┬───────┘ └────────┬───────┘
│ │ │
└────────────────────────┼────────────────────────┘
│
┌────────────────────────┼────────────────────────┐
│ │ │
┌───────▼───────┐ ┌────────▼───────┐ ┌────────▼───────┐
│ MySQL │ │ Redis │ │ Kafka │
│ (Primary) │ │ (Cluster) │ │ (Cluster) │
└───────────────┘ └────────────────┘ └────────────────┘
容器化部署
┌─────────────────────────────────────────────────────────────────┐
│ Kubernetes Cluster │
├─────────────────────────────────────────────────────────────────┤
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Namespace: mpc-system │ │
│ ├─────────────────────────────────────────────────────────┤ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
│ │ │ mpc-party-1 │ │ mpc-party-2 │ │ mpc-party-3 │ │ │
│ │ │ Deployment │ │ Deployment │ │ Deployment │ │ │
│ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
│ │ │ │
│ │ ┌─────────────────────────────────────────────────┐ │ │
│ │ │ Shared Services │ │ │
│ │ │ - ConfigMap - Secrets │ │ │
│ │ │ - PVC (Logs) - ServiceAccount │ │ │
│ │ └─────────────────────────────────────────────────┘ │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Infrastructure Services │ │
│ │ - MySQL StatefulSet │ │
│ │ - Redis StatefulSet │ │
│ │ - Kafka StatefulSet │ │
│ └─────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
Docker 部署
Dockerfile
# Dockerfile
# Multi-stage build: compile in "builder", ship only runtime artifacts in "production".
FROM node:18-alpine AS builder
WORKDIR /app

# Install ALL dependencies: devDependencies are required for `npm run build`
# (a production-only install would make the build step fail).
COPY package*.json ./
RUN npm ci

# Copy source code
COPY . .

# Generate Prisma Client
RUN npx prisma generate

# Build
RUN npm run build

# After building, strip devDependencies so the copied node_modules is lean.
RUN npm prune --omit=dev

# Production image
FROM node:18-alpine AS production
WORKDIR /app

# Copy build artifacts
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/package*.json ./
COPY --from=builder /app/prisma ./prisma

# Create a non-root user
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nodejs -u 1001 -G nodejs
USER nodejs

EXPOSE 3006

# Health check against the service's own health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:3006/api/v1/mpc-party/health || exit 1

# Start the service
CMD ["node", "dist/main.js"]
Docker Compose
# docker-compose.yml
version: '3.8'

services:
  mpc-party-service:
    build: .
    ports:
      - "3006:3006"
    environment:
      - NODE_ENV=production
      - APP_PORT=3006
      # Reuse ${MYSQL_PASSWORD} so the app credential always matches the
      # password the mysql service is provisioned with below.
      - DATABASE_URL=mysql://mpc:${MYSQL_PASSWORD}@mysql:3306/mpc_service
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      - JWT_SECRET=${JWT_SECRET}
      - SHARE_MASTER_KEY=${SHARE_MASTER_KEY}
      - MPC_PARTY_ID=party-server-1
      - MPC_COORDINATOR_URL=http://coordinator:50051
      - MPC_MESSAGE_ROUTER_WS_URL=ws://message-router:50052
      - KAFKA_BROKERS=kafka:9092
      - KAFKA_ENABLED=true
    depends_on:
      mysql:
        condition: service_healthy
      redis:
        condition: service_healthy
      # Kafka is enabled above, so the service must wait for the broker
      # container to start (no healthcheck is defined for kafka).
      kafka:
        condition: service_started
    networks:
      - mpc-network
    restart: unless-stopped

  mysql:
    image: mysql:8.0
    environment:
      - MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD}
      - MYSQL_DATABASE=mpc_service
      - MYSQL_USER=mpc
      - MYSQL_PASSWORD=${MYSQL_PASSWORD}
    volumes:
      - mysql-data:/var/lib/mysql
    healthcheck:
      test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - mpc-network

  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes
    volumes:
      - redis-data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - mpc-network

  kafka:
    image: bitnami/kafka:3.5
    # Single-node KRaft mode (node 0 acts as both controller and broker).
    environment:
      - KAFKA_CFG_NODE_ID=0
      - KAFKA_CFG_PROCESS_ROLES=controller,broker
      - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093
      - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT
      - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@kafka:9093
      - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER
    volumes:
      - kafka-data:/bitnami/kafka
    networks:
      - mpc-network

volumes:
  mysql-data:
  redis-data:
  kafka-data:

networks:
  mpc-network:
    driver: bridge
构建和运行
# 构建镜像
docker build -t mpc-party-service:latest .
# 运行 Docker Compose
docker-compose up -d
# 查看日志
docker-compose logs -f mpc-party-service
# 停止服务
docker-compose down
Kubernetes 部署
ConfigMap
# k8s/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: mpc-party-config
  namespace: mpc-system
data:
  # ConfigMap values must be strings, so number/boolean-looking values are quoted.
  NODE_ENV: "production"
  APP_PORT: "3006"
  API_PREFIX: "api/v1"
  REDIS_PORT: "6379"
  KAFKA_ENABLED: "true"
Secrets
# k8s/secrets.yaml
apiVersion: v1
kind: Secret
metadata:
  name: mpc-party-secrets
  namespace: mpc-system
type: Opaque
data:
  # All values are base64-encoded, e.g. `echo -n 'value' | base64`.
  JWT_SECRET: <base64-encoded-secret>
  SHARE_MASTER_KEY: <base64-encoded-key>
  MYSQL_PASSWORD: <base64-encoded-password>
  # Required: the Deployment reads DATABASE_URL from this Secret via secretKeyRef.
  DATABASE_URL: <base64-encoded-connection-string>
Deployment
# k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mpc-party-service
  namespace: mpc-system
  labels:
    app: mpc-party-service
spec:
  replicas: 3
  selector:
    matchLabels:
      app: mpc-party-service
  template:
    metadata:
      labels:
        app: mpc-party-service
    spec:
      serviceAccountName: mpc-party-sa
      containers:
        - name: mpc-party-service
          image: your-registry/mpc-party-service:latest
          imagePullPolicy: Always
          ports:
            - containerPort: 3006
              protocol: TCP
          envFrom:
            - configMapRef:
                name: mpc-party-config
          env:
            # NOTE(review): Deployment pods get random name suffixes; if
            # MPC_PARTY_ID must be a stable identity (party-1/2/3 as in the
            # architecture diagram), a StatefulSet or one Deployment per
            # party may be needed — confirm with the service's requirements.
            - name: MPC_PARTY_ID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: mpc-party-secrets
                  key: DATABASE_URL
            - name: JWT_SECRET
              valueFrom:
                secretKeyRef:
                  name: mpc-party-secrets
                  key: JWT_SECRET
            - name: SHARE_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: mpc-party-secrets
                  key: SHARE_MASTER_KEY
          resources:
            requests:
              cpu: "500m"
              memory: "512Mi"
            limits:
              cpu: "2000m"
              memory: "2Gi"
          livenessProbe:
            httpGet:
              path: /api/v1/mpc-party/health
              port: 3006
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /api/v1/mpc-party/health
              port: 3006
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          volumeMounts:
            - name: logs
              mountPath: /app/logs
      volumes:
        - name: logs
          emptyDir: {}
Service
# k8s/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: mpc-party-service
  namespace: mpc-system
spec:
  type: ClusterIP
  selector:
    app: mpc-party-service
  ports:
    - port: 3006
      targetPort: 3006
      protocol: TCP
Ingress
# k8s/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: mpc-party-ingress
  namespace: mpc-system
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "10m"
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - mpc-api.example.com
      # Populated by cert-manager (see the Certificate resource below).
      secretName: mpc-tls-secret
  rules:
    - host: mpc-api.example.com
      http:
        paths:
          - path: /api/v1/mpc-party
            pathType: Prefix
            backend:
              service:
                name: mpc-party-service
                port:
                  number: 3006
HPA (Horizontal Pod Autoscaler)
# k8s/hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: mpc-party-hpa
  namespace: mpc-system
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: mpc-party-service
  # minReplicas matches the Deployment's baseline of 3 replicas.
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
部署命令
# 创建命名空间
kubectl create namespace mpc-system
# 应用配置
kubectl apply -f k8s/configmap.yaml
kubectl apply -f k8s/secrets.yaml
kubectl apply -f k8s/deployment.yaml
kubectl apply -f k8s/service.yaml
kubectl apply -f k8s/ingress.yaml
kubectl apply -f k8s/hpa.yaml
# 检查部署状态
kubectl get pods -n mpc-system
kubectl get svc -n mpc-system
kubectl get ingress -n mpc-system
# 查看日志
kubectl logs -f deployment/mpc-party-service -n mpc-system
# 扩容/缩容
kubectl scale deployment mpc-party-service --replicas=5 -n mpc-system
环境配置
开发环境
NODE_ENV=development
APP_PORT=3006
LOG_LEVEL=debug
DATABASE_URL=mysql://root:password@localhost:3306/mpc_service_dev
REDIS_HOST=localhost
REDIS_PORT=6379
KAFKA_ENABLED=false
测试环境
NODE_ENV=test
APP_PORT=3006
LOG_LEVEL=info
DATABASE_URL=mysql://mpc:password@mysql-test:3306/mpc_service_test
REDIS_HOST=redis-test
REDIS_PORT=6379
KAFKA_ENABLED=true
KAFKA_BROKERS=kafka-test:9092
生产环境
NODE_ENV=production
APP_PORT=3006
LOG_LEVEL=warn
DATABASE_URL=mysql://mpc:${DB_PASSWORD}@mysql-prod:3306/mpc_service
REDIS_HOST=redis-prod
REDIS_PORT=6379
REDIS_PASSWORD=${REDIS_PASSWORD}
KAFKA_ENABLED=true
KAFKA_BROKERS=kafka-prod-1:9092,kafka-prod-2:9092,kafka-prod-3:9092
# 安全配置
JWT_SECRET=${JWT_SECRET}
SHARE_MASTER_KEY=${SHARE_MASTER_KEY}
数据库迁移
Prisma 迁移
# 生产环境迁移
npx prisma migrate deploy
# 开发环境迁移
npx prisma migrate dev
# 重置数据库(仅开发环境)
npx prisma migrate reset
迁移策略
1. 零停机迁移
   - 使用蓝绿部署或金丝雀发布
   - 确保迁移向后兼容
2. 回滚计划
   - 保留迁移历史
   - 准备回滚脚本
# 回滚到特定版本
npx prisma migrate resolve --rolled-back <migration-name>
监控和告警
Prometheus 指标
# prometheus/mpc-service-rules.yaml
groups:
  - name: mpc-service
    rules:
      # 5xx responses exceed 10% of request rate for 5 minutes.
      - alert: HighErrorRate
        expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: High error rate detected
      # p95 request latency above 2 seconds for 5 minutes.
      - alert: HighLatency
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High latency detected
      # Scrape target has been unreachable for 1 minute.
      - alert: ServiceDown
        expr: up{job="mpc-party-service"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: MPC Party Service is down
Grafana Dashboard
关键监控指标:
- 请求速率和延迟
- 错误率
- CPU 和内存使用率
- 活跃连接数
- MPC 操作统计
日志聚合
使用 ELK Stack 或 Loki 进行日志聚合:
# fluentd configuration: tail JSON log files and ship them to Elasticsearch
<source>
  @type tail
  path /app/logs/*.log
  pos_file /var/log/fluentd/mpc-service.log.pos
  tag mpc.service
  <parse>
    @type json
  </parse>
</source>

<match mpc.**>
  @type elasticsearch
  host elasticsearch
  port 9200
  logstash_format true
  logstash_prefix mpc-service
</match>
安全配置
TLS 配置
# Certificates are managed automatically by cert-manager
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: mpc-tls
  namespace: mpc-system
spec:
  # cert-manager writes the issued certificate into this Secret,
  # which the Ingress references for TLS termination.
  secretName: mpc-tls-secret
  issuerRef:
    name: letsencrypt-prod
    kind: ClusterIssuer
  dnsNames:
    - mpc-api.example.com
网络策略
# k8s/network-policy.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: mpc-party-network-policy
  namespace: mpc-system
spec:
  podSelector:
    matchLabels:
      app: mpc-party-service
  policyTypes:
    - Ingress
    - Egress
  ingress:
    - from:
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
      ports:
        - protocol: TCP
          port: 3006
  egress:
    # DNS must be allowed explicitly: once Egress is restricted, the pod
    # cannot resolve the mysql/redis/kafka service names without this rule.
    - to:
        - namespaceSelector: {}
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    - to:
        - podSelector:
            matchLabels:
              app: mysql
      ports:
        - protocol: TCP
          port: 3306
    - to:
        - podSelector:
            matchLabels:
              app: redis
      ports:
        - protocol: TCP
          port: 6379
    - to:
        - podSelector:
            matchLabels:
              app: kafka
      ports:
        - protocol: TCP
          port: 9092
    # NOTE(review): the service also talks to a coordinator (50051) and a
    # message router (50052) per the Docker Compose config — confirm whether
    # matching egress rules are needed in this cluster.
Secret 管理
推荐使用:
- HashiCorp Vault
- AWS Secrets Manager
- Kubernetes External Secrets
# Using the External Secrets Operator
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: mpc-party-secrets
  namespace: mpc-system
spec:
  # Re-sync secrets from Vault every hour.
  refreshInterval: 1h
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault-backend
  target:
    # Kubernetes Secret created/owned by the operator.
    name: mpc-party-secrets
    creationPolicy: Owner
  data:
    - secretKey: JWT_SECRET
      remoteRef:
        key: mpc-service/jwt-secret
    - secretKey: SHARE_MASTER_KEY
      remoteRef:
        key: mpc-service/share-master-key
备份和恢复
数据库备份
# MySQL 备份
mysqldump -h mysql-host -u mpc -p mpc_service > backup_$(date +%Y%m%d_%H%M%S).sql
# 压缩备份
gzip backup_*.sql
# 上传到 S3
aws s3 cp backup_*.sql.gz s3://your-bucket/backups/mpc-service/
自动备份 CronJob
# k8s/backup-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: mysql-backup
  namespace: mpc-system
spec:
  schedule: "0 2 * * *"  # daily at 02:00
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: backup
              image: mysql:8.0
              command:
                - /bin/bash
                - -c
                - |
                  # pipefail: without it a failed mysqldump still exits 0
                  # through gzip, leaving a truncated backup marked successful.
                  set -o pipefail
                  mysqldump -h mysql -u mpc -p${MYSQL_PASSWORD} mpc_service | \
                    gzip > /backup/mpc_$(date +%Y%m%d_%H%M%S).sql.gz
              envFrom:
                # Provides MYSQL_PASSWORD from the shared Secret.
                - secretRef:
                    name: mpc-party-secrets
              volumeMounts:
                - name: backup-volume
                  mountPath: /backup
          restartPolicy: OnFailure
          volumes:
            - name: backup-volume
              persistentVolumeClaim:
                claimName: backup-pvc
恢复流程
# 1. 停止服务
kubectl scale deployment mpc-party-service --replicas=0 -n mpc-system
# 2. 恢复数据库
gunzip -c backup_20240115_020000.sql.gz | mysql -h mysql-host -u mpc -p mpc_service
# 3. 启动服务
kubectl scale deployment mpc-party-service --replicas=3 -n mpc-system
# 4. 验证服务
curl https://mpc-api.example.com/api/v1/mpc-party/health
故障排除
常见问题
1. Pod 无法启动
# 查看 Pod 事件
kubectl describe pod <pod-name> -n mpc-system
# 查看日志
kubectl logs <pod-name> -n mpc-system --previous
2. 数据库连接失败
# 检查数据库连接
kubectl exec -it <pod-name> -n mpc-system -- \
mysql -h mysql -u mpc -p -e "SELECT 1"
3. Redis 连接失败
# 检查 Redis 连接
kubectl exec -it <pod-name> -n mpc-system -- \
redis-cli -h redis ping
4. 服务不可达
# 检查 Service
kubectl get svc -n mpc-system
kubectl get endpoints mpc-party-service -n mpc-system
# 检查 Ingress
kubectl describe ingress mpc-party-ingress -n mpc-system
健康检查端点
# 检查服务健康
curl -v https://mpc-api.example.com/api/v1/mpc-party/health
日志查询
# 查看实时日志
kubectl logs -f deployment/mpc-party-service -n mpc-system
# 查看特定时间范围的日志
kubectl logs deployment/mpc-party-service -n mpc-system --since=1h
# 搜索错误日志
kubectl logs deployment/mpc-party-service -n mpc-system | grep -i error
版本发布流程
1. 构建新版本
# 打标签
git tag -a v1.2.3 -m "Release v1.2.3"
git push origin v1.2.3
# 构建镜像
docker build -t your-registry/mpc-party-service:v1.2.3 .
docker push your-registry/mpc-party-service:v1.2.3
2. 滚动更新
# 更新镜像
kubectl set image deployment/mpc-party-service \
mpc-party-service=your-registry/mpc-party-service:v1.2.3 \
-n mpc-system
# 监控更新状态
kubectl rollout status deployment/mpc-party-service -n mpc-system
3. 回滚
# 查看历史
kubectl rollout history deployment/mpc-party-service -n mpc-system
# 回滚到上一版本
kubectl rollout undo deployment/mpc-party-service -n mpc-system
# 回滚到特定版本
kubectl rollout undo deployment/mpc-party-service \
--to-revision=2 -n mpc-system
联系与支持
- 技术支持: tech@example.com
- 紧急问题: oncall@example.com
- 文档: https://docs.example.com/mpc-service