fix(deploy-mining): 修正 full-reset 步骤顺序避免 CDC offset 重置失败

- 在 migration 之前重置 CDC offsets(因为 migration 会启动容器)
- 停止服务后等待 15 秒让 Kafka consumer 变成 inactive
- 添加重试机制,最多尝试 3 次,每次失败后等待 10 秒再重试
- 步骤从 6 步改为 7 步

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-01-12 00:33:19 -08:00
parent ff27195be2
commit 24412794e6
1 changed file with 44 additions and 31 deletions

View File

@@ -724,49 +724,62 @@ full_reset() {
fi
echo ""
log_step "Step 1/6: Stopping 2.0 services..."
log_step "Step 1/7: Stopping 2.0 services..."
for service in "${MINING_SERVICES[@]}"; do
service_stop "$service"
done
log_step "Step 2/6: Dropping 2.0 databases..."
db_drop
log_step "Step 2/7: Waiting for Kafka consumers to become inactive..."
log_info "Waiting 15 seconds for consumer group session timeout..."
sleep 15
log_step "Step 3/6: Creating 2.0 databases..."
db_create
log_step "Step 4/6: Running migrations..."
db_migrate
log_step "Step 5/6: Resetting CDC consumer offsets..."
# Reset all CDC consumer groups
log_step "Step 3/7: Resetting CDC consumer offsets..."
# Reset offsets BEFORE migrations (which may start containers)
for group in "${CDC_CONSUMER_GROUPS[@]}"; do
log_info "Resetting consumer group: $group"
local reset_success=false
local retry_count=0
local max_retries=3
# Try local kafka-consumer-groups.sh first
if command -v kafka-consumer-groups.sh &>/dev/null; then
kafka-consumer-groups.sh --bootstrap-server "$KAFKA_BROKERS" \
--group "$group" \
--reset-offsets \
--to-earliest \
--all-topics \
--execute 2>/dev/null && log_success "CDC offsets reset for $group" && continue
fi
while [ "$reset_success" = false ] && [ $retry_count -lt $max_retries ]; do
# Try docker exec with the correct container name
if docker ps --format '{{.Names}}' 2>/dev/null | grep -q "^${KAFKA_CONTAINER}$"; then
docker exec "$KAFKA_CONTAINER" kafka-consumer-groups --bootstrap-server localhost:9092 \
if docker exec "$KAFKA_CONTAINER" kafka-consumer-groups --bootstrap-server localhost:9092 \
--group "$group" \
--reset-offsets \
--to-earliest \
--all-topics \
--execute 2>&1 && log_success "CDC offsets reset for $group" || log_warn "Could not reset offsets for $group"
--execute 2>&1 | grep -q "NEW-OFFSET"; then
log_success "CDC offsets reset for $group"
reset_success=true
else
log_warn "Kafka container '$KAFKA_CONTAINER' not found. Manual offset reset may be needed."
retry_count=$((retry_count + 1))
if [ $retry_count -lt $max_retries ]; then
log_warn "Consumer group still active, waiting 10s (retry $retry_count/$max_retries)..."
sleep 10
fi
fi
else
log_warn "Kafka container '$KAFKA_CONTAINER' not found"
break
fi
done
log_step "Step 6/6: Starting 2.0 services..."
if [ "$reset_success" = false ]; then
log_warn "Could not reset offsets for $group after $max_retries attempts"
fi
done
log_step "Step 4/7: Dropping 2.0 databases..."
db_drop
log_step "Step 5/7: Creating 2.0 databases..."
db_create
log_step "Step 6/7: Running migrations..."
db_migrate
log_step "Step 7/7: Starting 2.0 services..."
for service in "${MINING_SERVICES[@]}"; do
service_start "$service"
done