From 24412794e60001782e640c739f199be8d34f943d Mon Sep 17 00:00:00 2001 From: hailin Date: Mon, 12 Jan 2026 00:33:19 -0800 Subject: [PATCH] =?UTF-8?q?fix(deploy-mining):=20=E4=BF=AE=E6=AD=A3=20full?= =?UTF-8?q?-reset=20=E6=AD=A5=E9=AA=A4=E9=A1=BA=E5=BA=8F=E9=81=BF=E5=85=8D?= =?UTF-8?q?=20CDC=20offset=20=E9=87=8D=E7=BD=AE=E5=A4=B1=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 migration 之前重置 CDC offsets(因为 migration 会启动容器) - 停止服务后等待 15 秒让 Kafka consumer 变成 inactive - 添加重试机制,最多重试 3 次,每次间隔 10 秒 - 步骤从 6 步改为 7 步 Co-Authored-By: Claude Opus 4.5 --- backend/services/deploy-mining.sh | 75 ++++++++++++++++++------------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/backend/services/deploy-mining.sh b/backend/services/deploy-mining.sh index e6d75868..23ae9b18 100755 --- a/backend/services/deploy-mining.sh +++ b/backend/services/deploy-mining.sh @@ -724,49 +724,62 @@ full_reset() { fi echo "" - log_step "Step 1/6: Stopping 2.0 services..." + log_step "Step 1/7: Stopping 2.0 services..." for service in "${MINING_SERVICES[@]}"; do service_stop "$service" done - log_step "Step 2/6: Dropping 2.0 databases..." - db_drop + log_step "Step 2/7: Waiting for Kafka consumers to become inactive..." + log_info "Waiting 15 seconds for consumer group session timeout..." + sleep 15 - log_step "Step 3/6: Creating 2.0 databases..." - db_create - - log_step "Step 4/6: Running migrations..." - db_migrate - - log_step "Step 5/6: Resetting CDC consumer offsets..." - # Reset all CDC consumer groups + log_step "Step 3/7: Resetting CDC consumer offsets..." + # Reset offsets BEFORE migrations (which may start containers) for group in "${CDC_CONSUMER_GROUPS[@]}"; do log_info "Resetting consumer group: $group" + local reset_success=false + local retry_count=0 + local max_retries=3 - # Try local kafka-consumer-groups.sh first - if command -v kafka-consumer-groups.sh &>/dev/null; then - kafka-consumer-groups.sh --bootstrap-server "$KAFKA_BROKERS" \ - --group "$group" \ - --reset-offsets \ - --to-earliest \ - --all-topics \ - --execute 2>/dev/null && log_success "CDC offsets reset for $group" && continue - fi + while [ "$reset_success" = false ] && [ $retry_count -lt $max_retries ]; do + # Try docker exec with the correct container name + if docker ps --format '{{.Names}}' 2>/dev/null | grep -q "^${KAFKA_CONTAINER}$"; then + if docker exec "$KAFKA_CONTAINER" kafka-consumer-groups --bootstrap-server localhost:9092 \ + --group "$group" \ + --reset-offsets \ + --to-earliest \ + --all-topics \ + --execute 2>&1 | grep -q "NEW-OFFSET"; then + log_success "CDC offsets reset for $group" + reset_success=true + else + retry_count=$((retry_count + 1)) + if [ $retry_count -lt $max_retries ]; then + log_warn "Consumer group still active, waiting 10s (retry $retry_count/$max_retries)..." + sleep 10 + fi + fi + else + log_warn "Kafka container '$KAFKA_CONTAINER' not found" + break + fi + done - # Try docker exec with the correct container name - if docker ps --format '{{.Names}}' 2>/dev/null | grep -q "^${KAFKA_CONTAINER}$"; then - docker exec "$KAFKA_CONTAINER" kafka-consumer-groups --bootstrap-server localhost:9092 \ - --group "$group" \ - --reset-offsets \ - --to-earliest \ - --all-topics \ - --execute 2>&1 && log_success "CDC offsets reset for $group" || log_warn "Could not reset offsets for $group" - else - log_warn "Kafka container '$KAFKA_CONTAINER' not found. Manual offset reset may be needed." + if [ "$reset_success" = false ]; then + log_warn "Could not reset offsets for $group after $max_retries attempts" fi done - log_step "Step 6/6: Starting 2.0 services..." + log_step "Step 4/7: Dropping 2.0 databases..." + db_drop + + log_step "Step 5/7: Creating 2.0 databases..." + db_create + + log_step "Step 6/7: Running migrations..." + db_migrate + + log_step "Step 7/7: Starting 2.0 services..." for service in "${MINING_SERVICES[@]}"; do service_start "$service" done