#!/bin/bash
#
# RWA system data snapshot tool
# Purpose: back up and restore Docker volumes (PostgreSQL, Redis, Kafka,
#          Zookeeper).
#
# Features:
#   - PostgreSQL online logical backup (pg_dumpall)
#   - Redis backup after a triggered BGSAVE
#   - Kafka / Zookeeper volume backup
#   - Several restore modes (logical / physical / selective)
#
# Usage:
#   ./snapshot.sh backup              # create a full backup
#   ./snapshot.sh restore <dir>       # restore from a backup
#   ./snapshot.sh list                # list all backups
#   ./snapshot.sh info <dir>          # show details of one backup
#   ./snapshot.sh clean [keep-days]   # delete old backups (default: keep 7 days)
#
# Backup notes:
#   - Containers do not have to be stopped, but make sure no user activity is
#     in flight (avoids inconsistent data).
#   - PostgreSQL is dumped online with pg_dumpall.
#   - Redis is asked to BGSAVE before its volume is archived.
#   - Volumes are mounted read-only while they are archived.
#
# Restore notes:
#   - PostgreSQL logical restore (recommended): containers keep running, the
#     SQL dump is piped into psql.
#   - Physical volume restore: the related containers must be stopped first.
#
# Environment:
#   BACKUP_ROOT   directory holding all snapshots (default: ~/docker-backups)
#

set -e

# ============================================
# Configuration
# ============================================
BACKUP_ROOT="${BACKUP_ROOT:-$HOME/docker-backups}"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# Container names of the "services" stack
SERVICES_POSTGRES_CONTAINER="rwa-postgres"
SERVICES_REDIS_CONTAINER="rwa-redis"
SERVICES_KAFKA_CONTAINER="rwa-kafka"

# Container names of the MPC system stack
MPC_POSTGRES_CONTAINER="mpc-postgres"

# Volumes to back up, as "<volume name>:<human readable description>"
declare -a VOLUMES=(
    "services_postgres_data:Services PostgreSQL 数据"
    "services_redis_data:Services Redis 数据"
    "services_kafka_data:Kafka 消息队列数据"
    "services_zookeeper_data:Zookeeper 配置数据"
    "services_zookeeper_log:Zookeeper 日志"
    "mpc-system_postgres-data:MPC PostgreSQL 数据"
)

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# ============================================
# Utility functions
# ============================================

# Leveled log helpers. Warnings and errors go to stderr so that functions
# which return data on stdout can still report problems.
log_info()  { echo -e "${GREEN}[INFO]${NC} $1"; }
log_warn()  { echo -e "${YELLOW}[WARN]${NC} $1" >&2; }
log_error() { echo -e "${RED}[ERROR]${NC} $1" >&2; }
log_step()  { echo -e "${CYAN}[STEP]${NC} $1"; }

# Print a framed section title.
#   $1 - title text
print_header() {
    echo ""
    echo -e "${BLUE}============================================${NC}"
    echo -e "${BLUE}  $1${NC}"
    echo -e "${BLUE}============================================${NC}"
    echo ""
}

# Exit with status 1 unless the docker CLI exists and the daemon answers.
check_docker() {
    if ! command -v docker &> /dev/null; then
        log_error "Docker 未安装或不在 PATH 中"
        exit 1
    fi
    if ! docker info &> /dev/null; then
        log_error "Docker daemon 未运行"
        exit 1
    fi
}

# Ask a yes/no question on stdin.
#   $1 - prompt text
#   $2 - default answer, "y" or "n" (default "n"); used on empty input / EOF
# Returns 0 for yes, non-zero otherwise.
confirm() {
    local prompt="$1"
    local default="${2:-n}"
    local response=""

    if [[ "$default" == "y" ]]; then
        prompt="$prompt [Y/n]: "
    else
        prompt="$prompt [y/N]: "
    fi

    # EOF on stdin is treated like an empty answer.
    read -r -p "$prompt" response || true
    response=${response:-$default}
    [[ "$response" =~ ^[Yy]$ ]]
}

# Return 0 if a container with exactly the given name is currently running.
#   $1 - container name
container_running() {
    docker ps --format '{{.Names}}' | grep -Fxq -- "$1"
}

# Turn a backup name relative to BACKUP_ROOT into an absolute path; absolute
# paths are passed through unchanged. Prints the result on stdout.
#   $1 - backup directory (absolute, or relative to BACKUP_ROOT)
resolve_backup_dir() {
    local dir="$1"
    if [[ "$dir" != /* ]]; then
        dir="$BACKUP_ROOT/$dir"
    fi
    printf '%s\n' "$dir"
}

# Validate a comma separated list of 1-based menu numbers.
# Prints the accepted entries as zero-based indices, one per line, in input
# order. Anything that is not a plain decimal number in 1..count is reported
# with a warning and skipped, so user input never reaches arithmetic
# evaluation unvalidated.
#   $1 - raw selection, e.g. "1,3,4"
#   $2 - number of selectable items
parse_selection() {
    local selection="$1"
    local count="$2"
    local -a tokens=()
    local token

    IFS=',' read -ra tokens <<< "$selection" || true
    for token in "${tokens[@]}"; do
        token="${token//[[:space:]]/}"
        # 10# forces base 10 so "08" is not rejected as invalid octal.
        if [[ "$token" =~ ^[0-9]+$ ]] && (( 10#$token >= 1 && 10#$token <= count )); then
            printf '%s\n' "$(( 10#$token - 1 ))"
        elif [[ -n "$token" ]]; then
            log_warn "忽略无效序号: $token"
        fi
    done
    return 0
}

# ============================================
# Backup
# ============================================

# Dump one PostgreSQL cluster with pg_dumpall.
# A stopped container is skipped (status 0). On failure the partial dump is
# removed so a later restore cannot pick up a truncated file.
#   $1 - container name
#   $2 - label used in log messages
#   $3 - destination .sql file
#   $4 - log file receiving pg_dumpall's stderr
# Returns non-zero only if the dump itself failed.
backup_postgres() {
    local container="$1"
    local label="$2"
    local dump_file="$3"
    local log_file="$4"

    if ! container_running "$container"; then
        log_warn "容器 $container 未运行,跳过"
        return 0
    fi

    log_info "导出 $label..."
    if docker exec "$container" pg_dumpall -U postgres > "$dump_file" 2>>"$log_file"; then
        log_info "✓ $label 导出完成 ($(du -h "$dump_file" | cut -f1))"
        return 0
    fi

    rm -f -- "$dump_file" || true
    log_warn "$label 导出失败"
    return 1
}

# Trigger a Redis BGSAVE and poll "INFO persistence" until
# rdb_bgsave_in_progress reports 0.
# NOTE(review): assumes redis-cli needs no password inside the container,
# as the original BGSAVE call did — confirm.
#   $1 - container name
#   $2 - log file
#   $3 - timeout in seconds (default 30)
# Returns 0 once no background save is in progress, 1 on timeout.
redis_bgsave_and_wait() {
    local container="$1"
    local log_file="$2"
    local timeout="${3:-30}"
    local waited=0

    # Best effort, exactly like before: a refused BGSAVE is only logged.
    docker exec "$container" redis-cli BGSAVE >>"$log_file" 2>&1 || true

    while (( waited < timeout )); do
        if docker exec "$container" redis-cli INFO persistence 2>>"$log_file" \
            | grep -q '^rdb_bgsave_in_progress:0'; then
            return 0
        fi
        sleep 1
        waited=$((waited + 1))
    done
    return 1
}

# Create a full snapshot under $BACKUP_ROOT/$TIMESTAMP:
# PostgreSQL dumps, Redis BGSAVE, volume archives, Kafka topic list, and
# container/volume/compose state. Individual failures do not stop the run;
# they are counted and make the function return 1 at the end.
do_backup() {
    local backup_dir="$BACKUP_ROOT/$TIMESTAMP"
    local log_file="$backup_dir/backup.log"
    local fail_count=0

    print_header "开始系统完整数据快照"
    check_docker

    echo -e "${YELLOW}备份提示:${NC}"
    echo "  - 备份过程中无需停止容器"
    echo "  - 建议确保当前没有用户正在进行关键操作"
    echo "  - PostgreSQL 使用事务一致性备份,Redis 会触发 BGSAVE"
    echo ""

    if ! confirm "确认开始备份?" "y"; then
        log_info "已取消备份"
        exit 0
    fi

    # Create the backup directory
    mkdir -p "$backup_dir"
    echo "备份开始时间: $(date)" > "$log_file"
    log_info "备份目录: $backup_dir"

    # ----------------------------------------
    # 1. PostgreSQL logical backup
    # ----------------------------------------
    log_step "1/5 备份 PostgreSQL 数据库 (逻辑备份)"

    backup_postgres "$SERVICES_POSTGRES_CONTAINER" "Services PostgreSQL" \
        "$backup_dir/services_postgres_dump.sql" "$log_file" \
        || fail_count=$((fail_count + 1))

    backup_postgres "$MPC_POSTGRES_CONTAINER" "MPC PostgreSQL" \
        "$backup_dir/mpc_postgres_dump.sql" "$log_file" \
        || fail_count=$((fail_count + 1))

    # ----------------------------------------
    # 2. Redis persistence
    # ----------------------------------------
    log_step "2/5 触发 Redis 持久化"

    if container_running "$SERVICES_REDIS_CONTAINER"; then
        log_info "触发 Redis BGSAVE..."
        if redis_bgsave_and_wait "$SERVICES_REDIS_CONTAINER" "$log_file"; then
            log_info "✓ Redis BGSAVE 已完成"
        else
            log_warn "Redis BGSAVE 未在规定时间内完成,Redis 数据卷备份可能不是最新"
        fi
    else
        log_warn "容器 $SERVICES_REDIS_CONTAINER 未运行,跳过"
    fi

    # ----------------------------------------
    # 3. Docker volume archives
    # ----------------------------------------
    log_step "3/5 备份 Docker 数据卷"

    local success_count=0
    local skip_count=0
    local entry volume desc size

    for entry in "${VOLUMES[@]}"; do
        IFS=':' read -r volume desc <<< "$entry"

        if ! docker volume inspect "$volume" &>/dev/null; then
            log_warn "数据卷 $volume 不存在,跳过"
            # Plain assignment: "((n++))" returns status 1 when n is 0 and
            # would terminate the script under "set -e".
            skip_count=$((skip_count + 1))
            continue
        fi

        log_info "备份 $volume ($desc)..."
        if docker run --rm \
            -v "$volume":/data:ro \
            -v "$backup_dir":/backup \
            alpine tar czf "/backup/${volume}.tar.gz" -C /data . 2>>"$log_file"; then
            size=$(du -h "$backup_dir/${volume}.tar.gz" | cut -f1)
            log_info "✓ $volume 完成 ($size)"
            success_count=$((success_count + 1))
        else
            # Do not leave a truncated archive that looks restorable.
            rm -f -- "$backup_dir/${volume}.tar.gz" || true
            log_error "$volume 备份失败"
            fail_count=$((fail_count + 1))
        fi
    done

    log_info "数据卷备份: ${success_count} 成功, ${skip_count} 跳过"

    # ----------------------------------------
    # 4. Kafka topic list
    # ----------------------------------------
    log_step "4/5 保存 Kafka Topics 列表"

    local topic_count
    if container_running "$SERVICES_KAFKA_CONTAINER"; then
        if docker exec "$SERVICES_KAFKA_CONTAINER" kafka-topics \
            --bootstrap-server localhost:9092 --list \
            > "$backup_dir/kafka_topics.txt" 2>>"$log_file"; then
            topic_count=$(wc -l < "$backup_dir/kafka_topics.txt")
            log_info "✓ 保存了 $topic_count 个 topics"
        else
            log_warn "获取 Kafka topics 失败"
        fi
    else
        log_warn "Kafka 容器未运行,跳过"
    fi

    # ----------------------------------------
    # 5. System state
    # ----------------------------------------
    log_step "5/5 保存系统状态信息"

    # Container status
    docker ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" > "$backup_dir/containers_status.txt"
    log_info "✓ 容器状态已保存"

    # Volume list
    docker volume ls > "$backup_dir/volumes_list.txt"
    log_info "✓ 数据卷列表已保存"

    # Docker Compose files
    local compose_dir compose_name
    for compose_dir in "$HOME/services" "$HOME/mpc-system"; do
        if [[ -f "$compose_dir/docker-compose.yml" ]]; then
            compose_name=$(basename "$compose_dir")
            cp "$compose_dir/docker-compose.yml" "$backup_dir/${compose_name}_docker-compose.yml"
            log_info "✓ ${compose_name}/docker-compose.yml 已备份"
        fi
    done

    # ----------------------------------------
    # Backup summary
    # ----------------------------------------
    local total_size
    total_size=$(du -sh "$backup_dir" | cut -f1)

    cat > "$backup_dir/BACKUP_INFO.txt" << EOF
===========================================
RWA 系统数据快照
===========================================

备份时间: $(date)
备份目录: $backup_dir
总大小: $total_size

包含文件:
$(ls -lh "$backup_dir" | tail -n +2)

-------------------------------------------
恢复命令:
-------------------------------------------

# 完整恢复
./snapshot.sh restore $backup_dir

# 或手动恢复 PostgreSQL
docker exec -i $SERVICES_POSTGRES_CONTAINER psql -U postgres < $backup_dir/services_postgres_dump.sql
docker exec -i $MPC_POSTGRES_CONTAINER psql -U postgres < $backup_dir/mpc_postgres_dump.sql

===========================================
EOF

    echo "备份完成时间: $(date)" >> "$log_file"

    print_header "备份完成"
    log_info "备份目录: $backup_dir"
    log_info "总大小: $total_size"
    echo ""
    ls -lh "$backup_dir"

    if (( fail_count > 0 )); then
        log_warn "有 ${fail_count} 项备份失败,详见 $log_file"
        return 1
    fi
    return 0
}

# ============================================
# Restore
# ============================================

# Pipe one SQL dump into psql inside a running container.
# psql's stderr is captured instead of discarded: a non-zero exit status is
# reported as a failure, stderr output with exit status 0 as warnings, and
# the first lines are shown either way.
#   $1 - container name
#   $2 - label used in log messages
#   $3 - path of the .sql dump
# Returns 0 when restored or skipped (missing dump / stopped container),
# psql's exit status otherwise.
restore_pg_dump() {
    local container="$1"
    local label="$2"
    local dump_file="$3"
    local err_file
    local rc=0

    if [[ ! -f "$dump_file" ]]; then
        log_warn "未找到 $(basename "$dump_file")"
        return 0
    fi
    if ! container_running "$container"; then
        log_warn "容器 $container 未运行"
        return 0
    fi

    log_info "恢复 $label..."
    err_file=$(mktemp) || { log_error "无法创建临时文件"; return 1; }

    docker exec -i "$container" psql -U postgres < "$dump_file" 2>"$err_file" || rc=$?

    if (( rc != 0 )); then
        log_error "$label 恢复失败 (psql 退出码 $rc)"
        sed -n '1,10p' "$err_file" >&2
    elif [[ -s "$err_file" ]]; then
        log_warn "$label 恢复可能有警告(通常是正常的)"
        sed -n '1,10p' "$err_file" >&2
    else
        log_info "✓ $label 恢复完成"
    fi

    rm -f -- "$err_file" || true
    return "$rc"
}

# Replace the contents of one Docker volume with <backup_dir>/<volume>.tar.gz.
# The volume is created when it does not exist yet. The volume name is handed
# to the container shell as a positional argument rather than being spliced
# into the command string.
#   $1 - volume name
#   $2 - backup directory containing the archive
restore_volume_archive() {
    local volume="$1"
    local backup_dir="$2"

    if ! docker volume inspect "$volume" &>/dev/null; then
        docker volume create "$volume" >/dev/null || return 1
        log_info "创建数据卷 $volume"
    fi

    docker run --rm \
        -v "$volume":/data \
        -v "$backup_dir":/backup:ro \
        alpine sh -c 'rm -rf /data/* /data/..?* /data/.[!.]* 2>/dev/null; tar xzf "/backup/$1.tar.gz" -C /data' sh "$volume"
}

# Interactive entry point for "restore": shows the backup summary, asks for
# confirmation and the restore mode, then dispatches. Exits non-zero when the
# directory is missing, the choice is invalid, or a restore step failed.
#   $1 - backup directory (absolute, or relative to BACKUP_ROOT)
do_restore() {
    local backup_dir="$1"
    local choice=""
    local rc=0

    if [[ -z "$backup_dir" ]]; then
        log_error "请指定备份目录"
        echo "用法: $0 restore <备份目录>"
        echo ""
        echo "可用备份:"
        do_list
        exit 1
    fi

    # Names relative to BACKUP_ROOT are accepted
    backup_dir=$(resolve_backup_dir "$backup_dir")

    if [[ ! -d "$backup_dir" ]]; then
        log_error "备份目录不存在: $backup_dir"
        exit 1
    fi

    print_header "从备份恢复系统数据"
    check_docker

    log_info "备份目录: $backup_dir"
    echo ""

    if [[ -f "$backup_dir/BACKUP_INFO.txt" ]]; then
        cat "$backup_dir/BACKUP_INFO.txt"
        echo ""
    fi

    echo -e "${RED}警告: 恢复操作将覆盖现有数据!${NC}"
    echo ""

    if ! confirm "确定要继续恢复吗?"; then
        log_info "已取消恢复操作"
        exit 0
    fi

    # ----------------------------------------
    # Choose what to restore
    # ----------------------------------------
    echo ""
    echo "请选择恢复内容:"
    echo "  1) 完整恢复 (PostgreSQL + 所有数据卷)"
    echo "  2) 仅恢复 PostgreSQL (逻辑恢复,推荐)"
    echo "  3) 仅恢复数据卷 (物理恢复)"
    echo "  4) 选择性恢复"
    echo ""
    read -r -p "请选择 [1-4]: " choice || true

    case "$choice" in
        1) restore_all "$backup_dir" || rc=$? ;;
        2) restore_postgres "$backup_dir" || rc=$? ;;
        3) restore_volumes "$backup_dir" || rc=$? ;;
        4) restore_selective "$backup_dir" || rc=$? ;;
        *) log_error "无效选择"; exit 1 ;;
    esac

    if (( rc != 0 )); then
        log_error "恢复过程中出现错误,请检查上面的输出"
        exit "$rc"
    fi

    print_header "恢复完成"
}

# Logical restore of both PostgreSQL clusters from their SQL dumps.
# Both restores are attempted; returns 1 if either one failed.
#   $1 - backup directory
restore_postgres() {
    local backup_dir="$1"
    local rc=0

    log_step "恢复 PostgreSQL 数据库"

    restore_pg_dump "$SERVICES_POSTGRES_CONTAINER" "Services PostgreSQL" \
        "$backup_dir/services_postgres_dump.sql" || rc=1

    restore_pg_dump "$MPC_POSTGRES_CONTAINER" "MPC PostgreSQL" \
        "$backup_dir/mpc_postgres_dump.sql" || rc=1

    return "$rc"
}

# Physical restore of every volume in VOLUMES that has an archive in the
# backup. Exits with status 1 when the user has not stopped the containers.
# Missing archives are skipped; returns 1 if any volume failed to restore.
#   $1 - backup directory
restore_volumes() {
    local backup_dir="$1"
    local entry volume desc archive
    local failed=0

    log_step "恢复 Docker 数据卷"

    echo ""
    echo -e "${RED}警告: 恢复数据卷需要先停止相关容器!${NC}"
    echo -e "${YELLOW}提示: 如果只是恢复 PostgreSQL 数据,建议使用选项 2 (逻辑恢复),无需停止容器${NC}"
    echo ""

    if ! confirm "是否已停止所有相关容器?"; then
        log_info "请先停止容器,然后重新运行恢复"
        echo ""
        echo "停止命令:"
        echo "  cd ~/services && docker compose down"
        echo "  cd ~/mpc-system && docker compose down"
        exit 1
    fi

    for entry in "${VOLUMES[@]}"; do
        IFS=':' read -r volume desc <<< "$entry"
        archive="$backup_dir/${volume}.tar.gz"

        if [[ ! -f "$archive" ]]; then
            log_warn "未找到 $archive,跳过"
            continue
        fi

        log_info "恢复 $volume ($desc)..."
        if restore_volume_archive "$volume" "$backup_dir"; then
            log_info "✓ $volume 恢复完成"
        else
            log_error "$volume 恢复失败"
            failed=$((failed + 1))
        fi
    done

    if (( failed > 0 )); then
        return 1
    fi
    return 0
}

# Full restore: volumes first, then (after the user restarted the containers)
# the PostgreSQL dumps. Returns 1 if either phase reported a failure.
#   $1 - backup directory
restore_all() {
    local backup_dir="$1"
    local rc=0

    # Volumes first
    restore_volumes "$backup_dir" || rc=1

    echo ""
    log_info "请先启动容器,然后继续恢复 PostgreSQL"
    echo ""
    echo "启动命令:"
    echo "  cd ~/services && docker compose up -d"
    echo "  cd ~/mpc-system && docker compose up -d"
    echo ""

    if confirm "容器已启动,继续恢复 PostgreSQL?"; then
        restore_postgres "$backup_dir" || rc=1
    fi

    return "$rc"
}

# Let the user pick individual dumps/volumes from the backup and restore only
# those. The menu lists what is actually present in the backup directory.
# Returns 1 if any selected item failed to restore.
#   $1 - backup directory
restore_selective() {
    local backup_dir="$1"
    local -a items=()
    local -a indices=()
    local entry volume desc selection="" idx item
    local rc=0

    echo ""
    echo "可恢复的内容:"
    echo ""

    if [[ -f "$backup_dir/services_postgres_dump.sql" ]]; then
        items+=("services_postgres")
        echo "  ${#items[@]}) Services PostgreSQL (services_postgres_dump.sql)"
    fi

    if [[ -f "$backup_dir/mpc_postgres_dump.sql" ]]; then
        items+=("mpc_postgres")
        echo "  ${#items[@]}) MPC PostgreSQL (mpc_postgres_dump.sql)"
    fi

    for entry in "${VOLUMES[@]}"; do
        IFS=':' read -r volume desc <<< "$entry"
        if [[ -f "$backup_dir/${volume}.tar.gz" ]]; then
            items+=("$volume")
            echo "  ${#items[@]}) $desc (${volume}.tar.gz)"
        fi
    done

    if (( ${#items[@]} == 0 )); then
        log_warn "备份目录中没有可恢复的内容"
        return 0
    fi

    echo ""
    read -r -p "请输入要恢复的序号(逗号分隔,如 1,3,4): " selection || true

    # Collect the validated indices first so the restore commands below never
    # share stdin with the reader loop.
    while IFS= read -r idx; do
        indices+=("$idx")
    done < <(parse_selection "$selection" "${#items[@]}")

    for idx in "${indices[@]}"; do
        item="${items[$idx]}"

        case "$item" in
            "services_postgres")
                restore_pg_dump "$SERVICES_POSTGRES_CONTAINER" "Services PostgreSQL" \
                    "$backup_dir/services_postgres_dump.sql" || rc=1
                ;;
            "mpc_postgres")
                restore_pg_dump "$MPC_POSTGRES_CONTAINER" "MPC PostgreSQL" \
                    "$backup_dir/mpc_postgres_dump.sql" || rc=1
                ;;
            *)
                # Volume restore
                log_info "恢复数据卷 $item..."
                if restore_volume_archive "$item" "$backup_dir"; then
                    log_info "✓ $item 恢复完成"
                else
                    log_error "$item 恢复失败"
                    rc=1
                fi
                ;;
        esac
    done

    return "$rc"
}

# ============================================
# List
# ============================================

# Print a table (name, size, time) of all snapshot directories under
# BACKUP_ROOT, newest name first. The time column is filled only for names
# in the YYYYmmdd_HHMMSS format.
do_list() {
    local -a dirs=()
    local dir name size date_str i

    print_header "可用备份列表"

    if [[ ! -d "$BACKUP_ROOT" ]]; then
        log_info "暂无备份"
        return 0
    fi

    # Glob instead of parsing ls, so names containing whitespace survive.
    for dir in "$BACKUP_ROOT"/*/; do
        [[ -d "$dir" ]] || continue
        dirs+=("${dir%/}")
    done

    if (( ${#dirs[@]} == 0 )); then
        log_info "暂无备份"
        return 0
    fi

    printf "%-20s %-10s %-20s\n" "目录名" "大小" "时间"
    printf "%-20s %-10s %-20s\n" "--------------------" "----------" "--------------------"

    # Glob results are sorted ascending; walk backwards for newest first.
    for (( i = ${#dirs[@]} - 1; i >= 0; i-- )); do
        dir="${dirs[$i]}"
        name="${dir##*/}"
        size=$(du -sh "$dir" 2>/dev/null | cut -f1)
        date_str=""

        # Decode the timestamp embedded in the directory name
        if [[ "$name" =~ ^([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})([0-9]{2})$ ]]; then
            date_str="${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-${BASH_REMATCH[3]} ${BASH_REMATCH[4]}:${BASH_REMATCH[5]}:${BASH_REMATCH[6]}"
        fi

        printf "%-20s %-10s %-20s\n" "$name" "$size" "$date_str"
    done
}

# ============================================
# Info
# ============================================

# Show BACKUP_INFO.txt of one snapshot, or a directory listing if the summary
# file is missing. Exits 1 when no directory is given or it does not exist.
#   $1 - backup directory (absolute, or relative to BACKUP_ROOT)
do_info() {
    local backup_dir="$1"

    if [[ -z "$backup_dir" ]]; then
        log_error "请指定备份目录"
        echo "用法: $0 info <备份目录>"
        exit 1
    fi

    backup_dir=$(resolve_backup_dir "$backup_dir")

    if [[ ! -d "$backup_dir" ]]; then
        log_error "备份目录不存在: $backup_dir"
        exit 1
    fi

    print_header "备份详情"

    if [[ -f "$backup_dir/BACKUP_INFO.txt" ]]; then
        cat "$backup_dir/BACKUP_INFO.txt"
    else
        echo "目录: $backup_dir"
        echo ""
        ls -lh "$backup_dir"
    fi
}

# ============================================
# Clean
# ============================================

# Delete snapshot directories directly under BACKUP_ROOT whose mtime is older
# than the given number of days (find -mtime +N), after confirmation.
#   $1 - days to keep (default 7); must be a non-negative integer
do_clean() {
    local keep_days="${1:-7}"
    local -a stale=()
    local dir size

    print_header "清理旧备份"

    if [[ ! "$keep_days" =~ ^[0-9]+$ ]]; then
        log_error "保留天数必须是非负整数: $keep_days"
        exit 1
    fi

    if [[ ! -d "$BACKUP_ROOT" ]]; then
        log_info "备份目录不存在"
        return 0
    fi

    log_info "保留最近 $keep_days 天的备份"
    echo ""

    # -mindepth 1 keeps BACKUP_ROOT itself out of the result; NUL-delimited
    # output keeps paths with whitespace intact.
    while IFS= read -r -d '' dir; do
        stale+=("$dir")
    done < <(find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -mtime +"$keep_days" -print0 2>/dev/null)

    if (( ${#stale[@]} == 0 )); then
        log_info "没有需要清理的备份"
        return 0
    fi

    echo "将删除以下备份:"
    for dir in "${stale[@]}"; do
        size=$(du -sh "$dir" | cut -f1)
        echo "  - $(basename "$dir") ($size)"
    done
    echo ""

    if confirm "确认删除?"; then
        for dir in "${stale[@]}"; do
            rm -rf -- "${dir:?}"
            log_info "已删除: $(basename "$dir")"
        done
        log_info "清理完成"
    else
        log_info "已取消"
    fi
}

# ============================================
# Help
# ============================================

# Print the usage text.
show_help() {
    cat << EOF
RWA 系统数据快照工具

用法:
    $0 <命令> [参数]

命令:
    backup              创建完整系统备份
    restore <目录>      从指定备份恢复
    list                列出所有可用备份
    info <目录>         查看备份详情
    clean [天数]        清理旧备份(默认保留7天)
    help                显示此帮助信息

示例:
    $0 backup                      # 创建新备份
    $0 restore 20260104_153022     # 恢复指定备份
    $0 list                        # 查看所有备份
    $0 clean 14                    # 保留14天内的备份

备份内容:
    - PostgreSQL 数据库 (逻辑备份 + 数据卷)
    - Redis 数据
    - Kafka 消息队列数据
    - Zookeeper 配置数据
    - 系统状态信息

备份位置: $BACKUP_ROOT

EOF
}

# ============================================
# Entry point
# ============================================

# Dispatch the sub-command given as first argument (default: help).
main() {
    local command="${1:-help}"
    shift || true

    case "$command" in
        backup)         do_backup ;;
        restore)        do_restore "$@" ;;
        list)           do_list ;;
        info)           do_info "$@" ;;
        clean)          do_clean "$@" ;;
        help|--help|-h) show_help ;;
        *)
            log_error "未知命令: $command"
            echo ""
            show_help
            exit 1
            ;;
    esac
}

main "$@"