#!/bin/bash
#
# RWA system data snapshot tool
# Purpose: back up and restore Docker volumes (PostgreSQL, Redis, Kafka,
# Zookeeper).
#
# Usage:
#   ./snapshot.sh backup              # create a full backup
#   ./snapshot.sh restore <dir>       # restore from a backup
#   ./snapshot.sh list                # list all backups
#   ./snapshot.sh info <dir>          # show details of one backup
#   ./snapshot.sh clean [keep-days]   # delete old backups (default: keep 7 days)
#
# Backup notes:
#   - Containers do not have to be stopped, but make sure no user activity is
#     in flight (avoids inconsistent data).
#   - PostgreSQL is dumped online with pg_dumpall (logical backup).
#   - Redis gets a BGSAVE first so its data is persisted to the volume.
#   - Volumes are mounted read-only while they are archived.
#
# Restore notes:
#   - PostgreSQL logical restore (recommended): containers keep running, the
#     SQL dump is imported directly.
#   - Physical volume restore: the related containers must be stopped first.
#

set -e

# ============================================
# Configuration
# ============================================
BACKUP_ROOT=~/docker-backups
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# Services container names
SERVICES_POSTGRES_CONTAINER="rwa-postgres"
SERVICES_REDIS_CONTAINER="rwa-redis"
SERVICES_KAFKA_CONTAINER="rwa-kafka"

# MPC System container names
MPC_POSTGRES_CONTAINER="mpc-postgres"

# Volumes to back up, as "<volume name>:<human readable description>"
declare -a VOLUMES=(
  "services_postgres_data:Services PostgreSQL 数据"
  "services_redis_data:Services Redis 数据"
  "services_kafka_data:Kafka 消息队列数据"
  "services_zookeeper_data:Zookeeper 配置数据"
  "services_zookeeper_log:Zookeeper 日志"
  "mpc-system_postgres-data:MPC PostgreSQL 数据"
)

# Colors (escape sequences are expanded by printf %b when printed)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# ============================================
# Utility functions
# ============================================

# Leveled log helpers. $1 is the message; it is printed verbatim via %s.
log_info() { printf '%b %s\n' "${GREEN}[INFO]${NC}" "$1"; }
log_warn() { printf '%b %s\n' "${YELLOW}[WARN]${NC}" "$1"; }
log_error() { printf '%b %s\n' "${RED}[ERROR]${NC}" "$1"; }
log_step() { printf '%b %s\n' "${CYAN}[STEP]${NC}" "$1"; }

# Print a colored banner with the title given in $1.
print_header() {
  local bar="============================================"
  echo ""
  echo -e "${BLUE}${bar}${NC}"
  echo -e "${BLUE}  $1${NC}"
  echo -e "${BLUE}${bar}${NC}"
  echo ""
}

# Exit with status 1 unless the docker CLI is on PATH and `docker info` works.
check_docker() {
  if ! command -v docker &> /dev/null; then
    log_error "Docker 未安装或不在 PATH 中"
    exit 1
  fi

  if ! docker info &> /dev/null; then
    log_error "Docker daemon 未运行"
    exit 1
  fi
}

# Ask a yes/no question.
# Arguments: $1 - prompt text, $2 - default answer, "y" or "n" (default "n")
# Returns:   0 if the (possibly defaulted) answer is y/Y, non-zero otherwise
confirm() {
  local prompt="$1"
  local default="${2:-n}"
  local response=""

  if [[ "$default" == "y" ]]; then
    prompt="$prompt [Y/n]: "
  else
    prompt="$prompt [y/N]: "
  fi

  # At EOF (non-interactive run) fall through to the default answer.
  read -r -p "$prompt" response || true
  response=${response:-$default}
  [[ "$response" =~ ^[Yy]$ ]]
}

# Return 0 if a container named exactly $1 is listed by `docker ps`.
# grep -q may close the pipe early, which is why the script does not enable
# `pipefail`: docker's resulting SIGPIPE would turn a match into a failure.
is_container_running() {
  docker ps --format '{{.Names}}' | grep -qxF -- "$1"
}

# Turn a relative backup name into a path under BACKUP_ROOT; absolute paths
# are passed through unchanged. Prints the resulting path.
resolve_backup_dir() {
  local dir="$1"
  if [[ "$dir" != /* ]]; then
    dir="$BACKUP_ROOT/$dir"
  fi
  printf '%s\n' "$dir"
}

# Print "YYYY-MM-DD HH:MM:SS" for a directory name of the form
# YYYYMMDD_HHMMSS; prints nothing for any other name. Always returns 0 so it
# is safe to call in a command substitution under `set -e`.
format_backup_timestamp() {
  local name="$1"
  if [[ "$name" =~ ^([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})([0-9]{2})$ ]]; then
    printf '%s-%s-%s %s:%s:%s\n' \
      "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" "${BASH_REMATCH[3]}" \
      "${BASH_REMATCH[4]}" "${BASH_REMATCH[5]}" "${BASH_REMATCH[6]}"
  fi
}

# Convert a 1-based menu choice into a 0-based array index.
# Arguments: $1 - user token (surrounding whitespace allowed), $2 - item count
# Outputs:   the 0-based index on stdout
# Returns:   1 if the token is not a decimal number within 1..count
parse_selection_index() {
  local token="${1//[[:space:]]/}"
  local count="$2"
  local n

  if [[ ! "$token" =~ ^[0-9]{1,9}$ ]]; then
    return 1
  fi
  # Force base 10 so that "08" is not rejected as an invalid octal literal.
  n=$((10#$token))
  if (( n < 1 || n > count )); then
    return 1
  fi
  printf '%s\n' "$((n - 1))"
}

# ============================================
# Backup
# ============================================

# Dump one PostgreSQL instance with pg_dumpall.
# Arguments: $1 - label for messages, $2 - container, $3 - database user,
#            $4 - output SQL file, $5 - log file receiving stderr
# A failed dump is removed so that a later restore cannot pick up a partial
# or empty file. Always returns 0 (backup is best effort per component).
dump_postgres() {
  local label="$1"
  local container="$2"
  local db_user="$3"
  local out_file="$4"
  local log_file="$5"

  if ! is_container_running "$container"; then
    log_warn "容器 $container 未运行,跳过"
    return 0
  fi

  log_info "导出 ${label}..."
  if docker exec "$container" pg_dumpall -U "$db_user" > "$out_file" 2>>"$log_file"; then
    log_info "✓ ${label} 导出完成 ($(du -h "$out_file" | cut -f1))"
  else
    log_warn "${label} 导出失败"
    rm -f -- "$out_file" || true
  fi
  return 0
}

# Wait until Redis reports that no background save is running.
# Arguments: $1 - container, $2 - timeout in seconds (default 60)
# Returns:   0 when the save finished (or its state cannot be read, in which
#            case a fixed 2 second pause is used instead), 1 on timeout
wait_for_redis_bgsave() {
  local container="$1"
  local timeout="${2:-60}"
  local waited=0
  local state

  while (( waited < timeout )); do
    state=$(docker exec "$container" redis-cli INFO persistence 2>/dev/null \
      | grep -o 'rdb_bgsave_in_progress:[01]') || state=""
    case "$state" in
      *:0)
        return 0
        ;;
      *:1)
        sleep 1
        waited=$((waited + 1))
        ;;
      *)
        # State unavailable (e.g. auth required): keep the fixed pause.
        sleep 2
        return 0
        ;;
    esac
  done
  return 1
}

# Archive every existing volume from VOLUMES into <backup_dir>/<volume>.tar.gz.
# Arguments: $1 - backup directory, $2 - log file receiving stderr
# Counters use x=$((x + 1)): `((x++))` returns status 1 when x is 0, which
# would abort the whole script under `set -e`.
backup_volumes() {
  local backup_dir="$1"
  local log_file="$2"
  local ok=0
  local skipped=0
  local failed=0
  local entry volume desc size

  for entry in "${VOLUMES[@]}"; do
    volume="${entry%%:*}"
    desc="${entry#*:}"

    if ! docker volume inspect "$volume" &>/dev/null; then
      log_warn "数据卷 $volume 不存在,跳过"
      skipped=$((skipped + 1))
      continue
    fi

    log_info "备份 $volume ($desc)..."
    if docker run --rm \
      -v "$volume":/data:ro \
      -v "$backup_dir":/backup \
      alpine tar czf "/backup/${volume}.tar.gz" -C /data . 2>>"$log_file"; then
      size=$(du -h "$backup_dir/${volume}.tar.gz" | cut -f1)
      log_info "✓ $volume 完成 ($size)"
      ok=$((ok + 1))
    else
      log_error "$volume 备份失败"
      # Do not leave a truncated archive behind for a later restore.
      rm -f -- "$backup_dir/${volume}.tar.gz" || true
      failed=$((failed + 1))
    fi
  done

  log_info "数据卷备份: ${ok} 成功, ${skipped} 跳过, ${failed} 失败"
}

# Create a full snapshot under $BACKUP_ROOT/$TIMESTAMP: PostgreSQL dumps,
# Redis BGSAVE, volume archives, Kafka topic list, container/volume status,
# compose files and a BACKUP_INFO.txt summary.
do_backup() {
  local backup_dir="$BACKUP_ROOT/$TIMESTAMP"
  local log_file="$backup_dir/backup.log"
  local topics_file topic_count compose_dir compose_name total_size

  print_header "开始系统完整数据快照"

  check_docker

  echo -e "${YELLOW}备份提示:${NC}"
  echo "  - 备份过程中无需停止容器"
  echo "  - 建议确保当前没有用户正在进行关键操作"
  echo "  - PostgreSQL 使用事务一致性备份,Redis 会触发 BGSAVE"
  echo ""

  if ! confirm "确认开始备份?" "y"; then
    log_info "已取消备份"
    exit 0
  fi

  # Create the backup directory
  mkdir -p "$backup_dir"
  echo "备份开始时间: $(date)" > "$log_file"

  log_info "备份目录: $backup_dir"

  # ----------------------------------------
  # 1. PostgreSQL logical backup
  # ----------------------------------------
  log_step "1/5 备份 PostgreSQL 数据库 (逻辑备份)"

  dump_postgres "Services PostgreSQL" "$SERVICES_POSTGRES_CONTAINER" "rwa_user" \
    "$backup_dir/services_postgres_dump.sql" "$log_file"
  dump_postgres "MPC PostgreSQL" "$MPC_POSTGRES_CONTAINER" "postgres" \
    "$backup_dir/mpc_postgres_dump.sql" "$log_file"

  # ----------------------------------------
  # 2. Redis persistence
  # ----------------------------------------
  log_step "2/5 触发 Redis 持久化"

  if is_container_running "$SERVICES_REDIS_CONTAINER"; then
    log_info "触发 Redis BGSAVE..."
    # Best effort: a failing BGSAVE must not abort the rest of the backup.
    docker exec "$SERVICES_REDIS_CONTAINER" redis-cli BGSAVE >>"$log_file" 2>&1 || true
    if wait_for_redis_bgsave "$SERVICES_REDIS_CONTAINER"; then
      log_info "✓ Redis BGSAVE 已触发"
    else
      log_warn "Redis BGSAVE 仍在进行中,数据卷中的 RDB 可能不是最新"
    fi
  else
    log_warn "容器 $SERVICES_REDIS_CONTAINER 未运行,跳过"
  fi

  # ----------------------------------------
  # 3. Docker volumes
  # ----------------------------------------
  log_step "3/5 备份 Docker 数据卷"

  backup_volumes "$backup_dir" "$log_file"

  # ----------------------------------------
  # 4. Kafka topic list
  # ----------------------------------------
  log_step "4/5 保存 Kafka Topics 列表"

  if is_container_running "$SERVICES_KAFKA_CONTAINER"; then
    topics_file="$backup_dir/kafka_topics.txt"
    if docker exec "$SERVICES_KAFKA_CONTAINER" kafka-topics \
      --bootstrap-server localhost:9092 --list > "$topics_file" 2>>"$log_file"; then
      topic_count=$(wc -l < "$topics_file")
      log_info "✓ 保存了 $topic_count 个 topics"
    else
      log_warn "获取 Kafka topics 失败"
      rm -f -- "$topics_file" || true
    fi
  else
    log_warn "Kafka 容器未运行,跳过"
  fi

  # ----------------------------------------
  # 5. System state
  # ----------------------------------------
  log_step "5/5 保存系统状态信息"

  # Container status
  docker ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" > "$backup_dir/containers_status.txt"
  log_info "✓ 容器状态已保存"

  # Volume list
  docker volume ls > "$backup_dir/volumes_list.txt"
  log_info "✓ 数据卷列表已保存"

  # Docker Compose configuration
  for compose_dir in ~/services ~/mpc-system; do
    if [[ -f "$compose_dir/docker-compose.yml" ]]; then
      compose_name=$(basename "$compose_dir")
      cp "$compose_dir/docker-compose.yml" "$backup_dir/${compose_name}_docker-compose.yml"
      log_info "✓ ${compose_name}/docker-compose.yml 已备份"
    fi
  done

  # ----------------------------------------
  # Backup summary
  # ----------------------------------------
  total_size=$(du -sh "$backup_dir" | cut -f1)

  cat > "$backup_dir/BACKUP_INFO.txt" << EOF
===========================================
RWA 系统数据快照
===========================================
备份时间: $(date)
备份目录: $backup_dir
总大小: $total_size

包含文件:
$(ls -lh "$backup_dir" | tail -n +2)

-------------------------------------------
恢复命令:
-------------------------------------------
# 完整恢复
./snapshot.sh restore $backup_dir

# 或手动恢复 PostgreSQL
docker exec -i $SERVICES_POSTGRES_CONTAINER psql -U rwa_user < $backup_dir/services_postgres_dump.sql
docker exec -i $MPC_POSTGRES_CONTAINER psql -U postgres < $backup_dir/mpc_postgres_dump.sql

===========================================
EOF

  echo "备份完成时间: $(date)" >> "$log_file"

  print_header "备份完成"
  log_info "备份目录: $backup_dir"
  log_info "总大小: $total_size"
  echo ""
  ls -lh "$backup_dir"
}

# ============================================
# Restore
# ============================================

# Interactive restore entry point.
# Arguments: $1 - backup directory (absolute, or a name under BACKUP_ROOT)
do_restore() {
  local backup_dir="$1"
  local choice=""

  if [[ -z "$backup_dir" ]]; then
    log_error "请指定备份目录"
    echo "用法: $0 restore <备份目录>"
    echo ""
    echo "可用备份:"
    do_list
    exit 1
  fi

  # Relative names are looked up under BACKUP_ROOT
  backup_dir=$(resolve_backup_dir "$backup_dir")

  if [[ ! -d "$backup_dir" ]]; then
    log_error "备份目录不存在: $backup_dir"
    exit 1
  fi

  print_header "从备份恢复系统数据"

  check_docker

  log_info "备份目录: $backup_dir"
  echo ""

  if [[ -f "$backup_dir/BACKUP_INFO.txt" ]]; then
    cat "$backup_dir/BACKUP_INFO.txt"
    echo ""
  fi

  echo -e "${RED}警告: 恢复操作将覆盖现有数据!${NC}"
  echo ""

  if ! confirm "确定要继续恢复吗?"; then
    log_info "已取消恢复操作"
    exit 0
  fi

  # ----------------------------------------
  # Choose what to restore
  # ----------------------------------------
  echo ""
  echo "请选择恢复内容:"
  echo "  1) 完整恢复 (PostgreSQL + 所有数据卷)"
  echo "  2) 仅恢复 PostgreSQL (逻辑恢复,推荐)"
  echo "  3) 仅恢复数据卷 (物理恢复)"
  echo "  4) 选择性恢复"
  echo ""
  read -r -p "请选择 [1-4]: " choice || choice=""

  case "$choice" in
    1)
      restore_all "$backup_dir"
      ;;
    2)
      restore_postgres "$backup_dir"
      ;;
    3)
      restore_volumes "$backup_dir"
      ;;
    4)
      restore_selective "$backup_dir"
      ;;
    *)
      log_error "无效选择"
      exit 1
      ;;
  esac

  print_header "恢复完成"
}

# Import one SQL dump into a running PostgreSQL container.
# Arguments: $1 - label for messages, $2 - container, $3 - database user,
#            $4 - dump file
# Missing dump / stopped container are reported as warnings. Always returns 0.
restore_postgres_dump() {
  local label="$1"
  local container="$2"
  local db_user="$3"
  local dump_file="$4"

  if [[ ! -f "$dump_file" ]]; then
    log_warn "未找到 $(basename "$dump_file")"
    return 0
  fi

  if ! is_container_running "$container"; then
    log_warn "容器 $container 未运行"
    return 0
  fi

  log_info "恢复 ${label}..."
  # NOTE(review): psql stderr is discarded as in the original script,
  # presumably to hide "already exists" notices; confirm this is still wanted.
  if docker exec -i "$container" psql -U "$db_user" < "$dump_file" 2>/dev/null; then
    log_info "✓ ${label} 恢复完成"
  else
    log_warn "${label} 恢复可能有警告(通常是正常的)"
  fi
  return 0
}

# Restore both PostgreSQL instances from the dumps in $1.
restore_postgres() {
  local backup_dir="$1"

  log_step "恢复 PostgreSQL 数据库"

  restore_postgres_dump "Services PostgreSQL" "$SERVICES_POSTGRES_CONTAINER" \
    "rwa_user" "$backup_dir/services_postgres_dump.sql"
  restore_postgres_dump "MPC PostgreSQL" "$MPC_POSTGRES_CONTAINER" \
    "postgres" "$backup_dir/mpc_postgres_dump.sql"
}

# Replace the contents of one volume with <backup_dir>/<volume>.tar.gz.
# Arguments: $1 - backup directory, $2 - volume name
# Returns:   0 on success, 1 if the extraction container failed
restore_volume_archive() {
  local backup_dir="$1"
  local volume="$2"

  # Create the volume if it does not exist yet
  if ! docker volume inspect "$volume" &>/dev/null; then
    docker volume create "$volume" >/dev/null
    log_info "创建数据卷 $volume"
  fi

  # The volume name is passed as $1 to the inner shell instead of being
  # interpolated into the command string. The rm covers regular entries and
  # dotfiles; its errors (unmatched globs) are ignored on purpose.
  if docker run --rm \
    -v "$volume":/data \
    -v "$backup_dir":/backup:ro \
    alpine sh -c 'rm -rf /data/* /data/..?* /data/.[!.]* 2>/dev/null; tar xzf "/backup/$1.tar.gz" -C /data' sh "$volume"; then
    log_info "✓ $volume 恢复完成"
  else
    log_error "$volume 恢复失败"
    return 1
  fi
}

# Physically restore every volume from VOLUMES that has an archive in $1.
# Requires the user to confirm that the related containers are stopped.
restore_volumes() {
  local backup_dir="$1"
  local entry volume desc archive

  log_step "恢复 Docker 数据卷"

  echo ""
  echo -e "${RED}警告: 恢复数据卷需要先停止相关容器!${NC}"
  echo -e "${YELLOW}提示: 如果只是恢复 PostgreSQL 数据,建议使用选项 2 (逻辑恢复),无需停止容器${NC}"
  echo ""

  if ! confirm "是否已停止所有相关容器?"; then
    log_info "请先停止容器,然后重新运行恢复"
    echo ""
    echo "停止命令:"
    echo "  cd ~/services && docker compose down"
    echo "  cd ~/mpc-system && docker compose down"
    exit 1
  fi

  for entry in "${VOLUMES[@]}"; do
    volume="${entry%%:*}"
    desc="${entry#*:}"
    archive="$backup_dir/${volume}.tar.gz"

    if [[ -f "$archive" ]]; then
      log_info "恢复 $volume ($desc)..."
      # Failure is already logged; keep going with the remaining volumes.
      restore_volume_archive "$backup_dir" "$volume" || true
    else
      log_warn "未找到 $archive,跳过"
    fi
  done
}

# Full restore: volumes first, then (after the user restarted the containers)
# the PostgreSQL logical dumps.
restore_all() {
  local backup_dir="$1"

  # Restore volumes first
  restore_volumes "$backup_dir"

  echo ""
  log_info "请先启动容器,然后继续恢复 PostgreSQL"
  echo ""
  echo "启动命令:"
  echo "  cd ~/services && docker compose up -d"
  echo "  cd ~/mpc-system && docker compose up -d"
  echo ""

  if confirm "容器已启动,继续恢复 PostgreSQL?"; then
    restore_postgres "$backup_dir"
  fi
}

# Let the user pick individual dumps/volumes (comma separated menu numbers)
# from what is present in backup directory $1 and restore only those.
restore_selective() {
  local backup_dir="$1"
  local -a items=()
  local -a selected=()
  local i=1
  local entry volume desc selection token idx item

  echo ""
  echo "可恢复的内容:"
  echo ""

  if [[ -f "$backup_dir/services_postgres_dump.sql" ]]; then
    echo "  $i) Services PostgreSQL (services_postgres_dump.sql)"
    items+=("services_postgres")
    i=$((i + 1))
  fi

  if [[ -f "$backup_dir/mpc_postgres_dump.sql" ]]; then
    echo "  $i) MPC PostgreSQL (mpc_postgres_dump.sql)"
    items+=("mpc_postgres")
    i=$((i + 1))
  fi

  for entry in "${VOLUMES[@]}"; do
    volume="${entry%%:*}"
    desc="${entry#*:}"
    if [[ -f "$backup_dir/${volume}.tar.gz" ]]; then
      echo "  $i) $desc (${volume}.tar.gz)"
      items+=("$volume")
      i=$((i + 1))
    fi
  done

  echo ""
  read -r -p "请输入要恢复的序号(逗号分隔,如 1,3,4): " selection || selection=""

  IFS=',' read -ra selected <<< "$selection"

  for token in "${selected[@]}"; do
    # Validate before doing arithmetic: the token is raw user input.
    if ! idx=$(parse_selection_index "$token" "${#items[@]}"); then
      log_warn "无效序号: $token,跳过"
      continue
    fi
    item="${items[$idx]}"

    case "$item" in
      "services_postgres")
        restore_postgres_dump "Services PostgreSQL" "$SERVICES_POSTGRES_CONTAINER" \
          "rwa_user" "$backup_dir/services_postgres_dump.sql"
        ;;
      "mpc_postgres")
        restore_postgres_dump "MPC PostgreSQL" "$MPC_POSTGRES_CONTAINER" \
          "postgres" "$backup_dir/mpc_postgres_dump.sql"
        ;;
      *)
        # Volume restore
        log_info "恢复数据卷 $item..."
        # Failure is already logged; continue with the other selections.
        restore_volume_archive "$backup_dir" "$item" || true
        ;;
    esac
  done
}

# ============================================
# List
# ============================================

# Print a table (name, size, timestamp) of all backups, newest name first.
do_list() {
  local -a dirs=()
  local dir name size date_str i

  print_header "可用备份列表"

  if [[ ! -d "$BACKUP_ROOT" ]]; then
    log_info "暂无备份"
    return
  fi

  # Glob instead of parsing `ls`; an unmatched pattern stays literal and is
  # filtered out by the -d test.
  for dir in "$BACKUP_ROOT"/*/; do
    if [[ -d "$dir" ]]; then
      dirs+=("$dir")
    fi
  done

  if (( ${#dirs[@]} == 0 )); then
    log_info "暂无备份"
    return
  fi

  printf "%-20s %-10s %-20s\n" "目录名" "大小" "时间"
  printf "%-20s %-10s %-20s\n" "--------------------" "----------" "--------------------"

  # Glob results are sorted ascending; walk them backwards for newest first.
  for (( i = ${#dirs[@]} - 1; i >= 0; i-- )); do
    dir="${dirs[i]}"
    name=$(basename "$dir")
    size=$(du -sh "$dir" 2>/dev/null | cut -f1)
    # Parse the timestamp encoded in the directory name
    date_str=$(format_backup_timestamp "$name")

    printf "%-20s %-10s %-20s\n" "$name" "$size" "$date_str"
  done
}

# ============================================
# Info
# ============================================

# Show BACKUP_INFO.txt of backup $1, or a directory listing if it is missing.
do_info() {
  local backup_dir="$1"

  if [[ -z "$backup_dir" ]]; then
    log_error "请指定备份目录"
    echo "用法: $0 info <备份目录>"
    exit 1
  fi

  backup_dir=$(resolve_backup_dir "$backup_dir")

  if [[ ! -d "$backup_dir" ]]; then
    log_error "备份目录不存在: $backup_dir"
    exit 1
  fi

  print_header "备份详情"

  if [[ -f "$backup_dir/BACKUP_INFO.txt" ]]; then
    cat "$backup_dir/BACKUP_INFO.txt"
  else
    echo "目录: $backup_dir"
    echo ""
    ls -lh "$backup_dir"
  fi
}

# ============================================
# Clean
# ============================================

# Delete backup directories whose mtime is older than the retention period.
# Arguments: $1 - days to keep (non-negative integer, default 7)
do_clean() {
  local keep_days="${1:-7}"
  local -a stale=()
  local dir size

  print_header "清理旧备份"

  if [[ ! "$keep_days" =~ ^[0-9]+$ ]]; then
    log_error "保留天数必须是非负整数: $keep_days"
    exit 1
  fi

  if [[ ! -d "$BACKUP_ROOT" ]]; then
    log_info "备份目录不存在"
    return
  fi

  log_info "保留最近 $keep_days 天的备份"
  echo ""

  # -mindepth 1 excludes BACKUP_ROOT itself; NUL delimiters keep unusual
  # directory names intact.
  while IFS= read -r -d '' dir; do
    stale+=("$dir")
  done < <(find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d \
    -mtime +"$keep_days" -print0 2>/dev/null)

  if (( ${#stale[@]} == 0 )); then
    log_info "没有需要清理的备份"
    return
  fi

  echo "将删除以下备份:"
  for dir in "${stale[@]}"; do
    size=$(du -sh "$dir" | cut -f1)
    echo "  - $(basename "$dir") ($size)"
  done
  echo ""

  if confirm "确认删除?"; then
    for dir in "${stale[@]}"; do
      rm -rf -- "$dir"
      log_info "已删除: $(basename "$dir")"
    done
    log_info "清理完成"
  else
    log_info "已取消"
  fi
}

# ============================================
# Help
# ============================================

# Print usage information.
show_help() {
  cat << EOF
RWA 系统数据快照工具

用法: $0 <命令> [参数]

命令:
  backup              创建完整系统备份
  restore <目录>      从指定备份恢复
  list                列出所有可用备份
  info <目录>         查看备份详情
  clean [天数]        清理旧备份(默认保留7天)
  help                显示此帮助信息

示例:
  $0 backup                      # 创建新备份
  $0 restore 20260104_153022     # 恢复指定备份
  $0 list                        # 查看所有备份
  $0 clean 14                    # 保留14天内的备份

备份内容:
  - PostgreSQL 数据库 (逻辑备份 + 数据卷)
  - Redis 数据
  - Kafka 消息队列数据
  - Zookeeper 配置数据
  - 系统状态信息

备份位置: $BACKUP_ROOT
EOF
}

# ============================================
# Entry point
# ============================================

# Dispatch on the first argument; remaining arguments go to the sub-command.
main() {
  local command="${1:-help}"
  # With no arguments there is nothing to shift.
  shift || true

  case "$command" in
    backup)
      do_backup
      ;;
    restore)
      do_restore "$@"
      ;;
    list)
      do_list
      ;;
    info)
      do_info "$@"
      ;;
    clean)
      do_clean "$@"
      ;;
    help|--help|-h)
      show_help
      ;;
    *)
      log_error "未知命令: $command"
      echo ""
      show_help
      exit 1
      ;;
  esac
}

main "$@"