rwadurian/scripts/tools/snapshot.sh

693 lines
21 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#
# RWA 系统数据快照工具
# 功能:备份与恢复 Docker 数据卷(PostgreSQL、Redis、Kafka、Zookeeper)
#
# 使用方法:
# ./snapshot.sh backup # 创建完整备份
# ./snapshot.sh restore <目录> # 从备份恢复
# ./snapshot.sh list # 列出所有备份
# ./snapshot.sh info <目录> # 查看备份详情
# ./snapshot.sh clean [保留天数] # 清理旧备份(默认保留7天)
#
# 备份说明:
# - 备份时无需停止容器,但建议确保无用户操作(避免数据不一致)
# - PostgreSQL 使用 pg_dumpall 进行在线逻辑备份(事务一致性)
# - Redis 先触发 BGSAVE 确保数据持久化
# - 数据卷以只读方式挂载进行备份
#
# 恢复说明:
# - PostgreSQL 逻辑恢复(推荐):无需停止容器,直接导入 SQL
# - 数据卷物理恢复:需要先停止相关容器
#
# Abort the script as soon as an unhandled command fails.
set -o errexit

# ============================================
# Configuration
# ============================================
# Root directory; every snapshot lives in its own sub-directory below it.
BACKUP_ROOT=~/docker-backups
# Name of the snapshot directory created by this run (YYYYmmdd_HHMMSS).
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"

# Container names of the "services" stack.
SERVICES_POSTGRES_CONTAINER='rwa-postgres'
SERVICES_REDIS_CONTAINER='rwa-redis'
SERVICES_KAFKA_CONTAINER='rwa-kafka'

# Container name of the MPC system database.
MPC_POSTGRES_CONTAINER='mpc-postgres'

# Docker volumes to archive; each entry is "volume_name:description".
VOLUMES=(
  'services_postgres_data:Services PostgreSQL 数据'
  'services_redis_data:Services Redis 数据'
  'services_kafka_data:Kafka 消息队列数据'
  'services_zookeeper_data:Zookeeper 配置数据'
  'services_zookeeper_log:Zookeeper 日志'
  'mpc-system_postgres-data:MPC PostgreSQL 数据'
)

# Colour codes, stored as literal backslash sequences and interpreted only
# when they are printed.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
CYAN="\033[0;36m"
NC="\033[0m"
# ============================================
# 工具函数
# ============================================
# Print "$1" to stdout prefixed with a green [INFO] tag.
log_info() {
  local message="$1"
  echo -e "${GREEN}[INFO]${NC} ${message}"
}

# Print "$1" to stdout prefixed with a yellow [WARN] tag.
log_warn() {
  local message="$1"
  echo -e "${YELLOW}[WARN]${NC} ${message}"
}

# Print "$1" to stdout prefixed with a red [ERROR] tag.
log_error() {
  local message="$1"
  echo -e "${RED}[ERROR]${NC} ${message}"
}

# Print "$1" to stdout prefixed with a cyan [STEP] tag.
log_step() {
  local message="$1"
  echo -e "${CYAN}[STEP]${NC} ${message}"
}
# Print a blue banner: a blank line, a rule, the title "$1", a rule and a
# closing blank line.
print_header() {
  local title="$1"
  local rule="${BLUE}============================================${NC}"

  echo
  echo -e "${rule}"
  echo -e "${BLUE} ${title}${NC}"
  echo -e "${rule}"
  echo
}
# Verify that Docker is usable before doing any work.
# Exits the script with status 1 (after logging an error) when the `docker`
# command cannot be resolved or when `docker info` fails.
check_docker() {
  # The docker command must be resolvable.
  command -v docker >/dev/null 2>&1 || {
    log_error "Docker 未安装或不在 PATH 中"
    exit 1
  }

  # A failing `docker info` is reported as "daemon not running".
  docker info >/dev/null 2>&1 || {
    log_error "Docker daemon 未运行"
    exit 1
  }
}
# Ask a yes/no question on the terminal.
#
# Arguments:
#   $1 - question text (the "[Y/n]: " / "[y/N]: " suffix is appended here)
#   $2 - default answer used when the reply is empty: "y" or "n" (default "n")
# Returns:
#   0 when the (possibly defaulted) answer is exactly "y" or "Y", 1 otherwise.
confirm() {
  local prompt="$1"
  local default="${2:-n}"
  # Keep the answer local so it does not leak into the caller's scope.
  local response=""

  if [[ "$default" == "y" ]]; then
    prompt="$prompt [Y/n]: "
  else
    prompt="$prompt [y/N]: "
  fi

  # -r keeps backslashes literal. A failed read (EOF on stdin) is treated as
  # an empty reply so the default applies instead of the read status leaking.
  read -r -p "$prompt" response || true
  response=${response:-$default}
  [[ "$response" =~ ^[Yy]$ ]]
}
# ============================================
# 备份函数
# ============================================
# Create a full snapshot in "$BACKUP_ROOT/$TIMESTAMP".
#
# Steps:
#   1. pg_dumpall of the Services and MPC PostgreSQL containers
#   2. trigger a Redis BGSAVE
#   3. tar every Docker volume listed in VOLUMES (mounted read-only)
#   4. save the Kafka topic list
#   5. save container/volume listings and docker-compose files
# and finally write BACKUP_INFO.txt. A container that is not running or a
# volume that does not exist is skipped with a warning instead of aborting.
#
# Globals:   BACKUP_ROOT, TIMESTAMP, VOLUMES, SERVICES_*_CONTAINER,
#            MPC_POSTGRES_CONTAINER, YELLOW, NC (all read)
# Arguments: none
# Returns:   0 on completion; exits 0 when the user declines the confirmation.
do_backup() {
  local backup_dir="$BACKUP_ROOT/$TIMESTAMP"
  local log_file="$backup_dir/backup.log"
  local entry volume desc size topic_count total_size compose_dir compose_name
  local success_count=0
  local skip_count=0

  print_header "开始系统完整数据快照"
  check_docker
  echo -e "${YELLOW}备份提示:${NC}"
  echo " - 备份过程中无需停止容器"
  echo " - 建议确保当前没有用户正在进行关键操作"
  echo " - PostgreSQL 使用事务一致性备份Redis 会触发 BGSAVE"
  echo ""
  if ! confirm "确认开始备份?" "y"; then
    log_info "已取消备份"
    exit 0
  fi

  # Create the snapshot directory and start the log.
  mkdir -p "$backup_dir"
  echo "备份开始时间: $(date)" > "$log_file"
  log_info "备份目录: $backup_dir"

  # ----------------------------------------
  # 1. PostgreSQL logical backup
  # ----------------------------------------
  log_step "1/5 备份 PostgreSQL 数据库 (逻辑备份)"

  # Services PostgreSQL
  if docker ps --format '{{.Names}}' | grep -q "^${SERVICES_POSTGRES_CONTAINER}$"; then
    log_info "导出 Services PostgreSQL..."
    if docker exec "$SERVICES_POSTGRES_CONTAINER" pg_dumpall -U rwa_user \
      > "$backup_dir/services_postgres_dump.sql" 2>>"$log_file"; then
      size=$(du -h "$backup_dir/services_postgres_dump.sql" | cut -f1)
      log_info "✓ Services PostgreSQL 导出完成 ($size)"
    else
      log_warn "Services PostgreSQL 导出失败"
      # Do not leave a partial dump behind: the restore functions treat any
      # existing dump file as restorable.
      rm -f -- "$backup_dir/services_postgres_dump.sql"
    fi
  else
    log_warn "容器 $SERVICES_POSTGRES_CONTAINER 未运行,跳过"
  fi

  # MPC PostgreSQL
  if docker ps --format '{{.Names}}' | grep -q "^${MPC_POSTGRES_CONTAINER}$"; then
    log_info "导出 MPC PostgreSQL..."
    if docker exec "$MPC_POSTGRES_CONTAINER" pg_dumpall -U postgres \
      > "$backup_dir/mpc_postgres_dump.sql" 2>>"$log_file"; then
      size=$(du -h "$backup_dir/mpc_postgres_dump.sql" | cut -f1)
      log_info "✓ MPC PostgreSQL 导出完成 ($size)"
    else
      log_warn "MPC PostgreSQL 导出失败"
      rm -f -- "$backup_dir/mpc_postgres_dump.sql"
    fi
  else
    log_warn "容器 $MPC_POSTGRES_CONTAINER 未运行,跳过"
  fi

  # ----------------------------------------
  # 2. Redis persistence
  # ----------------------------------------
  log_step "2/5 触发 Redis 持久化"
  if docker ps --format '{{.Names}}' | grep -q "^${SERVICES_REDIS_CONTAINER}$"; then
    log_info "触发 Redis BGSAVE..."
    # Best effort: a failing BGSAVE must not abort the whole backup.
    docker exec "$SERVICES_REDIS_CONTAINER" redis-cli BGSAVE >>"$log_file" 2>&1 || true
    # Fixed pause before the volume is archived; completion is not verified.
    sleep 2
    log_info "✓ Redis BGSAVE 已触发"
  else
    log_warn "容器 $SERVICES_REDIS_CONTAINER 未运行,跳过"
  fi

  # ----------------------------------------
  # 3. Docker volumes
  # ----------------------------------------
  log_step "3/5 备份 Docker 数据卷"
  for entry in "${VOLUMES[@]}"; do
    IFS=':' read -r volume desc <<< "$entry"
    if docker volume inspect "$volume" &>/dev/null; then
      log_info "备份 $volume ($desc)..."
      if docker run --rm \
        -v "$volume":/data:ro \
        -v "$backup_dir":/backup \
        alpine tar czf "/backup/${volume}.tar.gz" -C /data . 2>>"$log_file"; then
        size=$(du -h "$backup_dir/${volume}.tar.gz" | cut -f1)
        log_info "$volume 完成 ($size)"
        # NOTE: "((n++))" returns status 1 while n is 0, which terminates the
        # script under "set -e"; a plain assignment has no such side effect.
        success_count=$((success_count + 1))
      else
        log_error "$volume 备份失败"
      fi
    else
      log_warn "数据卷 $volume 不存在,跳过"
      skip_count=$((skip_count + 1))
    fi
  done
  log_info "数据卷备份: ${success_count} 成功, ${skip_count} 跳过"

  # ----------------------------------------
  # 4. Kafka topic list
  # ----------------------------------------
  log_step "4/5 保存 Kafka Topics 列表"
  if docker ps --format '{{.Names}}' | grep -q "^${SERVICES_KAFKA_CONTAINER}$"; then
    if docker exec "$SERVICES_KAFKA_CONTAINER" kafka-topics --bootstrap-server localhost:9092 --list \
      > "$backup_dir/kafka_topics.txt" 2>>"$log_file"; then
      topic_count=$(wc -l < "$backup_dir/kafka_topics.txt")
      log_info "✓ 保存了 $topic_count 个 topics"
    else
      log_warn "获取 Kafka topics 失败"
    fi
  else
    log_warn "Kafka 容器未运行,跳过"
  fi

  # ----------------------------------------
  # 5. System state information
  # ----------------------------------------
  log_step "5/5 保存系统状态信息"

  # Container status
  docker ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" > "$backup_dir/containers_status.txt"
  log_info "✓ 容器状态已保存"

  # Volume list
  docker volume ls > "$backup_dir/volumes_list.txt"
  log_info "✓ 数据卷列表已保存"

  # Docker Compose files, stored as "<dir name>_docker-compose.yml"
  for compose_dir in ~/services ~/mpc-system; do
    if [ -f "$compose_dir/docker-compose.yml" ]; then
      compose_name=$(basename "$compose_dir")
      cp "$compose_dir/docker-compose.yml" "$backup_dir/${compose_name}_docker-compose.yml"
      log_info "${compose_name}/docker-compose.yml 已备份"
    fi
  done

  # ----------------------------------------
  # Backup summary
  # ----------------------------------------
  total_size=$(du -sh "$backup_dir" | cut -f1)
  cat > "$backup_dir/BACKUP_INFO.txt" << EOF
===========================================
RWA 系统数据快照
===========================================
备份时间: $(date)
备份目录: $backup_dir
总大小: $total_size
包含文件:
$(ls -lh "$backup_dir" | tail -n +2)
-------------------------------------------
恢复命令:
-------------------------------------------
# 完整恢复
./snapshot.sh restore $backup_dir
# 或手动恢复 PostgreSQL
docker exec -i $SERVICES_POSTGRES_CONTAINER psql -U rwa_user < $backup_dir/services_postgres_dump.sql
docker exec -i $MPC_POSTGRES_CONTAINER psql -U postgres < $backup_dir/mpc_postgres_dump.sql
===========================================
EOF
  echo "备份完成时间: $(date)" >> "$log_file"

  print_header "备份完成"
  log_info "备份目录: $backup_dir"
  log_info "总大小: $total_size"
  echo ""
  ls -lh "$backup_dir"
}
# ============================================
# 恢复函数
# ============================================
# Interactive restore entry point.
#
# Validates the snapshot directory, shows its BACKUP_INFO.txt (when present),
# asks for confirmation and then dispatches to one of the restore_* functions
# according to the menu choice.
#
# Arguments:
#   $1 - snapshot directory; a value that does not start with "/" is looked
#        up under BACKUP_ROOT
# Exits:
#   1 when no directory is given, the directory does not exist or the menu
#     choice is invalid; 0 when the user cancels.
do_restore() {
  local backup_dir="${1:-}"
  # Keep the menu answer local so it does not leak into the global scope.
  local choice=""

  if [ -z "$backup_dir" ]; then
    log_error "请指定备份目录"
    echo "用法: $0 restore <备份目录>"
    echo ""
    echo "可用备份:"
    do_list
    exit 1
  fi

  # A path that is not absolute is resolved against BACKUP_ROOT.
  if [[ ! "$backup_dir" = /* ]]; then
    backup_dir="$BACKUP_ROOT/$backup_dir"
  fi
  if [ ! -d "$backup_dir" ]; then
    log_error "备份目录不存在: $backup_dir"
    exit 1
  fi

  print_header "从备份恢复系统数据"
  check_docker
  log_info "备份目录: $backup_dir"
  echo ""
  if [ -f "$backup_dir/BACKUP_INFO.txt" ]; then
    cat "$backup_dir/BACKUP_INFO.txt"
    echo ""
  fi
  echo -e "${RED}警告: 恢复操作将覆盖现有数据!${NC}"
  echo ""
  if ! confirm "确定要继续恢复吗?"; then
    log_info "已取消恢复操作"
    exit 0
  fi

  # ----------------------------------------
  # Choose what to restore
  # ----------------------------------------
  echo ""
  echo "请选择恢复内容:"
  echo " 1) 完整恢复 (PostgreSQL + 所有数据卷)"
  echo " 2) 仅恢复 PostgreSQL (逻辑恢复,推荐)"
  echo " 3) 仅恢复数据卷 (物理恢复)"
  echo " 4) 选择性恢复"
  echo ""
  # -r keeps backslashes literal. On EOF the read fails; fall through with an
  # empty choice so the "invalid choice" branch reports it, instead of the
  # script dying silently under "set -e".
  read -r -p "请选择 [1-4]: " choice || true
  case "$choice" in
    1) restore_all "$backup_dir" ;;
    2) restore_postgres "$backup_dir" ;;
    3) restore_volumes "$backup_dir" ;;
    4) restore_selective "$backup_dir" ;;
    *)
      log_error "无效选择"
      exit 1
      ;;
  esac
  print_header "恢复完成"
}
# Replay the SQL dumps of a snapshot into the running PostgreSQL containers.
#
# For each database the dump file must exist in the snapshot and the target
# container must be listed by `docker ps`; otherwise a warning is logged and
# that database is skipped. psql's stderr is discarded, and a non-zero psql
# status is only reported as a warning.
#
# Arguments:
#   $1 - snapshot directory
restore_postgres() {
  local backup_dir="$1"
  local spec label container db_user dump_name dump_path

  log_step "恢复 PostgreSQL 数据库"

  # One "label|container|psql user|dump file" record per database.
  for spec in \
    "Services PostgreSQL|${SERVICES_POSTGRES_CONTAINER}|rwa_user|services_postgres_dump.sql" \
    "MPC PostgreSQL|${MPC_POSTGRES_CONTAINER}|postgres|mpc_postgres_dump.sql"; do
    IFS='|' read -r label container db_user dump_name <<< "$spec"
    dump_path="$backup_dir/$dump_name"

    if [ ! -f "$dump_path" ]; then
      log_warn "未找到 $dump_name"
      continue
    fi
    if ! docker ps --format '{{.Names}}' | grep -q "^${container}$"; then
      log_warn "容器 $container 未运行"
      continue
    fi

    log_info "恢复 $label..."
    if docker exec -i "$container" psql -U "$db_user" < "$dump_path" 2>/dev/null; then
      log_info "✓ $label 恢复完成"
    else
      log_warn "$label 恢复可能有警告(通常是正常的)"
    fi
  done
}
# Restore every volume in VOLUMES from its "<volume>.tar.gz" archive.
#
# The user must first confirm that the related containers are stopped;
# otherwise the stop commands are printed and the script exits with status 1.
# A volume with no archive in the snapshot is skipped with a warning; a
# missing Docker volume is created before its contents are replaced.
#
# Arguments:
#   $1 - snapshot directory
restore_volumes() {
  local backup_dir="$1"
  local item vol_name vol_desc archive

  log_step "恢复 Docker 数据卷"
  echo ""
  echo -e "${RED}警告: 恢复数据卷需要先停止相关容器!${NC}"
  echo -e "${YELLOW}提示: 如果只是恢复 PostgreSQL 数据,建议使用选项 2 (逻辑恢复),无需停止容器${NC}"
  echo ""

  confirm "是否已停止所有相关容器?" || {
    log_info "请先停止容器,然后重新运行恢复"
    echo ""
    echo "停止命令:"
    echo " cd ~/services && docker compose down"
    echo " cd ~/mpc-system && docker compose down"
    exit 1
  }

  for item in "${VOLUMES[@]}"; do
    IFS=':' read -r vol_name vol_desc <<< "$item"
    archive="$backup_dir/${vol_name}.tar.gz"

    if [ ! -f "$archive" ]; then
      log_warn "未找到 $archive,跳过"
      continue
    fi

    log_info "恢复 $vol_name ($vol_desc)..."

    # Create the volume when it does not exist yet.
    if ! docker volume inspect "$vol_name" &>/dev/null; then
      docker volume create "$vol_name" >/dev/null
      log_info "创建数据卷 $vol_name"
    fi

    # Empty the volume (dot-files included), then unpack the archive into it.
    if docker run --rm \
      -v "$vol_name":/data \
      -v "$backup_dir":/backup:ro \
      alpine sh -c "rm -rf /data/* /data/..?* /data/.[!.]* 2>/dev/null; tar xzf /backup/${vol_name}.tar.gz -C /data"; then
      log_info "$vol_name 恢复完成"
    else
      log_error "$vol_name 恢复失败"
    fi
  done
}
# Full restore: volumes first, then (after the user has restarted the
# containers and confirmed) the PostgreSQL dumps.
#
# Arguments:
#   $1 - snapshot directory
restore_all() {
  local backup_dir="$1"

  # Volumes are restored first.
  restore_volumes "$backup_dir"

  printf '\n'
  log_info "请先启动容器,然后继续恢复 PostgreSQL"
  printf '%s\n' \
    "" \
    "启动命令:" \
    " cd ~/services && docker compose up -d" \
    " cd ~/mpc-system && docker compose up -d" \
    ""

  # Skip the database step unless the user confirms the containers are up.
  confirm "容器已启动,继续恢复 PostgreSQL?" || return 0
  restore_postgres "$backup_dir"
}
# Let the user pick individual items of a snapshot to restore.
#
# Lists the PostgreSQL dumps and volume archives that exist in the snapshot,
# reads a comma-separated list of 1-based indexes and restores each selected
# item. Entries that are not plain numbers or are out of range are reported
# and skipped; a failing restore is logged and the remaining items are still
# processed.
#
# Arguments:
#   $1 - snapshot directory
restore_selective() {
  local backup_dir="$1"
  local items=()
  local selected=()
  local i=1
  local entry volume desc selection token idx item

  echo ""
  echo "可恢复的内容:"
  echo ""
  if [ -f "$backup_dir/services_postgres_dump.sql" ]; then
    echo " $i) Services PostgreSQL (services_postgres_dump.sql)"
    items+=("services_postgres")
    i=$((i + 1))
  fi
  if [ -f "$backup_dir/mpc_postgres_dump.sql" ]; then
    echo " $i) MPC PostgreSQL (mpc_postgres_dump.sql)"
    items+=("mpc_postgres")
    i=$((i + 1))
  fi
  for entry in "${VOLUMES[@]}"; do
    IFS=':' read -r volume desc <<< "$entry"
    if [ -f "$backup_dir/${volume}.tar.gz" ]; then
      echo " $i) $desc (${volume}.tar.gz)"
      items+=("$volume")
      i=$((i + 1))
    fi
  done
  echo ""

  read -r -p "请输入要恢复的序号(逗号分隔,如 1,3,4: " selection || true
  IFS=',' read -ra selected <<< "$selection"

  for token in "${selected[@]}"; do
    # Trim surrounding whitespace so "1, 3" works as expected.
    token="${token#"${token%%[![:space:]]*}"}"
    token="${token%"${token##*[![:space:]]}"}"
    if [ -z "$token" ]; then
      continue
    fi
    # Only plain digits may reach the arithmetic below: feeding raw user
    # input to $(( )) raises an arithmetic error that aborts the function.
    if [[ ! "$token" =~ ^[0-9]+$ ]]; then
      log_warn "忽略无效序号: $token"
      continue
    fi
    # 10# forces base 10 so "08" is not rejected as an invalid octal number.
    idx=$((10#$token - 1))
    if (( idx < 0 || idx >= ${#items[@]} )); then
      log_warn "序号超出范围: $token"
      continue
    fi

    item="${items[$idx]}"
    case "$item" in
      "services_postgres")
        if docker ps --format '{{.Names}}' | grep -q "^${SERVICES_POSTGRES_CONTAINER}$"; then
          log_info "恢复 Services PostgreSQL..."
          if docker exec -i "$SERVICES_POSTGRES_CONTAINER" psql -U rwa_user \
            < "$backup_dir/services_postgres_dump.sql" 2>/dev/null; then
            log_info "✓ Services PostgreSQL 恢复完成"
          else
            log_warn "Services PostgreSQL 恢复可能有警告(通常是正常的)"
          fi
        else
          log_warn "容器未运行"
        fi
        ;;
      "mpc_postgres")
        if docker ps --format '{{.Names}}' | grep -q "^${MPC_POSTGRES_CONTAINER}$"; then
          log_info "恢复 MPC PostgreSQL..."
          if docker exec -i "$MPC_POSTGRES_CONTAINER" psql -U postgres \
            < "$backup_dir/mpc_postgres_dump.sql" 2>/dev/null; then
            log_info "✓ MPC PostgreSQL 恢复完成"
          else
            log_warn "MPC PostgreSQL 恢复可能有警告(通常是正常的)"
          fi
        else
          log_warn "容器未运行"
        fi
        ;;
      *)
        # Anything else is a volume name.
        log_info "恢复数据卷 $item..."
        if ! docker volume inspect "$item" &>/dev/null; then
          if ! docker volume create "$item" >/dev/null; then
            log_error "$item 恢复失败"
            continue
          fi
        fi
        # Report a failed restore instead of letting "set -e" end the script
        # without any message.
        if docker run --rm \
          -v "$item":/data \
          -v "$backup_dir":/backup:ro \
          alpine sh -c "rm -rf /data/* /data/..?* /data/.[!.]* 2>/dev/null; tar xzf /backup/${item}.tar.gz -C /data"; then
          log_info "$item 恢复完成"
        else
          log_error "$item 恢复失败"
        fi
        ;;
    esac
  done
}
# ============================================
# 列表函数
# ============================================
# Print a table (name, size, timestamp) of the snapshot directories found
# directly under BACKUP_ROOT, in reverse name order (newest timestamp first).
#
# The timestamp column is filled only for directory names of the form
# YYYYmmdd_HHMMSS. Prints "no backups" when BACKUP_ROOT is missing or empty.
do_list() {
  local backups=()
  local dir name size date_str i

  print_header "可用备份列表"
  if [ ! -d "$BACKUP_ROOT" ]; then
    log_info "暂无备份"
    return
  fi

  # Use a glob and an array instead of parsing `ls`, so that paths containing
  # whitespace stay intact. An unmatched glob stays literal and is filtered
  # out by the -d test.
  for dir in "$BACKUP_ROOT"/*/; do
    if [ -d "$dir" ]; then
      backups+=("$dir")
    fi
  done
  if (( ${#backups[@]} == 0 )); then
    log_info "暂无备份"
    return
  fi

  printf "%-20s %-10s %-20s\n" "目录名" "大小" "时间"
  printf "%-20s %-10s %-20s\n" "--------------------" "----------" "--------------------"

  # Glob results are sorted ascending; walk them backwards for newest first.
  for (( i = ${#backups[@]} - 1; i >= 0; i-- )); do
    dir="${backups[$i]}"
    name=$(basename "$dir")
    size=$(du -sh "$dir" 2>/dev/null | cut -f1)
    date_str=""
    # Decode the timestamp embedded in the directory name.
    if [[ "$name" =~ ^([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})([0-9]{2})$ ]]; then
      date_str="${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-${BASH_REMATCH[3]} ${BASH_REMATCH[4]}:${BASH_REMATCH[5]}:${BASH_REMATCH[6]}"
    fi
    printf "%-20s %-10s %-20s\n" "$name" "$size" "$date_str"
  done
}
# ============================================
# 详情函数
# ============================================
# Show the details of one snapshot: its BACKUP_INFO.txt when present,
# otherwise the directory path followed by a file listing.
#
# Arguments:
#   $1 - snapshot directory; a value that does not start with "/" is looked
#        up under BACKUP_ROOT
# Exits:
#   1 when the argument is missing or the directory does not exist.
do_info() {
  local target="$1"
  local info_file

  if [ -z "$target" ]; then
    log_error "请指定备份目录"
    echo "用法: $0 info <备份目录>"
    exit 1
  fi

  # Anything that is not an absolute path is resolved against BACKUP_ROOT.
  case "$target" in
    /*) ;;
    *) target="$BACKUP_ROOT/$target" ;;
  esac

  [ -d "$target" ] || {
    log_error "备份目录不存在: $target"
    exit 1
  }

  print_header "备份详情"
  info_file="$target/BACKUP_INFO.txt"
  if [ ! -f "$info_file" ]; then
    echo "目录: $target"
    echo ""
    ls -lh "$target"
  else
    cat "$info_file"
  fi
}
# ============================================
# 清理函数
# ============================================
# Delete snapshot directories directly under BACKUP_ROOT whose modification
# time is older than the given number of days, after asking for confirmation.
#
# Arguments:
#   $1 - number of days to keep (non-negative integer, default 7)
# Exits:
#   1 when the day count is not a non-negative integer.
do_clean() {
  local keep_days="${1:-7}"
  local to_delete=()
  local dir size

  print_header "清理旧备份"

  # Reject anything find cannot use as a day count; a find error would
  # otherwise be hidden by the 2>/dev/null below.
  if [[ ! "$keep_days" =~ ^[0-9]+$ ]]; then
    log_error "保留天数必须是非负整数: $keep_days"
    exit 1
  fi

  if [ ! -d "$BACKUP_ROOT" ]; then
    log_info "备份目录不存在"
    return
  fi
  log_info "保留最近 $keep_days 天的备份"
  echo ""

  # -mindepth 1 leaves BACKUP_ROOT itself out of the result. NUL-delimited
  # output keeps paths with whitespace in one piece, so only real snapshot
  # directories ever reach "rm -rf".
  while IFS= read -r -d '' dir; do
    to_delete+=("$dir")
  done < <(find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -mtime +"$keep_days" -print0 2>/dev/null)

  if (( ${#to_delete[@]} == 0 )); then
    log_info "没有需要清理的备份"
    return
  fi

  echo "将删除以下备份:"
  for dir in "${to_delete[@]}"; do
    size=$(du -sh "$dir" | cut -f1)
    echo " - $(basename "$dir") ($size)"
  done
  echo ""

  if confirm "确认删除?"; then
    for dir in "${to_delete[@]}"; do
      rm -rf -- "$dir"
      log_info "已删除: $(basename "$dir")"
    done
    log_info "清理完成"
  else
    log_info "已取消"
  fi
}
# ============================================
# 帮助信息
# ============================================
# Print the usage text: commands, examples, what a backup contains and the
# backup location (BACKUP_ROOT).
show_help() {
  local prog="$0"

  printf '%s\n' \
    "RWA 系统数据快照工具" \
    "用法:" \
    "$prog <命令> [参数]" \
    "命令:" \
    "backup 创建完整系统备份" \
    "restore <目录> 从指定备份恢复" \
    "list 列出所有可用备份" \
    "info <目录> 查看备份详情" \
    "clean [天数] 清理旧备份默认保留7天" \
    "help 显示此帮助信息" \
    "示例:" \
    "$prog backup # 创建新备份" \
    "$prog restore 20260104_153022 # 恢复指定备份" \
    "$prog list # 查看所有备份" \
    "$prog clean 14 # 保留14天内的备份" \
    "备份内容:" \
    "- PostgreSQL 数据库 (逻辑备份 + 数据卷)" \
    "- Redis 数据" \
    "- Kafka 消息队列数据" \
    "- Zookeeper 配置数据" \
    "- 系统状态信息" \
    "备份位置: $BACKUP_ROOT"
}
# ============================================
# 主入口
# ============================================
# Command dispatcher.
#
# Arguments:
#   $1   - sub-command (backup | restore | list | info | clean | help);
#          defaults to "help" when missing or empty
#   $2.. - forwarded to restore, info and clean
# Exits:
#   1 on an unknown sub-command (after printing the help text).
main() {
  local command="${1:-help}"

  # Drop the sub-command so "$@" holds only its arguments.
  if (( $# > 0 )); then
    shift
  fi

  case "$command" in
    help | --help | -h)
      show_help
      ;;
    backup)
      do_backup
      ;;
    list)
      do_list
      ;;
    restore)
      do_restore "$@"
      ;;
    info)
      do_info "$@"
      ;;
    clean)
      do_clean "$@"
      ;;
    *)
      log_error "未知命令: $command"
      echo ""
      show_help
      exit 1
      ;;
  esac
}
# Script entry point: hand every command-line argument to main.
main "$@"