# systemd template unit: one Open-Sora Celery worker per GPU.
# The instance name (%i) is used as the GPU ID.

[Unit]
Description=Open-Sora Celery Worker GPU%i
After=network.target redis.service
Requires=redis.service
# Start rate limiting: give up (unit enters "failed") after 5 start attempts
# within 120 seconds. These keys must live in [Unit]; systemd does not
# recognise StartLimitIntervalSec= inside [Service].
StartLimitIntervalSec=120
StartLimitBurst=5

[Service]
Type=simple
User=ceshi
Group=ceshi
WorkingDirectory=/home/ceshi/mysora
Environment="PATH=/home/ceshi/venv/bin:/usr/local/bin:/usr/bin:/bin"
# %i is the systemd template instance parameter, i.e. the GPU ID (0-7).
Environment="WORKER_GPU_ID=%i"
# Notes on the command line below (systemd has no inline comments, so they
# are kept here rather than next to the individual options):
#   --hostname=gpu%i@%%h      "%%" is systemd's escape for a literal "%", so
#                             Celery receives "%h" and expands it itself.
#   --max-tasks-per-child=10  recycle the worker process after every 10 tasks
#                             to keep GPU memory fragmentation from building up.
ExecStart=/home/ceshi/venv/bin/celery \
    --app api.tasks \
    worker \
    --loglevel=info \
    --concurrency=1 \
    --hostname=gpu%i@%%h \
    --logfile=/data/train-output/logs/worker-gpu%i.log \
    --max-tasks-per-child=10
# Restart automatically after a crash (Restart=always also restarts after a
# clean exit), waiting 10 seconds between attempts.
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target