sglang_v0.5.2/sglang/sgl-router/py_test/integration/test_fault_tolerance.py

37 lines
1005 B
Python

import concurrent.futures
import subprocess
import time
import pytest
import requests
@pytest.mark.integration
def test_worker_crash_reroute_with_retries(router_manager, mock_workers):
    """Check that a single completion request returns 200 despite a crashing worker.

    Two mock workers are started: one with default arguments and one launched
    with ``--crash-on-request``. Both are registered with a ``round_robin``
    router that is given retry settings, and one non-streaming completion
    request is posted through the router. The intent is that the retry lands
    the request on the healthy worker.

    Args:
        router_manager: Fixture exposing ``start_router(...)``; the handle it
            returns provides the router's base ``url``.
        mock_workers: Fixture factory; each call here is expected to return a
            3-item result whose middle item is a one-element list of URLs.
    """
    # Healthy worker first, then the one told to crash when it gets a request.
    _, healthy_urls, _ = mock_workers(n=1)
    (ok_url,) = healthy_urls
    _, crashing_urls, _ = mock_workers(n=1, args=["--crash-on-request"])
    (crash_url,) = crashing_urls

    # Retry knobs handed to the router through its ``extra`` options.
    retry_settings = {
        "retry_max_retries": 3,
        "retry_initial_backoff_ms": 10,
        "retry_max_backoff_ms": 50,
    }
    completion_request = {
        "model": "test-model",
        "prompt": "crash",
        "max_tokens": 1,
        "stream": False,
    }

    # The crashing worker is listed ahead of the healthy one.
    router = router_manager.start_router(
        worker_urls=[crash_url, ok_url],
        policy="round_robin",
        extra=retry_settings,
    )

    # One request only: it is expected to succeed via retry to the healthy worker.
    endpoint = f"{router.url}/v1/completions"
    response = requests.post(endpoint, json=completion_request, timeout=5)
    assert response.status_code == 200
    # No explicit teardown here: the mock_workers fixture handles cleanup.