sglang_v0.5.2/sglang/sgl-router/py_test/integration/test_circuit_breaker.py

192 lines
5.3 KiB
Python

import time
import pytest
import requests
@pytest.mark.integration
def test_circuit_breaker_opens_and_recovers(router_manager, mock_workers):
    """Check that the breaker starts answering 503 and later lets requests through.

    A single mock worker is started with ``--fail-first-n 3`` (it fails its
    first three requests, then succeeds). The router is configured with a
    failure threshold of 3 and retries disabled. The test polls until a 503 is
    seen, waits, and then expects two consecutive 200 responses.
    """
    _, worker_urls, _ = mock_workers(n=1, args=["--fail-first-n", "3"])
    (worker_url,) = worker_urls

    breaker_settings = {
        "cb_failure_threshold": 3,
        "cb_success_threshold": 2,
        "cb_timeout_duration_secs": 3,
        "cb_window_duration_secs": 10,
        "disable_retries": True,
    }
    router = router_manager.start_router(
        worker_urls=[worker_url],
        policy="round_robin",
        extra=breaker_settings,
    )

    def send_completion():
        """POST one minimal, non-streaming completion request to the router."""
        payload = {
            "model": "test-model",
            "prompt": "trigger",
            "max_tokens": 1,
            "stream": False,
        }
        return requests.post(f"{router.url}/v1/completions", json=payload, timeout=3)

    # Send at most 8 requests; any() stops issuing them at the first 503.
    saw_503 = any(send_completion().status_code == 503 for _ in range(8))
    assert saw_503, "circuit breaker did not open to return 503"

    # Wait longer than cb_timeout_duration_secs (3) before probing again.
    time.sleep(4)

    # Both follow-up requests are issued before either status is checked.
    follow_ups = [send_completion(), send_completion()]
    assert [resp.status_code for resp in follow_ups] == [200, 200]
@pytest.mark.integration
def test_circuit_breaker_half_open_failure_reopens(router_manager, mock_workers):
    """Check that a failure after the open timeout sends the breaker back to 503.

    The only worker is started with ``--status-code 500`` (always fails).
    After the router begins answering 503, the test waits past the configured
    timeout, expects the next request to reach the worker (500), and expects
    the request after that to be rejected again (503).
    """
    _, worker_urls, _ = mock_workers(n=1, args=["--status-code", "500"])
    (worker_url,) = worker_urls

    breaker_settings = {
        "cb_failure_threshold": 2,
        "cb_success_threshold": 2,
        "cb_timeout_duration_secs": 2,
        "cb_window_duration_secs": 5,
        "disable_retries": True,
    }
    router = router_manager.start_router(
        worker_urls=[worker_url],
        policy="round_robin",
        extra=breaker_settings,
    )

    def send_completion():
        """POST one minimal, non-streaming completion request to the router."""
        payload = {
            "model": "test-model",
            "prompt": "x",
            "max_tokens": 1,
            "stream": False,
        }
        return requests.post(f"{router.url}/v1/completions", json=payload, timeout=3)

    # Send at most 8 requests; any() stops issuing them at the first 503.
    opened = any(send_completion().status_code == 503 for _ in range(8))
    assert opened, "circuit breaker did not open"

    # Wait longer than cb_timeout_duration_secs (2) before probing again.
    time.sleep(3)

    # First request after the wait is expected to be forwarded and fail.
    probe = send_completion()
    assert probe.status_code == 500

    # The next request is expected to be rejected by the router again.
    after_probe = send_completion()
    assert after_probe.status_code == 503
@pytest.mark.integration
def test_circuit_breaker_disable_flag(router_manager, mock_workers):
    """Check that the worker's 500 is returned when the breaker is disabled.

    The router is started with ``disable_circuit_breaker`` and
    ``disable_retries`` both set, in front of one worker started with
    ``--status-code 500`` (always fails). A single request must come back
    as 500.
    """
    _, worker_urls, _ = mock_workers(n=1, args=["--status-code", "500"])
    (worker_url,) = worker_urls

    router = router_manager.start_router(
        worker_urls=[worker_url],
        policy="round_robin",
        extra={"disable_circuit_breaker": True, "disable_retries": True},
    )

    payload = {
        "model": "test-model",
        "prompt": "x",
        "max_tokens": 1,
        "stream": False,
    }
    response = requests.post(f"{router.url}/v1/completions", json=payload, timeout=3)
    assert response.status_code == 500
@pytest.mark.integration
def test_circuit_breaker_per_worker_isolation(router_manager, mock_workers):
    """Check that one failing worker does not stop requests reaching a healthy one.

    Two workers sit behind a round-robin router with retries disabled: one
    started with ``--status-code 500`` (always fails) and one with default
    arguments. Once two 500 responses have been observed, every remaining
    response in the 30-iteration loop must be 200, and at least five such
    successes must be counted.
    """
    _, [fail_url], _ = mock_workers(n=1, args=["--status-code", "500"])  # always fail
    _, [ok_url], _ = mock_workers(n=1)
    rh = router_manager.start_router(
        worker_urls=[fail_url, ok_url],
        policy="round_robin",
        extra={
            "cb_failure_threshold": 2,
            "cb_success_threshold": 1,
            "cb_timeout_duration_secs": 2,
            "cb_window_duration_secs": 10,
            "disable_retries": True,
        },
    )

    def post_once():
        """POST one minimal, non-streaming completion request to the router."""
        return requests.post(
            f"{rh.url}/v1/completions",
            json={
                "model": "test-model",
                "prompt": "y",
                "max_tokens": 1,
                "stream": False,
            },
            timeout=3,
        )

    failures = 0
    successes_after_open = 0
    opened = False
    for _ in range(30):
        r = post_once()
        if opened:
            if r.status_code != 200:
                # pytest.fail is used instead of `assert False` so the check
                # still fires when Python runs with assertions stripped (-O).
                pytest.fail(f"Unexpected non-200 after CB open: {r.status_code}")
            successes_after_open += 1
            continue

        if r.status_code == 500:
            failures += 1
        if failures >= 2:
            # Two extra requests whose responses are deliberately not checked,
            # sent before the strict 200-only phase begins.
            post_once()
            post_once()
            opened = True

    assert opened and successes_after_open >= 5
@pytest.mark.integration
def test_circuit_breaker_with_retries(router_manager, mock_workers):
    """Check that a request succeeds with retries enabled and one worker failing.

    Two workers sit behind a round-robin router: one started with
    ``--status-code 500`` (always fails) and one with default arguments.
    Retries are configured (up to 3, 10-50 ms backoff) alongside the circuit
    breaker settings. A single request must come back as 200.
    """
    _, failing_urls, _ = mock_workers(n=1, args=["--status-code", "500"])
    (fail_url,) = failing_urls
    _, healthy_urls, _ = mock_workers(n=1)
    (ok_url,) = healthy_urls

    router_settings = {
        "retry_max_retries": 3,
        "retry_initial_backoff_ms": 10,
        "retry_max_backoff_ms": 50,
        "cb_failure_threshold": 2,
        "cb_success_threshold": 1,
        "cb_timeout_duration_secs": 2,
        "cb_window_duration_secs": 10,
    }
    router = router_manager.start_router(
        worker_urls=[fail_url, ok_url],
        policy="round_robin",
        extra=router_settings,
    )

    payload = {
        "model": "test-model",
        "prompt": "z",
        "max_tokens": 1,
        "stream": False,
    }
    response = requests.post(f"{router.url}/v1/completions", json=payload, timeout=5)
    assert response.status_code == 200