# sglang_v0.5.2/sglang/scripts/playground/long_context_example.py
#
# 37 lines
# 1.3 KiB
# Python

from urllib.request import urlopen
from openai import OpenAI
# Remote prompt files to run, keyed by the size label that appears in each
# file name. The labels presumably denote approximate prompt length in
# tokens -- TODO confirm against the data source.
test_cases = {
    "64k": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-1M/test-data/64k.txt",
    "200k": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-1M/test-data/200k.txt",
    "600k": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-1M/test-data/600k.txt",
    "1m": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-1M/test-data/1m.txt",
}
# OpenAI-compatible client aimed at a server on the loopback interface.
# "EMPTY" is a placeholder key; presumably the local server does not
# validate it -- TODO confirm.
client = OpenAI(api_key="EMPTY", base_url="http://127.0.0.1:30000/v1")

# Run every test case in turn: download the prompt, send it as a single user
# message, and echo the streamed completion to stdout. A failure in one case
# is reported and does not stop the remaining cases.
for name, url in test_cases.items():
    print(f"\n==== Running test case: {name} ====")

    # Step 1: fetch the prompt text. Any failure (network, HTTP, decoding)
    # is reported and the case is skipped.
    try:
        with urlopen(url, timeout=10) as http_response:
            prompt = http_response.read().decode("utf-8")
    except Exception as e:
        print(f"Failed to load prompt for {name}: {e}")
        continue

    # Step 2: request a short, deterministic (temperature=0) completion and
    # print the pieces as they arrive.
    try:
        stream = client.chat.completions.create(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            messages=[{"role": "user", "content": prompt}],
            stream=True,
            max_tokens=128,
            temperature=0,
        )
        for chunk in stream:
            # Skip chunks that have no choices or whose delta has no text.
            if chunk.choices and chunk.choices[0].delta.content is not None:
                print(chunk.choices[0].delta.content, end="", flush=True)
    except Exception as e:
        print(f"\nError during completion for {name}: {e}")