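"""Long-context smoke test against an OpenAI-compatible chat endpoint.

Fetches test prompts of increasing size (64k up to 1m) from the Qwen2.5-1M
test-data bucket, then streams a short completion for each one from a locally
served meta-llama/Llama-4-Scout-17B-16E-Instruct model. The endpoint is
assumed to be at http://127.0.0.1:30000/v1 (30000 is, for example, SGLang's
default port); adjust base_url for your server.
"""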

from urllib.request import urlopen

from openai import OpenAI

# Test prompts of increasing size, keyed by their approximate length.
test_cases = {
    "64k": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-1M/test-data/64k.txt",
    "200k": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-1M/test-data/200k.txt",
    "600k": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-1M/test-data/600k.txt",
    "1m": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-1M/test-data/1m.txt",
}

# The local server ignores the API key, but the client requires a non-empty value.
client = OpenAI(api_key="EMPTY", base_url="http://127.0.0.1:30000/v1")

for name, url in test_cases.items():
    print(f"\n==== Running test case: {name} ====")

    # Fetch the prompt text; skip this case if the download fails.
    try:
        with urlopen(url, timeout=10) as response:
            prompt = response.read().decode("utf-8")
    except Exception as e:
        print(f"Failed to load prompt for {name}: {e}")
        continue

    # Request a short, deterministic completion for the long prompt.
    try:
        stream = client.chat.completions.create(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            messages=[{"role": "user", "content": prompt}],
            stream=True,
            max_tokens=128,
            temperature=0,
        )

        # Print tokens as they arrive; skip keep-alive chunks with no content.
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content is not None:
                print(chunk.choices[0].delta.content, end="", flush=True)
        print()  # terminate the streamed line
    except Exception as e:
        print(f"\nError during completion for {name}: {e}")
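
# This script does not start a server; an OpenAI-compatible endpoint must
# already be listening at the base_url above. Assuming SGLang (30000 is its
# default port), one possible launch command is:
#
#   python -m sglang.launch_server \
#       --model-path meta-llama/Llama-4-Scout-17B-16E-Instruct \
#       --port 30000 --context-length 1000000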