"""
Usage:
python3 -m unittest test_bnb.TestVisionModel.test_vlm
python3 -m unittest test_bnb.TestLanguageModel.test_mmlu
"""
import base64
import io
import json
import multiprocessing as mp
import os
import random
import unittest
from concurrent.futures import ThreadPoolExecutor
from types import SimpleNamespace
import numpy as np
import openai
import requests
from PIL import Image
from sglang.srt.utils import kill_process_tree
from sglang.test.run_eval import run_eval
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
is_in_ci,
popen_launch_server,
)
VISION_MODELS = [
"unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
"unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
"unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
"unsloth/Llama-3.2-11B-Vision-bnb-4bit",
"unsloth/gemma-3-4b-it-bnb-4bit",
"unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
]
LANGUAGE_MODELS = [
"unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
"unsloth/Qwen2-7B-Instruct-bnb-4bit",
"unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
"unsloth/gemma-3-1b-it-bnb-4bit",
]
# image
IMAGE_MAN_IRONING_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/man_ironing_on_back_of_suv.png"
IMAGE_SGL_LOGO_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/sgl_logo.png"
# video
VIDEO_JOBS_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/videos/jobs_presenting_ipod.mp4"
# audio
AUDIO_TRUMP_SPEECH_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/audios/Trump_WEF_2018_10s.mp3"
AUDIO_BIRD_SONG_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/audios/bird_song.mp3"
def popen_launch_server_wrapper(base_url, model, other_args):
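    """Start a server process for `model` and wait for it to become ready (or time out)."""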
process = popen_launch_server(
model,
base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=other_args,
)
return process
class TestVisionModel(CustomTestCase):
@classmethod
def setUpClass(cls):
mp.set_start_method("spawn", force=True)
cls.base_url = DEFAULT_URL_FOR_TEST
cls.base_url += "/v1"
cls.api_key = "sk-123456"
def _run_single_image_chat_completion(self):
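        """Send a single image with a short prompt and sanity-check the description."""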
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.chat.completions.create(
model="default",
messages=[
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {"url": IMAGE_MAN_IRONING_URL},
},
{
"type": "text",
"text": "Describe this image in a very short sentence.",
},
],
},
],
temperature=0,
)
assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content
assert isinstance(text, str)
# `driver` is for gemma-3-it
assert "man" in text or "person" or "driver" in text, text
assert "cab" in text or "taxi" in text or "SUV" in text, text
        # MiniCPMO fails to recognize `iron`, but it does pick up `hanging`
assert "iron" in text or "hang" in text, text
assert response.id
assert response.created
assert response.usage.prompt_tokens > 0
assert response.usage.completion_tokens > 0
assert response.usage.total_tokens > 0
def _run_multi_turn_chat_completion(self):
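        """Run a multi-turn conversation (image question, assistant answer, follow-up) and check the reply."""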
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.chat.completions.create(
model="default",
messages=[
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {"url": IMAGE_MAN_IRONING_URL},
},
{
"type": "text",
"text": "Describe this image in a very short sentence.",
},
],
},
{
"role": "assistant",
"content": [
{
"type": "text",
"text": "There is a man at the back of a yellow cab ironing his clothes.",
}
],
},
{
"role": "user",
"content": [
{"type": "text", "text": "Repeat your previous answer."}
],
},
],
temperature=0,
)
assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content
assert isinstance(text, str)
assert "man" in text or "cab" in text, text
assert response.id
assert response.created
assert response.usage.prompt_tokens > 0
assert response.usage.completion_tokens > 0
assert response.usage.total_tokens > 0
def _run_multi_images_chat_completion(self):
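        """Send two unrelated images in a single request and check that both are described."""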
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
response = client.chat.completions.create(
model="default",
messages=[
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {"url": IMAGE_MAN_IRONING_URL},
"modalities": "multi-images",
},
{
"type": "image_url",
"image_url": {"url": IMAGE_SGL_LOGO_URL},
"modalities": "multi-images",
},
{
"type": "text",
"text": "I have two very different images. They are not related at all. "
"Please describe the first image in one sentence, and then describe the second image in another sentence.",
},
],
},
],
temperature=0,
)
assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content
assert isinstance(text, str)
print("-" * 30)
print(f"Multi images response:\n{text}")
print("-" * 30)
assert "man" in text or "cab" in text or "SUV" in text or "taxi" in text, text
assert "logo" in text or '"S"' in text or "SG" in text, text
assert response.id
assert response.created
assert response.usage.prompt_tokens > 0
assert response.usage.completion_tokens > 0
assert response.usage.total_tokens > 0
def run_decode_with_image(self, image_id):
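        """Send a chat completion with the ironing image (0), the SGL logo (1), or text only (otherwise)."""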
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
content = []
if image_id == 0:
content.append(
{
"type": "image_url",
"image_url": {"url": IMAGE_MAN_IRONING_URL},
}
)
elif image_id == 1:
content.append(
{
"type": "image_url",
"image_url": {"url": IMAGE_SGL_LOGO_URL},
}
)
else:
pass
content.append(
{
"type": "text",
"text": "Describe this image in a very short sentence.",
}
)
response = client.chat.completions.create(
model="default",
messages=[
{"role": "user", "content": content},
],
temperature=0,
)
assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content
assert isinstance(text, str)
def _run_test_mixed_batch(self):
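        """Run image and text-only requests concurrently so the server sees mixed batches."""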
image_ids = [0, 1, 2] * 4
with ThreadPoolExecutor(4) as executor:
list(executor.map(self.run_decode_with_image, image_ids))
def test_vlm(self):
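        """Launch each vision model with bitsandbytes loading and run the chat-completion checks (a single random model in CI)."""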
models_to_test = VISION_MODELS
if is_in_ci():
models_to_test = [random.choice(VISION_MODELS)]
for model in models_to_test:
with self.subTest(model=model):
other_args = [
"--mem-fraction-static",
"0.6",
"--load-format",
"bitsandbytes",
"--enable-multimodal",
]
                process = popen_launch_server_wrapper(
                    DEFAULT_URL_FOR_TEST, model, other_args
                )
                try:
self._run_test_mixed_batch()
self._run_multi_images_chat_completion()
self._run_multi_turn_chat_completion()
self._run_single_image_chat_completion()
finally:
kill_process_tree(process.pid)
class TestLanguageModel(CustomTestCase):
@classmethod
def setUpClass(cls):
mp.set_start_method("spawn", force=True)
cls.base_url = DEFAULT_URL_FOR_TEST
        # Intentionally no "/v1" suffix: run_eval builds its own endpoint path from the base URL.
cls.api_key = "sk-123456"
def test_mmlu(self):
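        """Launch each language model with bitsandbytes loading and check its MMLU score (a single random model in CI)."""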
models_to_test = LANGUAGE_MODELS
if is_in_ci():
models_to_test = [random.choice(LANGUAGE_MODELS)]
for model in models_to_test:
with self.subTest(model=model):
other_args = [
"--mem-fraction-static",
"0.6",
"--load-format",
"bitsandbytes",
]
                process = popen_launch_server_wrapper(
                    DEFAULT_URL_FOR_TEST, model, other_args
                )
                try:
args = SimpleNamespace(
base_url=self.base_url,
model=model,
eval_name="mmlu",
num_examples=32,
num_threads=16,
)
metrics = run_eval(args)
print(f"{metrics=}")
self.assertGreater(metrics["score"], 0.3)
finally:
kill_process_tree(process.pid)