""" Usage: python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion """ import unittest from test_vision_openai_server_common import * from sglang.srt.utils import kill_process_tree from sglang.test.test_utils import ( DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_URL_FOR_TEST, CustomTestCase, popen_launch_server, ) class TestQwen2VLServer(TestOpenAIVisionServer): @classmethod def setUpClass(cls): cls.model = "Qwen/Qwen2-VL-7B-Instruct" cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, api_key=cls.api_key, other_args=[ "--mem-fraction-static", "0.4", ], ) cls.base_url += "/v1" class TestQwen2_5_VLServer(TestOpenAIVisionServer): @classmethod def setUpClass(cls): cls.model = "Qwen/Qwen2.5-VL-7B-Instruct" cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, api_key=cls.api_key, other_args=[ "--mem-fraction-static", "0.4", ], ) cls.base_url += "/v1" class TestVLMContextLengthIssue(CustomTestCase): @classmethod def setUpClass(cls): cls.model = "Qwen/Qwen2-VL-7B-Instruct" cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, api_key=cls.api_key, other_args=[ "--context-length", "300", "--mem-fraction-static=0.80", ], ) cls.base_url += "/v1" @classmethod def tearDownClass(cls): kill_process_tree(cls.process.pid) def test_single_image_chat_completion(self): client = openai.Client(api_key=self.api_key, base_url=self.base_url) with self.assertRaises(openai.BadRequestError) as cm: client.chat.completions.create( model="default", messages=[ { "role": "user", "content": [ { "type": "image_url", "image_url": {"url": IMAGE_MAN_IRONING_URL}, }, { "type": "text", "text": "Give a lengthy description of this picture", }, ], }, ], temperature=0, ) # context length is checked first, then max_req_input_len, which is calculated from the former assert ( "Multimodal prompt is too long after expanding multimodal tokens." in str(cm.exception) or "is longer than the model's context length" in str(cm.exception) ) class TestMllamaServer(TestOpenAIVisionServer): @classmethod def setUpClass(cls): cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct" cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, api_key=cls.api_key, ) cls.base_url += "/v1" def test_video_chat_completion(self): pass class TestMinicpmvServer(TestOpenAIVisionServer): @classmethod def setUpClass(cls): cls.model = "openbmb/MiniCPM-V-2_6" cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=[ "--trust-remote-code", "--mem-fraction-static", "0.4", ], ) cls.base_url += "/v1" class TestInternVL2_5Server(TestOpenAIVisionServer): @classmethod def setUpClass(cls): cls.model = "OpenGVLab/InternVL2_5-2B" cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=["--trust-remote-code"], ) cls.base_url += "/v1" class TestMinicpmoServer(TestOpenAIVisionServer): @classmethod def setUpClass(cls): cls.model = "openbmb/MiniCPM-o-2_6" cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=[ "--trust-remote-code", "--mem-fraction-static", "0.7", ], ) cls.base_url += "/v1" def test_audio_chat_completion(self): self._test_audio_speech_completion() self._test_audio_ambient_completion() if __name__ == "__main__": unittest.main()