""" Usage: python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_false python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_streaming_separate_reasoning_true_stream_reasoning_false python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_false python3 -m unittest test_reasoning_content.TestReasoningContentAPI.test_nonstreaming_separate_reasoning_true python3 -m unittest test_reasoning_content.TestReasoningContentStartup.test_nonstreaming python3 -m unittest test_reasoning_content.TestReasoningContentStartup.test_streaming """ import json import unittest import openai import requests from sglang.srt.utils import kill_process_tree from sglang.test.test_utils import ( DEFAULT_REASONING_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_URL_FOR_TEST, CustomTestCase, popen_launch_server, ) class TestReasoningContentAPI(CustomTestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-1234" cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, api_key=cls.api_key, other_args=[ "--reasoning-parser", "deepseek-r1", ], ) cls.base_url += "/v1" @classmethod def tearDownClass(cls): kill_process_tree(cls.process.pid) def test_streaming_separate_reasoning_false(self): # Test streaming with separate_reasoning=False, reasoning_content should be empty client = openai.Client(api_key=self.api_key, base_url=self.base_url) payload = { "model": self.model, "messages": [ { "role": "user", "content": "What is 1+3?", } ], "max_tokens": 100, "stream": True, "extra_body": {"separate_reasoning": False}, } response = client.chat.completions.create(**payload) reasoning_content = "" content = "" for chunk in response: if chunk.choices[0].delta.content: content += chunk.choices[0].delta.content elif chunk.choices[0].delta.reasoning_content: reasoning_content += chunk.choices[0].delta.reasoning_content assert len(reasoning_content) == 0 assert len(content) > 0 def test_streaming_separate_reasoning_true(self): # Test streaming with separate_reasoning=True, reasoning_content should not be empty client = openai.Client(api_key=self.api_key, base_url=self.base_url) payload = { "model": self.model, "messages": [ { "role": "user", "content": "What is 1+3?", } ], "max_tokens": 100, "stream": True, "extra_body": {"separate_reasoning": True}, } response = client.chat.completions.create(**payload) reasoning_content = "" content = "" for chunk in response: if chunk.choices[0].delta.content: content += chunk.choices[0].delta.content elif chunk.choices[0].delta.reasoning_content: reasoning_content += chunk.choices[0].delta.reasoning_content assert len(reasoning_content) > 0 assert len(content) > 0 def test_streaming_separate_reasoning_true_stream_reasoning_false(self): # Test streaming with separate_reasoning=True, reasoning_content should not be empty client = openai.Client(api_key=self.api_key, base_url=self.base_url) payload = { "model": self.model, "messages": [ { "role": "user", "content": "What is 1+3?", } ], "max_tokens": 100, "stream": True, "extra_body": {"separate_reasoning": True, "stream_reasoning": False}, } response = client.chat.completions.create(**payload) reasoning_content = "" content = "" first_chunk = False for chunk in response: if 
        for chunk in response:
            if chunk.choices[0].delta.reasoning_content:
                # With stream_reasoning=False, the full reasoning text is delivered in a
                # single chunk instead of being streamed token by token.
                assert not first_chunk
                reasoning_content = chunk.choices[0].delta.reasoning_content
                first_chunk = True
            if chunk.choices[0].delta.content:
                content += chunk.choices[0].delta.content
        assert len(reasoning_content) > 0
        assert len(content) > 0

    def test_nonstreaming_separate_reasoning_false(self):
        # Test non-streaming with separate_reasoning=False; reasoning_content should be empty
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": "What is 1+3?",
                }
            ],
            "max_tokens": 100,
            "extra_body": {"separate_reasoning": False},
        }
        response = client.chat.completions.create(**payload)
        assert (
            not response.choices[0].message.reasoning_content
            or len(response.choices[0].message.reasoning_content) == 0
        )
        assert len(response.choices[0].message.content) > 0

    def test_nonstreaming_separate_reasoning_true(self):
        # Test non-streaming with separate_reasoning=True; reasoning_content should not be empty
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": "What is 1+3?",
                }
            ],
            "max_tokens": 100,
            "extra_body": {"separate_reasoning": True},
        }
        response = client.chat.completions.create(**payload)
        assert len(response.choices[0].message.reasoning_content) > 0
        assert len(response.choices[0].message.content) > 0


class TestReasoningContentWithoutParser(CustomTestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_REASONING_MODEL_NAME_FOR_TEST
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-1234"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            api_key=cls.api_key,
            other_args=[],  # No reasoning parser
        )
        cls.base_url += "/v1"

    @classmethod
    def tearDownClass(cls):
        kill_process_tree(cls.process.pid)

    def test_streaming_separate_reasoning_false(self):
        # Test streaming with separate_reasoning=False; reasoning_content should be empty
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": "What is 1+3?",
                }
            ],
            "max_tokens": 100,
            "stream": True,
            "extra_body": {"separate_reasoning": False},
        }
        response = client.chat.completions.create(**payload)
        reasoning_content = ""
        content = ""
        for chunk in response:
            if chunk.choices[0].delta.content:
                content += chunk.choices[0].delta.content
            elif chunk.choices[0].delta.reasoning_content:
                reasoning_content += chunk.choices[0].delta.reasoning_content
        assert len(reasoning_content) == 0
        assert len(content) > 0

    def test_streaming_separate_reasoning_true(self):
        # Without a reasoning parser, reasoning_content should stay empty even with
        # separate_reasoning=True
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": "What is 1+3?",
                }
            ],
            "max_tokens": 100,
            "stream": True,
            "extra_body": {"separate_reasoning": True},
        }
        response = client.chat.completions.create(**payload)
        reasoning_content = ""
        content = ""
        for chunk in response:
            if chunk.choices[0].delta.content:
                content += chunk.choices[0].delta.content
            elif chunk.choices[0].delta.reasoning_content:
                reasoning_content += chunk.choices[0].delta.reasoning_content
        assert len(reasoning_content) == 0
        assert len(content) > 0
    def test_streaming_separate_reasoning_true_stream_reasoning_false(self):
        # Without a reasoning parser, reasoning_content should stay empty even with
        # separate_reasoning=True and stream_reasoning=False
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": "What is 1+3?",
                }
            ],
            "max_tokens": 100,
            "stream": True,
            "extra_body": {"separate_reasoning": True, "stream_reasoning": False},
        }
        response = client.chat.completions.create(**payload)
        reasoning_content = ""
        content = ""
        first_chunk = False
        for chunk in response:
            if chunk.choices[0].delta.reasoning_content:
                # With stream_reasoning=False, reasoning (if any) arrives in a single chunk.
                assert not first_chunk
                reasoning_content = chunk.choices[0].delta.reasoning_content
                first_chunk = True
            if chunk.choices[0].delta.content:
                content += chunk.choices[0].delta.content
        assert not reasoning_content or len(reasoning_content) == 0
        assert len(content) > 0

    def test_nonstreaming_separate_reasoning_false(self):
        # Test non-streaming with separate_reasoning=False; reasoning_content should be empty
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": "What is 1+3?",
                }
            ],
            "max_tokens": 100,
            "extra_body": {"separate_reasoning": False},
        }
        response = client.chat.completions.create(**payload)
        assert (
            not response.choices[0].message.reasoning_content
            or len(response.choices[0].message.reasoning_content) == 0
        )
        assert len(response.choices[0].message.content) > 0

    def test_nonstreaming_separate_reasoning_true(self):
        # Without a reasoning parser, reasoning_content should stay empty even with
        # separate_reasoning=True
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": "What is 1+3?",
                }
            ],
            "max_tokens": 100,
            "extra_body": {"separate_reasoning": True},
        }
        response = client.chat.completions.create(**payload)
        assert (
            not response.choices[0].message.reasoning_content
            or len(response.choices[0].message.reasoning_content) == 0
        )
        assert len(response.choices[0].message.content) > 0


if __name__ == "__main__":
    unittest.main()