{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Launch A Server\n", "\n", "Launch the server with a reasoning model (Qwen3-4B) and the `qwen3` reasoning parser." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sglang import separate_reasoning, assistant_begin, assistant_end\n", "from sglang import assistant, function, gen, system, user\n", "from sglang import image\n", "from sglang import RuntimeEndpoint, set_default_backend\n", "from sglang.srt.utils import load_image\n", "from sglang.test.test_utils import is_in_ci\n", "from sglang.utils import print_highlight, terminate_process, wait_for_server\n", "\n", "\n", "if is_in_ci():\n", " from patch import launch_server_cmd\n", "else:\n", " from sglang.utils import launch_server_cmd\n", "\n", "\n", "server_process, port = launch_server_cmd(\n", " \"python3 -m sglang.launch_server --model-path Qwen/Qwen3-4B --reasoning-parser qwen3 --host 0.0.0.0\"\n", ")\n", "\n", "wait_for_server(f\"http://localhost:{port}\")\n", "print(f\"Server started on http://localhost:{port}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set the default backend. Note: you can set `chat_template_name` in `RuntimeEndpoint`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "set_default_backend(\n", " RuntimeEndpoint(f\"http://localhost:{port}\", chat_template_name=\"qwen\")\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's start with a basic question-answering task and see how the reasoning content is generated."
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@function\n", "def basic_qa(s, question):\n", "    \"\"\"Answer a single question and store the generation under `answer`.\"\"\"\n", "    s += system(\"You are a helpful assistant that can answer questions.\")\n", "    s += user(question)\n", "    s += assistant_begin()\n", "    s += gen(\"answer\", max_tokens=512)\n", "    s += assistant_end()\n", "\n", "\n", "state = basic_qa(\"List 3 countries and their capitals.\")\n", "print_highlight(state[\"answer\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "With `separate_reasoning`, you can move the reasoning content to `{param_name}_reasoning_content` in the state." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@function\n", "def basic_qa_separate_reasoning(s, question):\n", "    \"\"\"Answer a single question with the generation wrapped in `separate_reasoning`.\n", "\n", "    The calls below read the reasoning from `answer_reasoning_content` and the\n", "    remaining content from `answer`.\n", "    \"\"\"\n", "    s += system(\"You are a helpful assistant that can answer questions.\")\n", "    s += user(question)\n", "    s += assistant_begin()\n", "    s += separate_reasoning(gen(\"answer\", max_tokens=512), model_type=\"qwen3\")\n", "    s += assistant_end()\n", "\n", "\n", "reasoning_state = basic_qa_separate_reasoning(\"List 3 countries and their capitals.\")\n", "# List the variable names recorded in the state.\n", "print_highlight(reasoning_state.stream_executor.variable_event.keys())\n", "print_highlight(\n", "    f\"\\nSeparated Reasoning Content:\\n{reasoning_state['answer_reasoning_content']}\"\n", ")\n", "\n", "print_highlight(f\"\\n\\nContent:\\n{reasoning_state['answer']}\")\n", "print_highlight(f\"\\n\\nMessages:\\n{reasoning_state.messages()[-1]}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`separate_reasoning` can also be used in multi-turn conversations."
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@function\n", "def multi_turn_qa(s):\n", "    \"\"\"Run two user turns, wrapping each assistant reply in `separate_reasoning`.\n", "\n", "    The replies are stored under `first_answer` and `second_answer`; the calls\n", "    below read the matching `*_reasoning_content` entries as well.\n", "    \"\"\"\n", "    s += system(\"You are a helpful assistant that can answer questions.\")\n", "    s += user(\"Please give me a list of 3 countries and their capitals.\")\n", "    s += assistant(\n", "        separate_reasoning(gen(\"first_answer\", max_tokens=512), model_type=\"qwen3\")\n", "    )\n", "    s += user(\"Please give me another list of 3 countries and their capitals.\")\n", "    s += assistant(\n", "        separate_reasoning(gen(\"second_answer\", max_tokens=512), model_type=\"qwen3\")\n", "    )\n", "    return s\n", "\n", "\n", "reasoning_state = multi_turn_qa()\n", "print_highlight(f\"\\n\\nfirst_answer:\\n{reasoning_state['first_answer']}\")\n", "print_highlight(\n", "    f\"\\n\\nfirst_answer_reasoning_content:\\n{reasoning_state['first_answer_reasoning_content']}\"\n", ")\n", "print_highlight(f\"\\n\\nsecond_answer:\\n{reasoning_state['second_answer']}\")\n", "print_highlight(\n", "    f\"\\n\\nsecond_answer_reasoning_content:\\n{reasoning_state['second_answer_reasoning_content']}\"\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using No thinking as Qwen 3's advanced feature \n", "\n", "`separate_reasoning` is particularly useful when combined with Qwen 3's advanced features, such as appending `/no_think` to a prompt as shown in the next cell.\n", "\n", "[Qwen 3's advanced usages](https://qwenlm.github.io/blog/qwen3/#advanced-usages)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "reasoning_state = basic_qa_separate_reasoning(\n", "    \"List 3 countries and their capitals. /no_think\"\n", ")\n", "print_highlight(f\"Reasoning Content:\\n{reasoning_state['answer_reasoning_content']}\")\n", "print_highlight(f\"Content:\\n{reasoning_state['answer']}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`separate_reasoning` can also be used in regular expression generation."
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@function\n", "def regular_expression_gen(s):\n", "    \"\"\"Ask for an IP address and constrain the `answer` generation with an IPv4 regex.\"\"\"\n", "    s += user(\n", "        \"What is the IP address of the Google DNS servers? just provide the answer\"\n", "    )\n", "    s += assistant(\n", "        separate_reasoning(\n", "            gen(\n", "                \"answer\",\n", "                temperature=0,\n", "                # The separator is written as `\\.` so it matches a literal dot;\n", "                # a bare `.` would accept any character between the octets.\n", "                regex=r\"((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\",\n", "                max_tokens=512,\n", "            ),\n", "            model_type=\"qwen3\",\n", "        ),\n", "    )\n", "\n", "\n", "reasoning_state = regular_expression_gen()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print_highlight(f\"Answer:\\n{reasoning_state['answer']}\")\n", "print_highlight(\n", "    f\"\\n\\nReasoning Content:\\n{reasoning_state['answer_reasoning_content']}\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Stop the server launched in the first code cell so it does not keep running\n", "# after the notebook finishes.\n", "terminate_process(server_process)" ] } ], "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3" } }, "nbformat": 4, "nbformat_minor": 2 }