[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "sglang"
version = "0.5.2"
description = "SGLang is a fast serving framework for large language models and vision language models."
readme = "README.md"
requires-python = ">=3.10"
license = { file = "LICENSE" }
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: Apache Software License",
]
dependencies = ["aiohttp", "requests", "tqdm", "numpy", "IPython", "setproctitle"]

[project.optional-dependencies]
runtime_common = [
    "blobfile==3.0.0",
    "build",
    "compressed-tensors",
    "datasets",
    "einops",
    "fastapi",
    "hf_transfer",
    "huggingface_hub",
    "interegular",
    "llguidance>=0.7.11,<0.8.0",
    "modelscope",
    "msgspec",
    "ninja",
    "openai==1.99.1",
    "openai-harmony==0.0.4",
    "orjson",
    "outlines==0.1.11",
    "packaging",
    "partial_json_parser",
    "pillow",
    "prometheus-client>=0.20.0",
    "psutil",
    "pybase64",
    "pydantic",
    "pynvml",
    "python-multipart",
    "pyzmq>=25.1.2",
    "sentencepiece",
    "soundfile==0.13.1",
    "scipy",
    "timm==1.0.16",
    "tiktoken",
    "torchao==0.9.0",
    "transformers==4.56.1",
    "uvicorn",
    "uvloop",
    "xgrammar==0.1.24",
]

srt = [
    "sglang[runtime_common]",
    "sgl-kernel==0.3.9.post2",
    "torch==2.8.0",
    "torchaudio==2.8.0",
    "torchvision",
    "cuda-python",
    "flashinfer_python==0.3.1",
]

blackwell = [
    "sglang[runtime_common]",
    "sgl-kernel==0.3.9.post2",
    "torch==2.8.0",
    "torchaudio==2.8.0",
    "torchvision",
    "cuda-python",
    "flashinfer_python==0.3.1",
    "nvidia-cutlass-dsl==4.1.0",
]

# HIP (Heterogeneous-computing Interface for Portability) for AMD
# => base docker rocm/vllm-dev:20250114, not from the public vllm wheel
srt_hip = [
    "sglang[runtime_common]",
    "torch",
    "petit_kernel==0.0.2",
    "wave-lang==3.7.0",
]

# https://docs.sglang.ai/platforms/cpu_server.html
srt_cpu = ["sglang[runtime_common]", "intel-openmp"]

# https://docs.sglang.ai/platforms/ascend_npu.html
srt_npu = ["sglang[runtime_common]"]

# xpu is not enabled in the public vllm and torch wheels; follow
# https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
srt_xpu = ["sglang[runtime_common]"]

# For Intel Gaudi (device: hpu), follow the installation guide:
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = ["sglang[runtime_common]"]

openai = ["openai==1.99.1", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]
torch_memory_saver = ["torch_memory_saver==0.0.8"]
decord = ["decord"]

test = [
    "accelerate",
    "expecttest",
    "jsonlines",
    "matplotlib",
    "pandas",
    "peft",
    "sentence_transformers",
    "pytest",
    "tabulate",
]

all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]

dev = ["sglang[all]", "sglang[test]"]
dev_hip = ["sglang[all_hip]", "sglang[test]"]
dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
dev_hpu = ["sglang[all_hpu]", "sglang[test]"]
dev_cpu = ["sglang[all_cpu]", "sglang[test]"]
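# Illustrative installs composed from the extras above (a sketch, not part of
# the package metadata; pick the variant that matches your hardware):
#   pip install "sglang[all]"        # NVIDIA GPUs: srt runtime + client extras
#   pip install "sglang[all_hip]"    # AMD GPUs (ROCm/HIP)
#   pip install -e ".[dev]"          # development: everything in `all` plus `test`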
"srt/layers/moe/fused_moe_triton/configs/*/*.json", "srt/layers/quantization/configs/*.json", "srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp", ] [tool.setuptools.packages.find] exclude = [ "assets*", "benchmark*", "docs*", "dist*", "playground*", "scripts*", "tests*", ] [tool.wheel] exclude = [ "assets*", "benchmark*", "docs*", "dist*", "playground*", "scripts*", "tests*", ] [tool.codespell] ignore-words-list = "ans, als, hel, boostrap, childs, te, vas, hsa, ment" skip = "*.json,*.jsonl,*.patch,*.txt"