# Copyright 2023-2024 SGLang Team
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Pydantic models for the OpenAI API protocol."""

import time
from typing import Dict, List, Optional, Union

from pydantic import BaseModel, Field, root_validator
from typing_extensions import Literal


class ModelCard(BaseModel):
    """Model cards."""

    id: str
    object: str = "model"
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "sglang"
    root: Optional[str] = None
    max_model_len: Optional[int] = None


class ModelList(BaseModel):
    """Model list consisting of model cards."""

    object: str = "list"
    data: List[ModelCard] = Field(default_factory=list)


class ErrorResponse(BaseModel):
    object: str = "error"
    message: str
    type: str
    param: Optional[str] = None
    code: int


class LogProbs(BaseModel):
    text_offset: List[int] = Field(default_factory=list)
    token_logprobs: List[Optional[float]] = Field(default_factory=list)
    tokens: List[str] = Field(default_factory=list)
    top_logprobs: List[Optional[Dict[str, float]]] = Field(default_factory=list)


class TopLogprob(BaseModel):
    token: str
    bytes: List[int]
    logprob: float


class ChatCompletionTokenLogprob(BaseModel):
    token: str
    bytes: List[int]
    logprob: float
    top_logprobs: List[TopLogprob]


class ChoiceLogprobs(BaseModel):
    # Built for the v1/chat/completions response.
    content: List[ChatCompletionTokenLogprob]


class UsageInfo(BaseModel):
    prompt_tokens: int = 0
    total_tokens: int = 0
    completion_tokens: Optional[int] = 0
    # Only used to return cached tokens when --enable-cache-report is set.
    prompt_tokens_details: Optional[Dict[str, int]] = None


class StreamOptions(BaseModel):
    include_usage: Optional[bool] = False


class JsonSchemaResponseFormat(BaseModel):
    name: str
    description: Optional[str] = None
    # Use an alias to work around the pydantic name conflict with `schema`.
    schema_: Optional[Dict[str, object]] = Field(alias="schema", default=None)
    strict: Optional[bool] = False


class FileRequest(BaseModel):
    # https://platform.openai.com/docs/api-reference/files/create
    file: bytes  # The File object (not file name) to be uploaded
    purpose: str = "batch"  # The intended purpose of the uploaded file; defaults to "batch"


class FileResponse(BaseModel):
    id: str
    object: str = "file"
    bytes: int
    created_at: int
    filename: str
    purpose: str


class FileDeleteResponse(BaseModel):
    id: str
    object: str = "file"
    deleted: bool


class BatchRequest(BaseModel):
    input_file_id: str  # The ID of an uploaded file that contains requests for the new batch
    endpoint: str  # The endpoint to be used for all requests in the batch
    completion_window: str  # The time frame within which the batch should be processed
    metadata: Optional[dict] = None  # Optional custom metadata for the batch


class BatchResponse(BaseModel):
    id: str
    object: str = "batch"
    endpoint: str
    errors: Optional[dict] = None
    input_file_id: str
    completion_window: str
    status: str = "validating"
    output_file_id: Optional[str] = None
    error_file_id: Optional[str] = None
    created_at: int
    in_progress_at: Optional[int] = None
    expires_at: Optional[int] = None
    finalizing_at: Optional[int] = None
    completed_at: Optional[int] = None
    failed_at: Optional[int] = None
    expired_at: Optional[int] = None
    cancelling_at: Optional[int] = None
    cancelled_at: Optional[int] = None
    request_counts: Optional[dict] = None
    metadata: Optional[dict] = None
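
# Illustrative sketch, not part of the protocol definitions: how the batch
# models above fit together. All field values here are made up for
# documentation purposes.
#
#   batch = BatchResponse(
#       id="batch_abc123",
#       endpoint="/v1/chat/completions",
#       input_file_id="file-xyz",
#       completion_window="24h",
#       created_at=int(time.time()),
#   )
#   # status starts at "validating"; the server fills in the lifecycle
#   # timestamps (in_progress_at, completed_at, ...) as the batch advances.
#   payload = batch.dict(exclude_none=True)
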
class CompletionRequest(BaseModel):
    # Ordered by the official OpenAI API documentation
    # https://platform.openai.com/docs/api-reference/completions/create
    model: str
    prompt: Union[List[int], List[List[int]], str, List[str]]
    best_of: Optional[int] = None
    echo: bool = False
    frequency_penalty: float = 0.0
    logit_bias: Optional[Dict[str, float]] = None
    logprobs: Optional[int] = None
    max_tokens: int = 16
    n: int = 1
    presence_penalty: float = 0.0
    seed: Optional[int] = None
    stop: Optional[Union[str, List[str]]] = None
    stream: bool = False
    stream_options: Optional[StreamOptions] = None
    suffix: Optional[str] = None
    temperature: float = 1.0
    top_p: float = 1.0
    user: Optional[str] = None

    # Extra parameters for the SRT backend only; ignored by OpenAI models.
    top_k: int = -1
    min_p: float = 0.0
    min_tokens: int = 0
    json_schema: Optional[str] = None
    regex: Optional[str] = None
    ebnf: Optional[str] = None
    repetition_penalty: float = 1.0
    stop_token_ids: Optional[List[int]] = None
    no_stop_trim: bool = False
    ignore_eos: bool = False
    skip_special_tokens: bool = True
    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
    session_params: Optional[Dict] = None


class CompletionResponseChoice(BaseModel):
    index: int
    text: str
    logprobs: Optional[LogProbs] = None
    finish_reason: Literal["stop", "length", "content_filter"]
    matched_stop: Union[None, int, str] = None


class CompletionResponse(BaseModel):
    id: str
    object: str = "text_completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseChoice]
    usage: UsageInfo


class CompletionResponseStreamChoice(BaseModel):
    index: int
    text: str
    logprobs: Optional[LogProbs] = None
    finish_reason: Optional[Literal["stop", "length", "content_filter"]] = None
    matched_stop: Union[None, int, str] = None


class CompletionStreamResponse(BaseModel):
    id: str
    object: str = "text_completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseStreamChoice]
    usage: Optional[UsageInfo] = None


class ChatCompletionMessageContentTextPart(BaseModel):
    type: Literal["text"]
    text: str


class ChatCompletionMessageContentImageURL(BaseModel):
    url: str
    detail: Optional[Literal["auto", "low", "high"]] = "auto"


class ChatCompletionMessageContentAudioURL(BaseModel):
    url: str


class ChatCompletionMessageContentImagePart(BaseModel):
    type: Literal["image_url"]
    image_url: ChatCompletionMessageContentImageURL
    modalities: Optional[Literal["image", "multi-images", "video"]] = "image"


class ChatCompletionMessageContentAudioPart(BaseModel):
    type: Literal["audio_url"]
    audio_url: ChatCompletionMessageContentAudioURL


ChatCompletionMessageContentPart = Union[
    ChatCompletionMessageContentTextPart,
    ChatCompletionMessageContentImagePart,
    ChatCompletionMessageContentAudioPart,
]


class ChatCompletionMessageGenericParam(BaseModel):
    role: Literal["system", "assistant", "tool"]
    content: Union[str, List[ChatCompletionMessageContentTextPart]]


class ChatCompletionMessageUserParam(BaseModel):
    role: Literal["user"]
    content: Union[str, List[ChatCompletionMessageContentPart]]


ChatCompletionMessageParam = Union[
    ChatCompletionMessageGenericParam, ChatCompletionMessageUserParam
]
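
# Illustrative sketch, not part of the protocol definitions: parsing a
# multimodal user message into the union above. The URL and text are made-up
# examples.
#
#   msg = ChatCompletionMessageUserParam(
#       role="user",
#       content=[
#           {"type": "text", "text": "Describe this image."},
#           {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
#       ],
#   )
#   # Pydantic coerces each dict into the matching content-part model via the
#   # "type" discriminator, so msg.content[1].image_url.detail == "auto".
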
Literal["text", "json_object", "json_schema"] json_schema: Optional[JsonSchemaResponseFormat] = None class StructuresResponseFormat(BaseModel): begin: str schema_: Optional[Dict[str, object]] = Field(alias="schema", default=None) end: str class StructuralTagResponseFormat(BaseModel): type: Literal["structural_tag"] structures: List[StructuresResponseFormat] triggers: List[str] class Function(BaseModel): """Function descriptions.""" description: Optional[str] = Field(default=None, examples=[None]) name: Optional[str] = None parameters: Optional[object] = None strict: bool = False class Tool(BaseModel): """Function wrapper.""" type: str = Field(default="function", examples=["function"]) function: Function class ToolChoiceFuncName(BaseModel): """The name of tool choice function.""" name: Optional[str] = None class ToolChoice(BaseModel): """The tool choice definition.""" function: ToolChoiceFuncName type: Literal["function"] = Field(default="function", examples=["function"]) class ChatCompletionRequest(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/chat/create messages: List[ChatCompletionMessageParam] model: str frequency_penalty: float = 0.0 logit_bias: Optional[Dict[str, float]] = None logprobs: bool = False top_logprobs: Optional[int] = None max_tokens: Optional[int] = None n: int = 1 presence_penalty: float = 0.0 response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None seed: Optional[int] = None stop: Optional[Union[str, List[str]]] = None stream: bool = False stream_options: Optional[StreamOptions] = None temperature: float = 0.7 top_p: float = 1.0 user: Optional[str] = None tools: Optional[List[Tool]] = Field(default=None, examples=[None]) tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] = Field( default="auto", examples=["none"] ) # noqa @root_validator(pre=True) def set_tool_choice_default(cls, values): if values.get("tool_choice") is None: if values.get("tools") is None: values["tool_choice"] = "none" else: values["tool_choice"] = "auto" return values # Extra parameters for SRT backend only and will be ignored by OpenAI models. 
class FunctionResponse(BaseModel):
    """Function response."""

    name: Optional[str] = None
    arguments: Optional[str] = None


class ToolCall(BaseModel):
    """Tool call response."""

    id: str
    type: Literal["function"] = "function"
    function: FunctionResponse


class ChatMessage(BaseModel):
    role: Optional[str] = None
    content: Optional[str] = None
    reasoning_content: Optional[str] = None
    tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])


class ChatCompletionResponseChoice(BaseModel):
    index: int
    message: ChatMessage
    logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
    finish_reason: Literal[
        "stop", "length", "tool_calls", "content_filter", "function_call"
    ]
    matched_stop: Union[None, int, str] = None


class ChatCompletionResponse(BaseModel):
    id: str
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseChoice]
    usage: UsageInfo


class DeltaMessage(BaseModel):
    role: Optional[str] = None
    content: Optional[str] = None
    reasoning_content: Optional[str] = None
    tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])


class ChatCompletionResponseStreamChoice(BaseModel):
    index: int
    delta: DeltaMessage
    logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
    finish_reason: Optional[
        Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
    ] = None
    matched_stop: Union[None, int, str] = None


class ChatCompletionStreamResponse(BaseModel):
    id: str
    object: str = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseStreamChoice]
    usage: Optional[UsageInfo] = None


class MultimodalEmbeddingInput(BaseModel):
    text: Optional[str] = None
    image: Optional[str] = None


class EmbeddingRequest(BaseModel):
    # Ordered by the official OpenAI API documentation
    # https://platform.openai.com/docs/api-reference/embeddings/create
    input: Union[
        List[int], List[List[int]], str, List[str], List[MultimodalEmbeddingInput]
    ]
    model: str
    encoding_format: str = "float"
    dimensions: Optional[int] = None
    user: Optional[str] = None


class EmbeddingObject(BaseModel):
    embedding: List[float]
    index: int
    object: str = "embedding"


class EmbeddingResponse(BaseModel):
    data: List[EmbeddingObject]
    model: str
    object: str = "list"
    usage: Optional[UsageInfo] = None
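
# Illustrative sketch, not part of the protocol definitions: a minimal
# embedding request/response round trip. The model name and vector values are
# made up.
#
#   req = EmbeddingRequest(model="default", input=["hello", "world"])
#   resp = EmbeddingResponse(
#       model=req.model,
#       data=[
#           EmbeddingObject(embedding=[0.1, 0.2], index=0),
#           EmbeddingObject(embedding=[0.3, 0.4], index=1),
#       ],
#   )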