# Copyright 2023-2024 SGLang Team
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import dataclasses
from typing import List, Optional

import torch


@dataclasses.dataclass
class LoRAAdaptor:
    """A single LoRA adaptor with optional per-adaptor tolerance overrides."""

    name: str
    prefill_tolerance: Optional[float] = None
    decode_tolerance: Optional[float] = None
    rouge_l_tolerance: Optional[float] = None


@dataclasses.dataclass
class LoRAModelCase:
    """A base model paired with its LoRA adaptors and case-level test tolerances."""

    base: str
    adaptors: List[LoRAAdaptor]
    tp_size: int = 1
    prefill_tolerance: float = 1e-1
    decode_tolerance: float = 1e-1
    rouge_l_tolerance: float = 1.0
    max_loras_per_batch: int = 1
    skip_long_prompt: bool = False

    def __post_init__(self):
        # Every adaptor in the case must fit into a single batch.
        if len(self.adaptors) > self.max_loras_per_batch:
            raise ValueError(
                f"For base '{self.base}', number of adaptors ({len(self.adaptors)}) "
                f"must be <= max_loras_per_batch ({self.max_loras_per_batch})"
            )


TORCH_DTYPES = [torch.float16]
BACKENDS = ["triton"]
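

# Minimal usage sketch (assumption: the base-model and adaptor names below are
# illustrative placeholders, not part of this module; real cases are defined by
# the tests that import these dataclasses).
if __name__ == "__main__":
    # Construct a single-adaptor case; __post_init__ validates that the number
    # of adaptors does not exceed max_loras_per_batch.
    example_case = LoRAModelCase(
        base="meta-llama/Llama-2-7b-hf",
        adaptors=[LoRAAdaptor(name="example-org/llama-2-7b-lora")],
        max_loras_per_batch=1,
    )
    print(example_case)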