# SPDX-License-Identifier: Apache-2.0
"""
Saves each worker's model state dict directly to a checkpoint, which enables a
fast load path for large tensor-parallel models where each worker only needs to
read its own shard rather than the entire checkpoint.

Example usage:

python save_remote_state.py \
    --model-path /path/to/load \
    --tensor-parallel-size 8 \
    --remote-model-save-url [protocol]://[host]:[port]/[model_name]

Then, the model can be loaded with:

llm = Engine(
    model_path="[protocol]://[host]:[port]/[model_name]",
    tensor_parallel_size=8,
)
"""
import dataclasses
from argparse import ArgumentParser
from pathlib import Path

from sglang import Engine, ServerArgs

parser = ArgumentParser()
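# Register the standard ServerArgs CLI flags (e.g. --model-path,
# --tensor-parallel-size), so the script accepts the same options as the engine.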
ServerArgs.add_cli_args(parser)

parser.add_argument(
    "--remote-model-save-url",
    required=True,
    type=str,
    help="remote address to store model weights",
)
parser.add_argument(
    "--remote-draft-model-save-url",
    default=None,
    type=str,
    help="remote address to store draft model weights",
)


def main(args):
    engine_args = ServerArgs.from_cli_args(args)
    model_path = engine_args.model_path
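    # The checkpoint must already exist as a local directory; this script only
    # re-saves those weights to the remote store.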
    if not Path(model_path).is_dir():
        raise ValueError("model path must be a local directory")
    # Create LLM instance from arguments
    llm = Engine(**dataclasses.asdict(engine_args))
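    # Each tensor-parallel worker writes its own shard to the remote URL; the
    # draft model is saved too when --remote-draft-model-save-url is given.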
    llm.save_remote_model(
        url=args.remote_model_save_url, draft_url=args.remote_draft_model_save_url
    )
    print("Saved remote (draft) model successfully")


if __name__ == "__main__":
    args = parser.parse_args()
    main(args)