""" Run live profiling. Usage: python3 -m sglang.profiler """ import argparse import json import os import time import urllib.parse from argparse import ArgumentParser from pathlib import Path from typing import List, Optional import requests PARENT_FOLDER = "/tmp/sglang-profile" def _run_profile( url: Optional[str], num_steps: int, activities: List[str], output_dir: Optional[str] = None, profile_name: Optional[str] = None, profile_by_stage: bool = False, ) -> str: if output_dir is None: output_dir = PARENT_FOLDER output_dir = os.path.normpath(output_dir) output_dir = os.path.abspath(output_dir) output_dir = Path(output_dir) # Add "profile_name/timestamp" to the path. if profile_name: output_dir = output_dir / profile_name output_dir = output_dir / str(time.time()) output_dir.mkdir(exist_ok=True, parents=True) print(f"Dump profiling traces to {output_dir}") print( f"Waiting for {num_steps} steps and the trace to be flushed.... ({profile_by_stage=})" ) # Dump server args. file_path = Path(output_dir) / "server_args.json" if not file_path.exists(): response = requests.get(url + "/get_server_info") response.raise_for_status() server_args_data = response.json() with open(file_path, "w") as file: file.write(json.dumps(server_args_data)) # Start profiler. The API replies when all steps are processed # and files are generated. json_data = { "output_dir": str(output_dir), "num_steps": str(num_steps), "activities": activities, "profile_by_stage": profile_by_stage, } response = requests.post(url=url + "/start_profile", json=json_data) response.raise_for_status() trace_link = str(output_dir) return trace_link def run_profile( url: Optional[str], num_steps: int, activities: List[str], output_dir: Optional[str] = None, profile_name: Optional[str] = None, profile_by_stage: bool = False, ): # step based profile will self terminate on num_steps constraints link = _run_profile( url, num_steps, activities, output_dir, profile_name, profile_by_stage ) return link if __name__ == "__main__": parser = ArgumentParser(description="Benchmark the online serving throughput.") parser.add_argument( "--url", type=str, default="http://localhost:30000", help="Server or API base url if not using http host and port.", ) parser.add_argument( "--output-dir", type=str, default=None, help="Profile directory to dump profile traces.", ) parser.add_argument( "--profile-name", type=str, default=None, help="The name of this profile run.", ) parser.add_argument( "--num-steps", type=int, default=5, help="The number of forward steps to profile.", ) parser.add_argument( "--profile-by-stage", action=argparse.BooleanOptionalAction, type=bool, default=False, help="The number of forward steps to profile.", ) parser.add_argument( "--cpu", action=argparse.BooleanOptionalAction, type=bool, default=True, help="Whether to profile CPU activity", ) parser.add_argument( "--gpu", action=argparse.BooleanOptionalAction, type=bool, default=True, help="Whether to profile GPU activity", ) parser.add_argument( "--mem", action=argparse.BooleanOptionalAction, type=bool, default=False, help="Whether to memory usage (https://pytorch.org/memory_viz)", ) parser.add_argument( "--rpd", action=argparse.BooleanOptionalAction, type=bool, default=False, help="Whether to use rpd profiler (https://github.com/ROCm/rocmProfileData)", ) args = parser.parse_args() activities = [] if args.cpu: activities.append("CPU") if args.gpu: activities.append("GPU") if args.mem: activities.append("MEM") if args.rpd: activities.append("RPD") run_profile( args.url, args.num_steps, activities, args.output_dir, args.profile_name, args.profile_by_stage, )