sglang.0.4.8.post1/sglang/python/sglang/profiler.py

168 lines
4.3 KiB
Python

"""
Run live profiling.
Usage:
python3 -m sglang.profiler
"""
import argparse
import json
import os
import time
import urllib.parse
from argparse import ArgumentParser
from pathlib import Path
from typing import List, Optional
import requests
PARENT_FOLDER = "/tmp/sglang-profile"
def _run_profile(
url: Optional[str],
num_steps: int,
activities: List[str],
output_dir: Optional[str] = None,
profile_name: Optional[str] = None,
profile_by_stage: bool = False,
) -> str:
if output_dir is None:
output_dir = PARENT_FOLDER
output_dir = os.path.normpath(output_dir)
output_dir = os.path.abspath(output_dir)
output_dir = Path(output_dir)
# Add "profile_name/timestamp" to the path.
if profile_name:
output_dir = output_dir / profile_name
output_dir = output_dir / str(time.time())
output_dir.mkdir(exist_ok=True, parents=True)
print(f"Dump profiling traces to {output_dir}")
print(
f"Waiting for {num_steps} steps and the trace to be flushed.... ({profile_by_stage=})"
)
# Dump server args.
file_path = Path(output_dir) / "server_args.json"
if not file_path.exists():
response = requests.get(url + "/get_server_info")
response.raise_for_status()
server_args_data = response.json()
with open(file_path, "w") as file:
file.write(json.dumps(server_args_data))
# Start profiler. The API replies when all steps are processed
# and files are generated.
json_data = {
"output_dir": str(output_dir),
"num_steps": str(num_steps),
"activities": activities,
"profile_by_stage": profile_by_stage,
}
response = requests.post(url=url + "/start_profile", json=json_data)
response.raise_for_status()
trace_link = str(output_dir)
return trace_link
def run_profile(
url: Optional[str],
num_steps: int,
activities: List[str],
output_dir: Optional[str] = None,
profile_name: Optional[str] = None,
profile_by_stage: bool = False,
):
# step based profile will self terminate on num_steps constraints
link = _run_profile(
url, num_steps, activities, output_dir, profile_name, profile_by_stage
)
return link
if __name__ == "__main__":
parser = ArgumentParser(description="Benchmark the online serving throughput.")
parser.add_argument(
"--url",
type=str,
default="http://localhost:30000",
help="Server or API base url if not using http host and port.",
)
parser.add_argument(
"--output-dir",
type=str,
default=None,
help="Profile directory to dump profile traces.",
)
parser.add_argument(
"--profile-name",
type=str,
default=None,
help="The name of this profile run.",
)
parser.add_argument(
"--num-steps",
type=int,
default=5,
help="The number of forward steps to profile.",
)
parser.add_argument(
"--profile-by-stage",
action=argparse.BooleanOptionalAction,
type=bool,
default=False,
help="The number of forward steps to profile.",
)
parser.add_argument(
"--cpu",
action=argparse.BooleanOptionalAction,
type=bool,
default=True,
help="Whether to profile CPU activity",
)
parser.add_argument(
"--gpu",
action=argparse.BooleanOptionalAction,
type=bool,
default=True,
help="Whether to profile GPU activity",
)
parser.add_argument(
"--mem",
action=argparse.BooleanOptionalAction,
type=bool,
default=False,
help="Whether to memory usage (https://pytorch.org/memory_viz)",
)
parser.add_argument(
"--rpd",
action=argparse.BooleanOptionalAction,
type=bool,
default=False,
help="Whether to use rpd profiler (https://github.com/ROCm/rocmProfileData)",
)
args = parser.parse_args()
activities = []
if args.cpu:
activities.append("CPU")
if args.gpu:
activities.append("GPU")
if args.mem:
activities.append("MEM")
if args.rpd:
activities.append("RPD")
run_profile(
args.url,
args.num_steps,
activities,
args.output_dir,
args.profile_name,
args.profile_by_stage,
)