| .. |
|
ascend
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
configs
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
cpu
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
entrypoints/http_server
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
ep
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
hicache
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
lora
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
models
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
openai_server
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
quant
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
rl
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
double-sparsity-config-Llama-3.1-8B-Instruct.json
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
experiment_runner.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
kv_cache_scales_llama3_1_8b.json
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
kv_cache_scales_llama3_8b.json
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
kv_cache_scales_qwen2_1_5b.json
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
parse_results.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
run_suite.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_abort.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_bench_one_batch.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_bench_serving.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_bnb.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_chunked_prefill.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_cpp_radix_cache.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_cpu_graph.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_create_kvindices.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_custom_allreduce.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_data_parallelism.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_disaggregation.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_disaggregation_different_tp.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_disaggregation_pp.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_double_sparsity.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_dp_attention.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_eagle_infer_a.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_eagle_infer_b.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_ebnf_constrained.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_eval_accuracy_large.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_eval_fp8_accuracy.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_expert_distribution.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_expert_location_updater.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_fa3.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_fim_completion.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_flashmla.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_forward_split_prefill.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_full_deepseek_v3.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_function_call_parser.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_fused_moe.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_get_weights_by_name.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_gguf.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_gpt_oss_1gpu.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_gpt_oss_4gpu.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_gpt_oss_common.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_gptqmodel_dynamic.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_harmony_parser.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_health_check.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_hidden_states.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_hybrid_attn_backend.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_input_embeddings.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_intel_amx_attention_backend.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_io_struct.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_jinja_template_utils.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_kv_events.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_local_attn.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_metrics.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_metrics_utils.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_mla.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_mla_deepseek_v3.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_mla_flashinfer.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_mla_fp8.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_mla_int8_deepseek_v3.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_mla_tp.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_modelopt.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_modelopt_fp8kvcache.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_models_from_modelscope.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_moe_eval_accuracy_large.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_mscclpp.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_multi_instance_release_memory_occupation.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_multi_tokenizer.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_nightly_gsm8k_eval.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_nightly_gsm8k_eval_amd.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_no_chunked_prefill.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_no_overlap_scheduler.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_original_logprobs.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_page_size.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_patch_torch.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_penalty.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_pp_single_node.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_pytorch_sampling_backend.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_quick_allreduce.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_radix_attention.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_reasoning_parser.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_regex_constrained.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_release_memory_occupation.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_request_queue_validation.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_retract_decode.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_rope_rocm.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_sagemaker_server.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_schedule_policy.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_score_api.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_server_args.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_session_control.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_skip_tokenizer_init.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_srt_endpoint.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_srt_engine.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_srt_engine_with_quant_args.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_standalone_speculative_decoding.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_start_profile.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_swa_unittest.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_tokenizer_batch_encode.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_torch_compile.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_torch_compile_moe.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_torch_native_attention_backend.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_torch_tp.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_torchao.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_triton_attention_backend.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_triton_attention_kernels.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_triton_attention_rocm_mla.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_triton_fused_moe.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_triton_moe_channel_fp8_kernel.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_triton_moe_wna16.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_triton_sliding_window.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_two_batch_overlap.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_utils_update_weights.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_vertex_endpoint.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_vision_chunked_prefill.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_vision_openai_server_a.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_vision_openai_server_b.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_vision_openai_server_common.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_vllm_dependency.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_vlm_accuracy.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_vlm_input_format.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_wave_attention_backend.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_wave_attention_kernels.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |
|
test_weight_version.py
|
first commit
|
2025-09-15 10:32:17 +08:00 |