opencompass/examples
hailin 8339154e58 Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
..
eval_OlympiadBench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_PMMEval.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_TheoremQA.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_academic_leaderboard_202407.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_academic_leaderboard_202412.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_academic_leaderboard_202502.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_alaya.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_api_demo.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_attack.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_babilong.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_base_demo.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_bluelm_32k_lveval.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_charm_mem.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_charm_rea.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_chat_agent.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_chat_agent_baseline.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_chat_demo.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_chat_last.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_chembench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_chinese_simpleqa.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_cibench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_cibench_api.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_circular.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_claude.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_code_passk.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_code_passk_repeat_dataset.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_codeagent.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_codegeex2.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_compassarena_subjectivebench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_compassarena_subjectivebench_bradleyterry.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_contamination.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_corebench_2409_base_objective.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_corebench_2409_chat_objective.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_corebench_2409_longcontext.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_corebench_2409_subjective.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_deepseek_r1.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_dingo.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_ds1000_interpreter.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_edgellm_demo.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_gpt3.5.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_gpt4.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_hellobench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_hf_llama2.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_hf_llama_7b.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_inference_ppl.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internLM.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm2_chat_keyset.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm2_keyset.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm3_math500_thinking.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm_7b.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm_chat_lmdeploy_apiserver.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm_chat_turbomind.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm_flames_chat.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm_lmdeploy_apiserver.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm_math_chat.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_internlm_turbomind.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_judgerbench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_korbench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_lightllm.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_livestembench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_llama2_7b.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_llama2_7b_lveval.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_llama3_instruct.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_llm_compression.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_llm_judge.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_lmdeploy_demo.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_longbenchv2.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_math_llm_judge.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_math_llm_judge_internal.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_math_verify.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_mathbench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_mmlu_cf.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_mmlu_pro.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_mmlu_with_zero_retriever_overwritten.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_modelscope_datasets.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_multi_prompt_demo.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_musr.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_needlebench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_qwen_7b.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_qwen_7b_chat.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_qwen_7b_chat_lawbench.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_ruler.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_ruler_fix_tokenizer.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_rwkv5_3b.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_simpleqa.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_subjective.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_subjective_alpacaeval_official.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_subjective_bradleyterry.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_teval.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00
eval_with_model_dataset_combinations.py Start at 0.4.2 of OpenCompass 2025-05-23 12:25:11 +00:00