sglang.0.4.8.post1/sglang/test/srt/kv_cache_scales_llama3_8b.json

43 lines
1.1 KiB
JSON

{
"model_type": "llama",
"kv_cache": {
"dtype": "float8_e4m3fn",
"scaling_factor": {
"0": {
"0": 0.0408,
"1": 0.0503,
"2": 0.0667,
"3": 0.0909,
"4": 0.1135,
"5": 0.127,
"6": 0.1768,
"7": 0.1488,
"8": 0.1135,
"9": 0.1203,
"10": 0.1013,
"11": 0.0842,
"12": 0.1231,
"13": 0.1096,
"14": 0.1221,
"15": 0.1013,
"16": 0.1067,
"17": 0.0952,
"18": 0.0899,
"19": 0.097,
"20": 0.087,
"21": 0.0994,
"22": 0.0904,
"23": 0.1013,
"24": 0.1019,
"25": 0.1053,
"26": 0.1,
"27": 0.0894,
"28": 0.1013,
"29": 0.1488,
"30": 0.0766,
"31": 0.0821
}
}
}
}