sglang0.4.5.post1/test/srt/kv_cache_scales_llama3_1_8b...

43 lines
949 B
JSON

{
"model_type": "llama",
"kv_cache": {
"dtype": "float8_e4m3fn",
"scaling_factor": {
"0": {
"0": 1,
"1": 1,
"2": 1,
"3": 1,
"4": 1,
"5": 1,
"6": 1,
"7": 1,
"8": 1,
"9": 1,
"10": 1,
"11": 1,
"12": 1,
"13": 1,
"14": 1,
"15": 1,
"16": 1,
"17": 1,
"18": 1,
"19": 1,
"20": 1,
"21": 1,
"22": 1,
"23": 1,
"24": 1,
"25": 1,
"26": 1,
"27": 1,
"28": 1,
"29": 1,
"30": 1,
"31": 1
}
}
}
}