53 lines
1.2 KiB
Python
53 lines
1.2 KiB
Python
"""
|
|
used for debug using tensor comparison
|
|
dump {name: tensor} into "log_hf.jsonl" and "log_srt.jsonl"
|
|
use the same name for two tensors that supposed to be close
|
|
recommend name like: "layer 2 after mlp"
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
|
|
import torch
|
|
|
|
# Choose which pair of dump files to compare.  Running the script with the
# single argument "base" selects the "base_"-prefixed dumps; running it with
# no argument selects the default ones.
if len(sys.argv) > 1:
    # Validate explicitly instead of with `assert`: assertions are stripped
    # under `python -O`, which would let any unknown argument silently
    # select the "base" dumps.
    if sys.argv[1] != "base":
        sys.exit(
            f'unknown argument {sys.argv[1]!r}; the only supported argument is "base"'
        )
    hf_log = "base_log_hf.jsonl"
    srt_log = "base_log_srt.jsonl"
else:
    hf_log = "log_hf.jsonl"
    srt_log = "log_srt.jsonl"
|
|
|
|
|
|
def load_data(filepath):
    """Load named tensors from a JSON-lines dump.

    Every non-blank line of ``filepath`` is parsed as a JSON object and each
    of its values is converted with ``torch.tensor``.  When a name occurs
    more than once, the last occurrence wins.

    Args:
        filepath: Path of the ``.jsonl`` file to read.

    Returns:
        A dict mapping each name to its ``torch.Tensor``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    tensors = {}
    with open(filepath, "r", encoding="utf-8") as f:
        # Stream the file instead of materializing every line with
        # readlines(); each line holds a whole tensor and can be large.
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            if not line.strip():
                continue
            for name, values in json.loads(line).items():
                tensors[name] = torch.tensor(values)
    return tensors
|
|
|
|
|
|
# Read both dumps up front, the HF one first and then the SRT one.
hf_tensors, srt_tensors = map(load_data, (hf_log, srt_log))
|
|
|
|
|
|
def get_diff(t1, t2):
    """Measure how far apart two tensors are.

    ``t1`` is reshaped to the shape of ``t2`` before comparing, so the two
    only need to hold the same number of elements.

    Args:
        t1: First tensor.
        t2: Second tensor; its shape is used as the reference shape.

    Returns:
        A tuple ``(l2_dis, max_diff)``: the Euclidean (p=2) distance between
        the tensors and the largest absolute element-wise difference.

    Raises:
        RuntimeError: If ``t1`` cannot be reshaped to ``t2.shape``.
    """
    # Reshape once and reuse the result for both metrics.
    t1 = t1.reshape(t2.shape)
    # Integer data read back from JSON yields integer tensors, which
    # torch.dist cannot take a norm of; compare those in floating point.
    # Floating-point and complex inputs are left untouched.
    if not (t1.is_floating_point() or t1.is_complex()):
        t1 = t1.float()
    if not (t2.is_floating_point() or t2.is_complex()):
        t2 = t2.float()
    max_diff = (t1 - t2).abs().max()
    l2_dis = torch.dist(t1, t2, p=2)
    return l2_dis, max_diff
|
|
|
|
|
|
# Report, for every tensor name in the SRT dump, its distance to the tensor
# of the same name in the HF dump.
for k in srt_tensors:
    if k not in hf_tensors:
        # A name dumped on only one side would otherwise abort the whole
        # report with a KeyError; note it and keep going.
        print(f"{k} is missing from the hf log, skipped")
        continue
    hf_tensor = hf_tensors[k]
    srt_tensor = srt_tensors[k]
    l2_dis, max_diff = get_diff(hf_tensor, srt_tensor)
    print(f"{k} {l2_dis=} {max_diff=}")
    # Extra detail for two hard-coded names: full values for one, shapes
    # for the other.
    if k == "layer 1 attn":
        print(hf_tensor)
        print(srt_tensor)
    if k == "layer 0 prefill k":
        print(srt_tensor.shape)
        print(hf_tensor.shape)
|