#!/usr/bin/env python3
# extract_chat_template.py — final version
#
# Usage examples:
#   # Export only the original template
#   python extract_chat_template.py Qwen3-32B/tokenizer_config.json \
#       -o Qwen3-32B/chat_template.jinja
#
#   # Also export a version without <think>
#   python extract_chat_template.py Qwen3-32B/tokenizer_config.json \
#       -o Qwen3-32B/chat_template.jinja \
#       --no-think \
#       -n Qwen3-32B/chat_template_nothink.jinja
#
#   # If -o/-n are omitted, the files are written to the current directory:
#   # chat_template.jinja / chat_template_nothink.jinja
|
||
import argparse, json, re, sys
|
||
from pathlib import Path
|
||
|
||
def main(argv=None):
    """Extract the ``chat_template`` string from a tokenizer config file.

    Reads the JSON file named on the command line and writes the value of its
    top-level ``chat_template`` key to the ``-o/--output`` path. When
    ``--no-think`` is given, a second file is written in which every
    ``<think>...</think>`` span has been removed from the template text.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Raises:
        SystemExit: If the config file cannot be read or parsed, or if it
            holds no string-valued ``chat_template`` entry. argparse also
            exits on invalid arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("config", type=Path, help="tokenizer_config.json 路径")
    parser.add_argument("-o", "--output", type=Path,
                        default=Path("chat_template.jinja"),
                        help="原始模板输出文件(默认 chat_template.jinja)")
    parser.add_argument("--no-think", action="store_true",
                        help="额外生成去掉 <think> 块的模板")
    parser.add_argument("-n", "--no-think-out", type=Path,
                        help="无 <think> 模板输出文件(默认 chat_template_nothink.jinja)")
    args = parser.parse_args(argv)

    # Only reading and parsing can fail here: OSError covers a missing or
    # unreadable file, ValueError covers both invalid JSON and bad UTF-8.
    try:
        config = json.loads(args.config.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        sys.exit(f"❌ 读取失败:{e}")

    # Validate before writing so that a missing key or a non-string value
    # ends in a clean exit instead of a traceback from write_text().
    tpl = config.get("chat_template") if isinstance(config, dict) else None
    if not isinstance(tpl, str):
        sys.exit("❌ 读取失败:chat_template 缺失或不是字符串")

    # Write the original template, creating the target directory if needed.
    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(tpl, encoding="utf-8")
    print(f"✅ 原模板 → {args.output.resolve()}")

    # Optionally write the variant without <think> blocks.
    if args.no_think:
        nt_path = args.no_think_out or Path("chat_template_nothink.jinja")
        nt_path.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): the substitution is purely textual and runs on the
        # Jinja source, so any template logic between a "<think>" and the next
        # "</think>" is removed as well — confirm this suits the template.
        stripped = re.sub(r"<think>.*?</think>", "", tpl, flags=re.S)
        nt_path.write_text(stripped, encoding="utf-8")
        print(f"✅ 无 <think> 模板 → {nt_path.resolve()}")
|
||
|
||
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    sys.exit(main())
|
||
|