42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
"""构建时预下载 Docling 模型到镜像缓存"""
|
|
import tempfile
|
|
import os
|
|
|
|
def _build_minimal_pdf():
    """Return the bytes of a one-page PDF that draws the text "hello".

    The cross-reference offsets, the ``startxref`` pointer and the content
    stream's ``/Length`` are all computed from the bytes actually emitted,
    so the file stays structurally consistent if any object is edited.

    Returns:
        bytes: a complete PDF 1.4 document with five indirect objects
        (catalog, page tree, page, content stream, Helvetica font).
    """
    # Page content: select font F1 at 12pt, move to (100, 700), show "hello".
    stream = b"BT /F1 12 Tf 100 700 Td (hello) Tj ET"

    # Object bodies in object-number order (1..5).
    bodies = [
        b"<</Type/Catalog/Pages 2 0 R>>",
        b"<</Type/Pages/Kids[3 0 R]/Count 1>>",
        b"<</Type/Page/MediaBox[0 0 612 792]/Parent 2 0 R/Contents 4 0 R"
        b"/Resources<</Font<</F1 5 0 R>>>>>>",
        # The newline before "endstream" is an end-of-line marker and is
        # deliberately not counted in /Length.
        b"<</Length %d>>stream\n%s\nendstream\n" % (len(stream), stream),
        b"<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>",
    ]

    out = bytearray(b"%PDF-1.4\n")
    offsets = []
    for number, body in enumerate(bodies, start=1):
        # Record where each object starts; the xref table needs these.
        offsets.append(len(out))
        out += b"%d 0 obj%sendobj\n" % (number, body)

    xref_offset = len(out)
    size = len(bodies) + 1  # +1 for the mandatory free entry (object 0)
    out += b"xref\n0 %d\n" % size
    out += b"0000000000 65535 f \n"
    for offset in offsets:
        # Each xref entry is exactly 20 bytes: 10-digit offset, 5-digit
        # generation, the in-use flag, and a two-byte end-of-line.
        out += b"%010d 00000 n \n" % offset
    out += b"trailer<</Size %d/Root 1 0 R>>\n" % size
    out += b"startxref\n%d\n%%%%EOF" % xref_offset
    return bytes(out)


# Create the minimal PDF file
pdf = _build_minimal_pdf()

# delete=False so the file can be reopened by path after it is closed;
# it is removed explicitly in the ``finally`` below.
tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
try:
    # Write and close inside the cleanup scope so a failed write cannot
    # leave the temporary file behind.
    with tmp:
        tmp.write(pdf)

    try:
        from docling.document_converter import DocumentConverter

        conv = DocumentConverter()
        # NOTE(review): presumably the model download is triggered while
        # converting a document -- confirm against the docling version in use.
        conv.convert(tmp.name)
        print("Models pre-downloaded successfully")
    except Exception as e:
        # Deliberately best-effort: a failure here must not abort the build.
        # NOTE(review): this also swallows ImportError when docling is not
        # installed, in which case nothing is cached -- confirm that is
        # acceptable for the image build.
        print(f"Conversion failed (expected for minimal PDF): {e}")
        print("Models should still be cached")
finally:
    os.unlink(tmp.name)
|