From d725864cd6c9a3e4f7e469d73ccdfd1742fa3754 Mon Sep 17 00:00:00 2001
From: hailin
Date: Sat, 7 Feb 2026 07:13:54 -0800
Subject: [PATCH] fix(docling): pre-download models during Docker build

DocumentConverter() constructor only sets up config, models are lazily
downloaded on first convert(). Fix by running an actual PDF conversion
during build to trigger HuggingFace model download and cache.

Co-Authored-By: Claude Opus 4.6
---
Review notes (ignored by `git am`):
- The warm-up script is passed to Python through a heredoc, which needs
  BuildKit. A backslash-continued `python -c "..."` is joined into a single
  line, where `try: ... except: ... finally: ...` is a SyntaxError.
- The sample PDF is generated by the script so its xref offsets are always
  consistent with its contents.
- The blob hashes on the "index" line were not regenerated for this revision.

 packages/services/docling-service/Dockerfile | 55 ++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 2 deletions(-)

diff --git a/packages/services/docling-service/Dockerfile b/packages/services/docling-service/Dockerfile
index 8c56aa2..70d4e9d 100644
--- a/packages/services/docling-service/Dockerfile
+++ b/packages/services/docling-service/Dockerfile
@@ -12,8 +12,59 @@ RUN pip install --no-cache-dir -r requirements.txt
 
 COPY app/ ./app/
 
-# 构建时预下载模型,避免首次请求延迟
-RUN python -c "from docling.document_converter import DocumentConverter; DocumentConverter()"
+# Pre-download models at build time. Constructing DocumentConverter() is not
+# enough: only a real conversion triggers the HuggingFace model download.
+# The script is a quoted heredoc (requires BuildKit) so it stays multi-line;
+# a backslash-continued `python -c` collapses to one line, on which
+# try/except/finally is a SyntaxError.
+# The warm-up is best effort: a failed conversion is reported on stderr but
+# does not fail the build.
+RUN python <<'PY'
+import os
+import sys
+import tempfile
+
+from docling.document_converter import DocumentConverter
+
+
+def build_minimal_pdf():
+    """Return a one-page PDF with the text 'hello' and a consistent xref table."""
+    text_ops = b"BT /F1 12 Tf 100 700 Td (hello) Tj ET"
+    objects = [
+        b"<< /Type /Catalog /Pages 2 0 R >>",
+        b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
+        b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792]"
+        b" /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>",
+        b"<< /Length %d >>\nstream\n" % len(text_ops) + text_ops + b"\nendstream",
+        b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>",
+    ]
+    pdf = bytearray(b"%PDF-1.4\n")
+    offsets = []
+    for number, body in enumerate(objects, start=1):
+        offsets.append(len(pdf))
+        pdf += b"%d 0 obj\n" % number + body + b"\nendobj\n"
+    xref_start = len(pdf)
+    pdf += b"xref\n0 %d\n0000000000 65535 f \n" % (len(objects) + 1)
+    for offset in offsets:
+        pdf += b"%010d 00000 n \n" % offset
+    pdf += b"trailer\n<< /Size %d /Root 1 0 R >>\n" % (len(objects) + 1)
+    pdf += b"startxref\n%d\n%%%%EOF\n" % xref_start
+    return bytes(pdf)
+
+
+handle = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
+try:
+    handle.write(build_minimal_pdf())
+    handle.close()
+    DocumentConverter().convert(handle.name)
+except Exception as exc:
+    print(f"WARNING: model pre-download failed: {exc!r}", file=sys.stderr)
+else:
+    print("Models pre-downloaded successfully")
+finally:
+    handle.close()
+    os.unlink(handle.name)
+PY
 
 EXPOSE 3007
 