fix(docling): pre-download models during Docker build
The DocumentConverter() constructor only sets up config; models are lazily downloaded on the first convert() call. Fix by running an actual PDF conversion during the build to trigger the HuggingFace model download and populate the cache. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0985214ab7
commit
d725864cd6
|
|
@ -12,8 +12,17 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||
|
||||
COPY app/ ./app/
|
||||
|
||||
# Pre-download models at build time to avoid latency on the first request
|
||||
# Instantiates DocumentConverter and nothing else. Per the commit note, the
# constructor only sets up config and models are downloaded lazily on the first
# convert(), so this step presumably does not pre-fetch any models — confirm
# against docling's behavior.
RUN python -c "from docling.document_converter import DocumentConverter; DocumentConverter()"
|
||||
# Pre-download the docling models at build time. DocumentConverter() alone only
# sets up configuration; the HuggingFace models are fetched lazily on the first
# convert() call, so a tiny throwaway PDF is converted here to warm the cache.
#
# The script is fed to Python through a heredoc (requires BuildKit / Dockerfile
# frontend >= 1.4). A backslash-continued `python -c "..."` is joined into one
# physical line by the Dockerfile parser, and Python does not allow compound
# statements (try/except/finally, with) after ';' on a single line.
# The delimiter is quoted so neither Docker nor the shell expands the script.
RUN python <<'PY'
import os
import sys
import tempfile

from docling.document_converter import DocumentConverter

# Minimal single-page PDF whose content stream draws the text "hello".
pdf = b'%PDF-1.4\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj\n3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Parent 2 0 R/Contents 4 0 R/Resources<</Font<</F1 5 0 R>>>>>>endobj\n4 0 obj<</Length 44>>stream\nBT /F1 12 Tf 100 700 Td (hello) Tj ET\nendstream\nendobj\n5 0 obj<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>endobj\nxref\n0 6\n0000000000 65535 f \n0000000009 00000 n \n0000000058 00000 n \n0000000115 00000 n \n0000000266 00000 n \n0000000360 00000 n \ntrailer<</Size 6/Root 1 0 R>>\nstartxref\n454\n%%EOF'

# The temporary directory (and the PDF inside it) is removed on exit, so no
# warm-up artifact is left behind in the image layer.
with tempfile.TemporaryDirectory() as workdir:
    sample_path = os.path.join(workdir, 'warmup.pdf')
    with open(sample_path, 'wb') as fh:
        fh.write(pdf)

    try:
        DocumentConverter().convert(sample_path)
    except Exception as exc:
        # Best effort, as in the original step: a failed warm-up conversion does
        # not fail the build. It is reported instead of silently swallowed,
        # because the model cache may be incomplete — check the build log.
        print(f'WARNING: model warm-up conversion failed: {exc!r}', file=sys.stderr)
    else:
        print('Models pre-downloaded successfully')
PY
|
||||
|
||||
EXPOSE 3007
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue