git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Enhancement: add timeout for Tika client (#8520)
author HiranChaudhuri <hiran.chaudhuri@gmx.net>
Thu, 19 Dec 2024 16:58:26 +0000 (17:58 +0100)
committer GitHub <noreply@github.com>
Thu, 19 Dec 2024 16:58:26 +0000 (16:58 +0000)
Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
src/paperless_tika/parsers.py

index f51a039162953de16f8d40c52cdf86a18e9a99aa..40aa8e581e13d32a11cdd109d7e57e178d6c561d 100644 (file)
@@ -33,7 +33,10 @@ class TikaDocumentParser(DocumentParser):
 
     def extract_metadata(self, document_path, mime_type):
         try:
-            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
+            with TikaClient(
+                tika_url=settings.TIKA_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client:
                 parsed = client.metadata.from_file(document_path, mime_type)
                 return [
                     {
@@ -54,7 +57,10 @@ class TikaDocumentParser(DocumentParser):
         self.log.info(f"Sending {document_path} to Tika server")
 
         try:
-            with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
+            with TikaClient(
+                tika_url=settings.TIKA_ENDPOINT,
+                timeout=settings.CELERY_TASK_TIME_LIMIT,
+            ) as client:
                 try:
                     parsed = client.tika.as_text.from_file(document_path, mime_type)
                 except httpx.HTTPStatusError as err: