git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
fix a bug with thumbnail generation when TIKA was enabled
author jonaswinkler <jonas.winkler@jpwinkler.de>
Tue, 9 Feb 2021 21:12:43 +0000 (22:12 +0100)
committer jonaswinkler <jonas.winkler@jpwinkler.de>
Tue, 9 Feb 2021 21:12:43 +0000 (22:12 +0100)
src/documents/consumer.py
src/documents/management/commands/document_thumbnails.py
src/documents/parsers.py
src/documents/tests/test_consumer.py
src/documents/tests/test_parsers.py
src/paperless_tesseract/parsers.py
src/paperless_text/parsers.py
src/paperless_tika/parsers.py

index acb3ad33f197dd547e0a0919fd4056dc90970447..3fd62fabc12c08322b7b8a759ea602444fb62393 100755 (executable)
@@ -241,7 +241,7 @@ class Consumer(LoggingMixin):
             self._send_progress(70, 100, 'WORKING',
                                 MESSAGE_GENERATING_THUMBNAIL)
             thumbnail = document_parser.get_optimised_thumbnail(
-                self.path, mime_type)
+                self.path, mime_type, self.filename)
 
             text = document_parser.get_text()
             date = document_parser.get_date()
index cf2cbeb777267c9c5176e2f3ecb9d70769b48a78..b7f935e3b6a420eac1ef13e6c8c3fbc2d43ab392 100644 (file)
@@ -22,7 +22,10 @@ def _process_document(doc_in):
 
     try:
         thumb = parser.get_optimised_thumbnail(
-            document.source_path, document.mime_type)
+            document.source_path,
+            document.mime_type,
+            document.get_public_filename()
+        )
 
         shutil.move(thumb, document.thumbnail_path)
     finally:
index 98af4f080738b2eb5f4c2a3d58bbf15152125ce6..1ed5deb3c082574ec0390bc8830c4b2df7c1bb8c 100644 (file)
@@ -288,14 +288,17 @@ class DocumentParser(LoggingMixin):
     def get_archive_path(self):
         return self.archive_path
 
-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
         """
         Returns the path to a file we can use as a thumbnail for this document.
         """
         raise NotImplementedError()
 
-    def get_optimised_thumbnail(self, document_path, mime_type):
-        thumbnail = self.get_thumbnail(document_path, mime_type)
+    def get_optimised_thumbnail(self,
+                                document_path,
+                                mime_type,
+                                file_name=None):
+        thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
         if settings.OPTIMIZE_THUMBNAILS:
             out_path = os.path.join(self.tempdir, "thumb_optipng.png")
 
index 1ed0411879cc0f8e5e314ea2635b70539602854b..44effd39ee4d237e70f5126d215b2b06c7b505ef 100644 (file)
@@ -167,7 +167,7 @@ class TestFieldPermutations(TestCase):
 
 class DummyParser(DocumentParser):
 
-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
         # not important during tests
         raise NotImplementedError()
 
@@ -176,7 +176,7 @@ class DummyParser(DocumentParser):
         _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
         self.archive_path = archive_path
 
-    def get_optimised_thumbnail(self, document_path, mime_type):
+    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
         return self.fake_thumb
 
     def parse(self, document_path, mime_type, file_name=None):
@@ -185,10 +185,10 @@ class DummyParser(DocumentParser):
 
 class CopyParser(DocumentParser):
 
-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
         return self.fake_thumb
 
-    def get_optimised_thumbnail(self, document_path, mime_type):
+    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
         return self.fake_thumb
 
     def __init__(self, logging_group, progress_callback=None):
@@ -203,7 +203,7 @@ class CopyParser(DocumentParser):
 
 class FaultyParser(DocumentParser):
 
-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
         # not important during tests
         raise NotImplementedError()
 
@@ -211,7 +211,7 @@ class FaultyParser(DocumentParser):
         super(FaultyParser, self).__init__(logging_group)
         _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
 
-    def get_optimised_thumbnail(self, document_path, mime_type):
+    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
         return self.fake_thumb
 
     def parse(self, document_path, mime_type, file_name=None):
index 8da6470a44d03636ef7fdcd7b8b3d8e0c028708a..9dd74313ff3f26582ba14109569ce39a6164e7d5 100644 (file)
@@ -68,7 +68,7 @@ class TestParserDiscovery(TestCase):
             )
 
 
-def fake_get_thumbnail(self, path, mimetype):
+def fake_get_thumbnail(self, path, mimetype, file_name):
     return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
 
 
@@ -89,15 +89,15 @@ class TestBaseParser(TestCase):
     def test_get_optimised_thumbnail(self):
         parser = DocumentParser(None)
 
-        parser.get_optimised_thumbnail("any", "not important")
+        parser.get_optimised_thumbnail("any", "not important", "document.pdf")
 
     @mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
     @override_settings(OPTIMIZE_THUMBNAILS=False)
     def test_get_optimised_thumb_disabled(self):
         parser = DocumentParser(None)
 
-        path = parser.get_optimised_thumbnail("any", "not important")
-        self.assertEqual(path, fake_get_thumbnail(None, None, None))
+        path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
+        self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
 
 
 class TestParserAvailability(TestCase):
index 0a976b569ca53ca50e6d77238633ef1e7f0a8a2d..271a840dfd54ae4459965f28bfe17b2d67a399a2 100644 (file)
@@ -48,7 +48,7 @@ class RasterisedDocumentParser(DocumentParser):
                     )
         return result
 
-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
         return make_thumbnail_from_pdf(
             document_path, self.tempdir, self.logging_group)
 
index c307bf10be9c7eef8af85c9a17d16ec6749798b7..837f05c9f999db494c401bdacc5dacfcd84d9a79 100644 (file)
@@ -13,7 +13,7 @@ class TextDocumentParser(DocumentParser):
 
     logging_name = "paperless.parsing.text"
 
-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
 
         def read_text():
             with open(document_path, 'r') as src:
index b888af820dde2de5b73100350af5a557d7f62470..6b0f62ada08374de9afe444e1bb1b2f73965f978 100644 (file)
@@ -16,9 +16,9 @@ class TikaDocumentParser(DocumentParser):
 
     logging_name = "paperless.parsing.tika"
 
-    def get_thumbnail(self, document_path, mime_type):
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
         if not self.archive_path:
-            self.archive_path = self.convert_to_pdf(document_path)
+            self.archive_path = self.convert_to_pdf(document_path, file_name)
 
         return make_thumbnail_from_pdf(
             self.archive_path, self.tempdir, self.logging_group)