git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
added a test case that replicates #511
author jonaswinkler <jonas.winkler@jpwinkler.de>
Sun, 7 Feb 2021 17:23:54 +0000 (18:23 +0100)
committer jonaswinkler <jonas.winkler@jpwinkler.de>
Sun, 7 Feb 2021 17:23:54 +0000 (18:23 +0100)
src/documents/sanity_checker.py
src/documents/tests/samples/simple.png [new file with mode: 0644]
src/documents/tests/test_consumer.py

diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py
index bc0b689d4ad312f1316a3923651a047ad8715034..b8fd73f9808e27ba036a6e488645843ec4f46047 100644 (file)
@@ -56,7 +56,8 @@ def check_sanity():
             messages.append(SanityError(
                 f"Thumbnail of document {doc.pk} does not exist."))
         else:
-            present_files.remove(os.path.normpath(doc.thumbnail_path))
+            if os.path.normpath(doc.thumbnail_path) in present_files:
+                present_files.remove(os.path.normpath(doc.thumbnail_path))
             try:
                 with doc.thumbnail_file as f:
                     f.read()
@@ -71,7 +72,8 @@ def check_sanity():
             messages.append(SanityError(
                 f"Original of document {doc.pk} does not exist."))
         else:
-            present_files.remove(os.path.normpath(doc.source_path))
+            if os.path.normpath(doc.source_path) in present_files:
+                present_files.remove(os.path.normpath(doc.source_path))
             try:
                 with doc.source_file as f:
                     checksum = hashlib.md5(f.read()).hexdigest()
@@ -92,7 +94,8 @@ def check_sanity():
                     f"Archived version of document {doc.pk} does not exist."
                 ))
             else:
-                present_files.remove(os.path.normpath(doc.archive_path))
+                if os.path.normpath(doc.archive_path) in present_files:
+                    present_files.remove(os.path.normpath(doc.archive_path))
                 try:
                     with doc.archive_file as f:
                         checksum = hashlib.md5(f.read()).hexdigest()
@@ -103,7 +106,8 @@ def check_sanity():
                 else:
                     if not checksum == doc.archive_checksum:
                         messages.append(SanityError(
-                            f"Checksum mismatch of archive {doc.pk}. "
+                            f"Checksum mismatch of archived document "
+                            f"{doc.pk}. "
                             f"Stored: {doc.checksum}, actual: {checksum}."
                         ))
 
diff --git a/src/documents/tests/samples/simple.png b/src/documents/tests/samples/simple.png
new file mode 100644 (file)
index 0000000..a3a7684
Binary files /dev/null and b/src/documents/tests/samples/simple.png differ
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py
index a6f0cc55a2310ec3a220d4b7c5ca04c54b21de15..8ead1ea41cc440101a7fec4700ce2868f484bf99 100644 (file)
@@ -5,12 +5,14 @@ import tempfile
 from unittest import mock
 from unittest.mock import MagicMock
 
+from django.conf import settings
 from django.test import TestCase, override_settings
 
 from .utils import DirectoriesMixin
 from ..consumer import Consumer, ConsumerError
 from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
 from ..parsers import DocumentParser, ParseError
+from ..tasks import sanity_check
 
 
 class TestAttributes(TestCase):
@@ -181,6 +183,24 @@ class DummyParser(DocumentParser):
         self.text = "The Text"
 
 
+class CopyParser(DocumentParser):
+
+    def get_thumbnail(self, document_path, mime_type):
+        return self.fake_thumb
+
+    def get_optimised_thumbnail(self, document_path, mime_type):
+        return self.fake_thumb
+
+    def __init__(self, logging_group, progress_callback=None):
+        super(CopyParser, self).__init__(logging_group, progress_callback)
+        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir)
+
+    def parse(self, document_path, mime_type, file_name=None):
+        self.text = "The text"
+        self.archive_path = os.path.join(self.tempdir, "archive.pdf")
+        shutil.copy(document_path, self.archive_path)
+
+
 class FaultyParser(DocumentParser):
 
     def get_thumbnail(self, document_path, mime_type):
@@ -203,6 +223,8 @@ def fake_magic_from_file(file, mime=False):
     if mime:
         if os.path.splitext(file)[1] == ".pdf":
             return "application/pdf"
+        elif os.path.splitext(file)[1] == ".png":
+            return "image/png"
         else:
             return "unknown"
     else:
@@ -516,6 +538,19 @@ class TestConsumer(DirectoriesMixin, TestCase):
 
         self._assert_first_last_send_progress(last_status="FAILED")
 
+    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    def test_similar_filenames(self, m):
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
+        m.return_value = [(None, {
+            "parser": CopyParser,
+            "mime_types": {"application/pdf": ".pdf", "image/png": ".zip"},
+            "weight": 0
+        })]
+        doc1 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
+        doc2 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
+
+        sanity_check()
 
 class PreConsumeTestCase(TestCase):