git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fix: skip fuzzy matching for empty document content (#10914)
author shamoon <4887959+shamoon@users.noreply.github.com>
Tue, 23 Sep 2025 06:30:24 +0000 (23:30 -0700)
committer GitHub <noreply@github.com>
Tue, 23 Sep 2025 06:30:24 +0000 (23:30 -0700)
src/documents/management/commands/document_fuzzy_match.py
src/documents/tests/test_management_fuzzy.py

index 5eebeb1720a2b90f08c97eb745f5ca0e4537d585..4ecdf6d012c73c8dc95614ebdd63c0745623b9d1 100644 (file)
@@ -92,6 +92,9 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
                 # doc to doc is obviously not useful
                 if first_doc.pk == second_doc.pk:
                     continue
+                # Skip empty documents (e.g. password-protected)
+                if first_doc.content.strip() == "" or second_doc.content.strip() == "":
+                    continue
                 # Skip matching which have already been matched together
                 # doc 1 to doc 2 is the same as doc 2 to doc 1
                 doc_1_to_doc_2 = (first_doc.pk, second_doc.pk)
index 2d7d3735af762e6b6bcf6424cb863430b3a62c6e..453a8608218a7d9cdee391c0dc63f149e622d5d6 100644 (file)
@@ -206,3 +206,29 @@ class TestFuzzyMatchCommand(TestCase):
         self.assertEqual(Document.objects.count(), 2)
         self.assertIsNotNone(Document.objects.get(pk=1))
         self.assertIsNotNone(Document.objects.get(pk=2))
+
+    def test_empty_content(self):
+        """
+        GIVEN:
+            - 2 documents exist, content is empty (pw-protected)
+        WHEN:
+            - Command is called
+        THEN:
+            - No matches are found
+        """
+        Document.objects.create(
+            checksum="BEEFCAFE",
+            title="A",
+            content="",
+            mime_type="application/pdf",
+            filename="test.pdf",
+        )
+        Document.objects.create(
+            checksum="DEADBEAF",
+            title="A",
+            content="",
+            mime_type="application/pdf",
+            filename="other_test.pdf",
+        )
+        stdout, _ = self.call_command()
+        self.assertIn("No matches found", stdout)