git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Enhancement: support heic images (#9771)
author shamoon <4887959+shamoon@users.noreply.github.com>
Wed, 23 Apr 2025 16:22:21 +0000 (09:22 -0700)
committer GitHub <noreply@github.com>
Wed, 23 Apr 2025 16:22:21 +0000 (09:22 -0700)
src/paperless_tesseract/parsers.py
src/paperless_tesseract/signals.py
src/paperless_tesseract/tests/samples/simple.heic [new file with mode: 0644]
src/paperless_tesseract/tests/test_parser.py

diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index 64c3030c71a9632c0d9a95eaeed309ee3ccf866d..9e8dbf35031d0eb66b09f6d7347ecaf2cc86ca33 100644 (file)
@@ -108,6 +108,7 @@ class RasterisedDocumentParser(DocumentParser):
             "image/bmp",
             "image/gif",
             "image/webp",
+            "image/heic",
         ]
 
     def has_alpha(self, image) -> bool:
diff --git a/src/paperless_tesseract/signals.py b/src/paperless_tesseract/signals.py
index 7d6f6902fbfb23faf0874b1237df57de7ba737c7..e4d8449ed8a1c3823a041ceb5585552e9cbb0a60 100644 (file)
@@ -16,5 +16,6 @@ def tesseract_consumer_declaration(sender, **kwargs):
             "image/gif": ".gif",
             "image/bmp": ".bmp",
             "image/webp": ".webp",
+            "image/heic": ".heic",
         },
     }
diff --git a/src/paperless_tesseract/tests/samples/simple.heic b/src/paperless_tesseract/tests/samples/simple.heic
new file mode 100644 (file)
index 0000000..6b9a95e
Binary files /dev/null and b/src/paperless_tesseract/tests/samples/simple.heic differ
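diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py
index f7490fbbf7f67700c19b767c188363f67d9648af..514b7163ffa9fd9a6c826fae2dc5830dce730f05 100644 (file)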
@@ -880,6 +880,12 @@ class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         self.assertIsFile(parser.archive_path)
         self.assertIn("this is a test document", parser.get_text().lower())
 
+    def test_heic(self):
+        parser = RasterisedDocumentParser(None)
+        parser.parse(os.path.join(self.SAMPLE_FILES, "simple.heic"), "image/heic")
+        self.assertIsFile(parser.archive_path)
+        self.assertIn("pizza", parser.get_text().lower())
+
     @override_settings(OCR_IMAGE_DPI=200)
     def test_gif(self):
         parser = RasterisedDocumentParser(None)