git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Enhancement: support retain barcode split pages (#7912)
author: shamoon <4887959+shamoon@users.noreply.github.com>
Mon, 14 Oct 2024 03:51:39 +0000 (20:51 -0700)
committer: GitHub <noreply@github.com>
Mon, 14 Oct 2024 03:51:39 +0000 (20:51 -0700)
docs/configuration.md
src/documents/barcodes.py
src/documents/tests/test_barcodes.py
src/paperless/settings.py

index 5fa4ab0a7c465a8433678b0120dda4937fcef030..8e261f0f56e128a57c9be375b732b115357ca28a 100644 (file)
@@ -1287,6 +1287,12 @@ change this.
 
     Defaults to "PATCHT"
 
+#### [`PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES=<bool>`](#PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES) {#PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES}
+
+: If set to true, pages containing a separator barcode (such as PATCHT) are kept in the resulting documents when splitting, instead of being discarded.
+
+    Defaults to false.
+
 #### [`PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE}
 
 : Enables the detection of barcodes in the scanned document and
index 746d6014d98afb8b656f987977532ff31d4f7e87..132e853b0b93deda2e61c4d3cb3e01300d00f1ac 100644 (file)
@@ -387,7 +387,12 @@ class BarcodePlugin(ConsumeTaskPlugin):
         """
         # filter all barcodes for the separator string
         # get the page numbers of the separating barcodes
-        separator_pages = {bc.page: False for bc in self.barcodes if bc.is_separator}
+        retain = settings.CONSUMER_BARCODE_RETAIN_SPLIT_PAGES
+        separator_pages = {
+            bc.page: retain
+            for bc in self.barcodes
+            if bc.is_separator and (not retain or (retain and bc.page > 0))
+        }  # as below, don't include the first page if retain is enabled
         if not settings.CONSUMER_ENABLE_ASN_BARCODE:
             return separator_pages
 
index b0c42963a45c433780c3aa3b2047a603a7a058f2..03b0903dd992d96da7138c42e1ca4fd32feabbca 100644 (file)
@@ -511,6 +511,42 @@ class TestBarcode(
             document_list = reader.separate_pages(separator_page_numbers)
             self.assertEqual(len(document_list), 5)
 
+    @override_settings(
+        CONSUMER_ENABLE_BARCODES=True,
+        CONSUMER_ENABLE_ASN_BARCODE=True,
+        CONSUMER_BARCODE_RETAIN_SPLIT_PAGES=True,
+    )
+    def test_separate_pages_by_asn_barcodes_and_patcht_retain_pages(self):
+        """
+        GIVEN:
+            - Input PDF with a patch code on page 3 and ASN barcodes on pages 1,5,6,9,11
+            - Retain split pages is enabled
+        WHEN:
+            - Input file is split on barcodes
+        THEN:
+            - Correct number of files produced, split correctly by correct pages, and the split pages are retained
+        """
+        test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
+
+        with self.get_reader(test_file) as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
+
+            self.assertEqual(
+                reader.pdf_file,
+                test_file,
+            )
+            self.assertDictEqual(
+                separator_page_numbers,
+                {
+                    2: True,
+                    4: True,
+                    5: True,
+                    8: True,
+                    10: True,
+                },
+            )
+
 
 @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
 class TestBarcodeNewConsume(
index d30a9d57d5fd964463f1f9372400562367c14c13..c1cb62c9e87c3cc1dd13373f14e9f24ae4ca10cf 100644 (file)
@@ -932,6 +932,10 @@ CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int(
     0,
 )
 
+CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = __get_boolean(
+    "PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES",
+)
+
 CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
     "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
 )