]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Feature: support barcode upscaling for better detection of small barcodes (#3655)
authorBastian Machek <16717398+bmachek@users.noreply.github.com>
Tue, 27 Jun 2023 17:18:47 +0000 (19:18 +0200)
committerGitHub <noreply@github.com>
Tue, 27 Jun 2023 17:18:47 +0000 (10:18 -0700)
docs/configuration.md
paperless.conf.example
src/documents/barcodes.py
src/documents/tests/samples/barcodes/barcode-qr-asn-000123-upscale-dpi.pdf [new file with mode: 0644]
src/documents/tests/test_barcodes.py
src/paperless/settings.py

index d3874256f9844a784f6976e822c1c6f837e42e21..722db8a4bde26dfc60ab9f4250f2ded4426f27fc 100644 (file)
@@ -1095,6 +1095,27 @@ barcode.
 
     Defaults to "ASN"
 
+`PAPERLESS_CONSUMER_BARCODE_UPSCALE=<float>`
+
+: Defines the upscale factor used in barcode detection.
+Improves the detection of small barcodes, e.g. with a value of 1.5, by
+upscaling the document before the detection process. Upscaling will
+only take place if the value is bigger than 1.0. Otherwise upscaling will
+not be performed to save resources. Try using in combination with
+PAPERLESS_CONSUMER_BARCODE_DPI set to a value higher than default.
+
+    Defaults to 0.0
+
+`PAPERLESS_CONSUMER_BARCODE_DPI=<int>`
+
+: During barcode detection every page from a PDF document needs
+to be converted to an image. A dpi value can be specified in the
+conversion process. Default is 300. If the detection of small barcodes
+fails, a bigger dpi value, e.g. 600, can fix the issue. Try using in
+combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE bigger than 1.0.
+
+    Defaults to 300
+
 ## Binaries
 
 There are a few external software packages that Paperless expects to
index 6bd70697e35397d91c866b927f1230ec6b3e037d..9b168db0cc21fff62f5aa4fd60f0a29e8037b226 100644 (file)
@@ -66,6 +66,8 @@
 #PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
 #PAPERLESS_CONSUMER_ENABLE_BARCODES=false
 #PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
+#PAPERLESS_CONSUMER_BARCODE_UPSCALE=0.0
+#PAPERLESS_CONSUMER_BARCODE_DPI=300
 #PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 #PAPERLESS_FILENAME_DATE_ORDER=YMD
index f3d59bc5b2b938b5ad9c90206feb14fe21f9ebc7..3650593ae80866ad26b31e6987671481b05c0265 100644 (file)
@@ -203,11 +203,21 @@ class BarcodeReader:
         try:
             pages_from_path = convert_from_path(
                 self.pdf_file,
-                dpi=300,
+                dpi=settings.CONSUMER_BARCODE_DPI,
                 output_folder=self.temp_dir.name,
             )
 
             for current_page_number, page in enumerate(pages_from_path):
+                factor = settings.CONSUMER_BARCODE_UPSCALE
+                if factor > 1.0:
+                    logger.debug(
+                        f"Upscaling image by {factor} for better barcode detection",
+                    )
+                    x, y = page.size
+                    page = page.resize(
+                        (int(round(x * factor)), (int(round(y * factor)))),
+                    )
+
                 for barcode_value in reader(page):
                     self.barcodes.append(
                         Barcode(current_page_number, barcode_value),
diff --git a/src/documents/tests/samples/barcodes/barcode-qr-asn-000123-upscale-dpi.pdf b/src/documents/tests/samples/barcodes/barcode-qr-asn-000123-upscale-dpi.pdf
new file mode 100644 (file)
index 0000000..3d6d8ea
Binary files /dev/null and b/src/documents/tests/samples/barcodes/barcode-qr-asn-000123-upscale-dpi.pdf differ
index eda97554d690b6a6ace0461293369207f1e37bdb..70f7807cc81c25e2dea563c822af5afe00ae0e66 100644 (file)
@@ -906,6 +906,47 @@ class TestAsnBarcode(DirectoriesMixin, TestCase):
                 input_doc,
             )
 
+    @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
+    def test_scan_file_for_qrcode_without_upscale(self):
+        """
+        GIVEN:
+            - A printed and scanned PDF document with a rather small QR code
+        WHEN:
+            - ASN barcode detection is run with default settings
+            - pyzbar is used for detection, as zxing would behave differently, and detect the QR code
+        THEN:
+            - ASN is not detected
+        """
+
+        test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
+
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            self.assertEqual(len(reader.barcodes), 0)
+
+    @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
+    @override_settings(CONSUMER_BARCODE_DPI=600)
+    @override_settings(CONSUMER_BARCODE_UPSCALE=1.5)
+    def test_scan_file_for_qrcode_with_upscale(self):
+        """
+        GIVEN:
+            - A printed and scanned PDF document with a rather small QR code
+        WHEN:
+            - ASN barcode detection is run with 600dpi and an upscale factor of 1.5 and pyzbar
+            - pyzbar is used for detection, as zxing would behave differently.
+              Upscaling is a workaround for detection problems with pyzbar,
+              when you cannot switch to zxing (aarch64 build problems of zxing)
+        THEN:
+            - ASN 123 is detected
+        """
+
+        test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
+
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            self.assertEqual(len(reader.barcodes), 1)
+            self.assertEqual(reader.asn, 123)
+
 
 @pytest.mark.skipif(
     not HAS_ZXING_LIB,
index ab33e6b1a5b1fe5a61b3ead927b1684b5878e663..2b7a320917b676b16bb0464a6fc951abc87a5b89 100644 (file)
@@ -781,6 +781,16 @@ CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
 )
 
 
+CONSUMER_BARCODE_UPSCALE: Final[float] = float(
+    os.getenv("PAPERLESS_CONSUMER_BARCODE_UPSCALE", 0.0),
+)
+
+
+CONSUMER_BARCODE_DPI: Final[int] = int(
+    os.getenv("PAPERLESS_CONSUMER_BARCODE_DPI", 300),
+)  # int DPI handed to the PDF-to-image conversion used for barcode detection
+
+
 OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
 
 # The default language that tesseract will attempt to use when parsing