]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Updates how barcodes are detected, using pikepdf images, instead of converting each...
author Trenton Holmes <holmes.trenton@gmail.com>
Wed, 14 Sep 2022 18:49:22 +0000 (11:49 -0700)
committer Trenton H <holmes.trenton@gmail.com>
Fri, 16 Sep 2022 16:08:16 +0000 (09:08 -0700)
Pipfile
Pipfile.lock
src/documents/barcodes.py
src/documents/tasks.py
src/documents/tests/test_barcodes.py

diff --git a/Pipfile b/Pipfile
index 98655ee7807bcede94e1f5a8a9bde65b06958358..ef5212f5044d0b1e778e27a6f7c3e920d8eee51f 100644 (file)
--- a/Pipfile
+++ b/Pipfile
@@ -53,7 +53,6 @@ concurrent-log-handler = "*"
 "importlib-resources" = {version = "*", markers = "python_version < '3.9'"}
 zipp = {version = "*", markers = "python_version < '3.9'"}
 pyzbar = "*"
-pdf2image = "*"
 mysqlclient = "*"
 setproctitle = "*"
 
index 8497d06d1666210a1fa9976412f436e0180dc979..98499df9ef5158f47da0915d11767f22c3458bd2 100644 (file)
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "6f46be21b67938add11dbf0ecea4f722836f161f58fa5e47dec3f92edb346371"
+            "sha256": "896665b8ff6d8a99af44b729c581033add1ba5cbd927723ef275649491c92a4f"
         },
         "pipfile-spec": 6,
         "requires": {},
             "index": "pypi",
             "version": "==2.5.2"
         },
-        "pdf2image": {
-            "hashes": [
-                "sha256:84f79f2b8fad943e36323ea4e937fcb05f26ded0caa0a01181df66049e42fb65",
-                "sha256:d58ed94d978a70c73c2bb7fdf8acbaf2a7089c29ff8141be5f45433c0c4293bb"
-            ],
-            "index": "pypi",
-            "version": "==1.16.0"
-        },
         "pdfminer.six": {
             "hashes": [
                 "sha256:5a64c924410ac48501d6060b21638bf401db69f5b1bd57207df7fbc070ac8ae2",
         },
         "pyyaml": {
             "hashes": [
+                "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
                 "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
                 "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b",
                 "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57",
                 "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287",
                 "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513",
                 "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0",
+                "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782",
                 "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0",
                 "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92",
                 "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f",
                 "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2",
                 "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc",
+                "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1",
                 "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c",
                 "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86",
                 "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4",
                 "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c",
                 "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34",
                 "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b",
+                "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d",
                 "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c",
                 "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb",
+                "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7",
                 "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737",
                 "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3",
                 "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d",
+                "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358",
                 "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53",
                 "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78",
                 "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803",
                 "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a",
+                "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f",
                 "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174",
                 "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"
             ],
         },
         "pyyaml": {
             "hashes": [
+                "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
                 "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
                 "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b",
                 "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57",
                 "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287",
                 "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513",
                 "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0",
+                "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782",
                 "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0",
                 "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92",
                 "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f",
                 "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2",
                 "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc",
+                "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1",
                 "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c",
                 "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86",
                 "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4",
                 "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c",
                 "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34",
                 "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b",
+                "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d",
                 "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c",
                 "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb",
+                "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7",
                 "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737",
                 "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3",
                 "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d",
+                "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358",
                 "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53",
                 "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78",
                 "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803",
                 "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a",
+                "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f",
                 "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174",
                 "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"
             ],
index d8a73e27778f322536249c59b89ddad33b4aa44a..e473ce9382701dde8224e4734e789c4a5e27d6ad 100644 (file)
@@ -3,13 +3,15 @@ import os
 import shutil
 import tempfile
 from functools import lru_cache
-from typing import List  # for type hinting. Can be removed, if only Python >3.8 is used
+from typing import List
+from typing import Optional
+from typing import Tuple
 
 import magic
 from django.conf import settings
-from pdf2image import convert_from_path
 from pikepdf import Page
 from pikepdf import Pdf
+from pikepdf import PdfImage
 from PIL import Image
 from PIL import ImageSequence
 from pyzbar import pyzbar
@@ -32,7 +34,7 @@ def supported_file_type(mime_type) -> bool:
     return mime_type in supported_mime
 
 
-def barcode_reader(image) -> List[str]:
+def barcode_reader(image: Image) -> List[str]:
     """
     Read any barcodes contained in image
     Returns a list containing all found barcodes
@@ -99,21 +101,39 @@ def convert_from_tiff_to_pdf(filepath: str) -> str:
     return newpath
 
 
-def scan_file_for_separating_barcodes(filepath: str) -> List[int]:
+def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
     """
     Scan the provided pdf file for page separating barcodes
-    Returns a list of pagenumbers, which separate the file
+    Returns the PDF filepath and a list of pagenumbers,
+    which separate the file into new files
     """
+
     separator_page_numbers = []
-    separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
-    # use a temporary directory in case the file os too big to handle in memory
-    with tempfile.TemporaryDirectory() as path:
-        pages_from_path = convert_from_path(filepath, output_folder=path)
-        for current_page_number, page in enumerate(pages_from_path):
-            current_barcodes = barcode_reader(page)
-            if separator_barcode in current_barcodes:
-                separator_page_numbers.append(current_page_number)
-    return separator_page_numbers
+    pdf_filepath = None
+
+    mime_type = get_file_mime_type(filepath)
+
+    if supported_file_type(mime_type):
+        pdf_filepath = filepath
+        if mime_type == "image/tiff":
+            pdf_filepath = convert_from_tiff_to_pdf(filepath)
+
+        pdf = Pdf.open(pdf_filepath)
+
+        for page_num, page in enumerate(pdf.pages):
+            for image_key in page.images:
+                pdfimage = PdfImage(page.images[image_key])
+                pillow_img = pdfimage.as_pil_image()
+
+                detected_barcodes = barcode_reader(pillow_img)
+
+                if settings.CONSUMER_BARCODE_STRING in detected_barcodes:
+                    separator_page_numbers.append(page_num)
+    else:
+        logger.warning(
+            f"Unsupported file format for barcode reader: {str(mime_type)}",
+        )
+    return pdf_filepath, separator_page_numbers
 
 
 def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
index b1793e760d703aa02858bb5f09c0ba6ad3c01587..94b84945610c884fb67881d8e87c0303cac7f6df 100644 (file)
@@ -96,29 +96,13 @@ def consume_file(
     # check for separators in current document
     if settings.CONSUMER_ENABLE_BARCODES:
 
-        mime_type = barcodes.get_file_mime_type(path)
+        pdf_filepath, separators = barcodes.scan_file_for_separating_barcodes(path)
 
-        if not barcodes.supported_file_type(mime_type):
-            # if not supported, skip this routine
-            logger.warning(
-                f"Unsupported file format for barcode reader: {str(mime_type)}",
+        if separators:
+            logger.debug(
+                f"Pages with separators found in: {str(path)}",
             )
-        else:
-            separators = []
-            document_list = []
-
-            if mime_type == "image/tiff":
-                file_to_process = barcodes.convert_from_tiff_to_pdf(path)
-            else:
-                file_to_process = path
-
-            separators = barcodes.scan_file_for_separating_barcodes(file_to_process)
-
-            if separators:
-                logger.debug(
-                    f"Pages with separators found in: {str(path)}",
-                )
-                document_list = barcodes.separate_pages(file_to_process, separators)
+            document_list = barcodes.separate_pages(pdf_filepath, separators)
 
             if document_list:
                 for n, document in enumerate(document_list):
@@ -134,15 +118,13 @@ def consume_file(
                         target_dir=path.parent,
                     )
 
-                # if we got here, the document was successfully split
-                # and can safely be deleted
-                if mime_type == "image/tiff":
-                    # Remove the TIFF converted to PDF file
-                    logger.debug(f"Deleting file {file_to_process}")
-                    os.unlink(file_to_process)
-                # Remove the original file (new file is saved above)
-                logger.debug(f"Deleting file {path}")
-                os.unlink(path)
+                # Delete the PDF file which was split
+                os.remove(pdf_filepath)
+
+                # If the original was a TIFF, remove the original file as well
+                if str(pdf_filepath) != str(path):
+                    logger.debug(f"Deleting file {path}")
+                    os.unlink(path)
 
                 # notify the sender, otherwise the progress bar
                 # in the UI stays stuck
index 3ffd5d7538d7e6ea57d9533bc7c733cd903439ed..c58596a1fc0ff0ed063f7c15152fb576c0cff25a 100644 (file)
@@ -13,22 +13,23 @@ from PIL import Image
 
 
 class TestBarcode(DirectoriesMixin, TestCase):
+
+    SAMPLE_DIR = os.path.join(
+        os.path.dirname(__file__),
+        "samples",
+    )
+
+    BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
+
     def test_barcode_reader(self):
-        test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
-            "barcode-39-PATCHT.png",
-        )
+        test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
         img = Image.open(test_file)
         separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
         self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
 
     def test_barcode_reader2(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t.pbm",
         )
         img = Image.open(test_file)
@@ -37,9 +38,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_barcode_reader_distorsion(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-39-PATCHT-distorsion.png",
         )
         img = Image.open(test_file)
@@ -48,9 +47,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_barcode_reader_distorsion2(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-39-PATCHT-distorsion2.png",
         )
         img = Image.open(test_file)
@@ -59,9 +56,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_barcode_reader_unreadable(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-39-PATCHT-unreadable.png",
         )
         img = Image.open(test_file)
@@ -69,9 +64,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_barcode_reader_qr(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "qr-code-PATCHT.png",
         )
         img = Image.open(test_file)
@@ -80,9 +73,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_barcode_reader_128(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-128-PATCHT.png",
         )
         img = Image.open(test_file)
@@ -90,15 +81,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
         self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
 
     def test_barcode_reader_no_barcode(self):
-        test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
+        test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
         img = Image.open(test_file)
         self.assertEqual(barcodes.barcode_reader(img), [])
 
     def test_barcode_reader_custom_separator(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-39-custom.png",
         )
         img = Image.open(test_file)
@@ -106,9 +95,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_barcode_reader_custom_qr_separator(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-qr-custom.png",
         )
         img = Image.open(test_file)
@@ -116,9 +103,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_barcode_reader_custom_128_separator(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-128-custom.png",
         )
         img = Image.open(test_file)
@@ -126,19 +111,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_get_mime_type(self):
         tiff_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
+            self.SAMPLE_DIR,
             "simple.tiff",
         )
         pdf_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
+            self.SAMPLE_DIR,
             "simple.pdf",
         )
         png_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-128-custom.png",
         )
         tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1")
@@ -173,8 +154,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_convert_error_from_pdf_to_pdf(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
+            self.SAMPLE_DIR,
             "simple.pdf",
         )
         dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf")
@@ -183,107 +163,127 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_scan_file_for_separating_barcodes(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t.pdf",
         )
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [0])
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [0])
 
     def test_scan_file_for_separating_barcodes2(self):
-        test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [])
+        test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [])
 
     def test_scan_file_for_separating_barcodes3(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle.pdf",
         )
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [1])
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [1])
 
     def test_scan_file_for_separating_barcodes4(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "several-patcht-codes.pdf",
         )
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [2, 5])
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [2, 5])
 
     def test_scan_file_for_separating_barcodes_upsidedown(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle_reverse.pdf",
         )
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [1])
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [1])
 
     def test_scan_file_for_separating_qr_barcodes(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t-qr.pdf",
         )
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [0])
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [0])
 
     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
     def test_scan_file_for_separating_custom_barcodes(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-39-custom.pdf",
         )
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [0])
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [0])
 
     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
     def test_scan_file_for_separating_custom_qr_barcodes(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-qr-custom.pdf",
         )
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [0])
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [0])
 
     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
     def test_scan_file_for_separating_custom_128_barcodes(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-128-custom.pdf",
         )
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [0])
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [0])
 
     def test_scan_file_for_separating_wrong_qr_barcodes(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "barcode-39-custom.pdf",
         )
-        pages = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertEqual(pages, [])
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(pdf_file, test_file)
+        self.assertListEqual(separator_page_numbers, [])
 
     def test_separate_pages(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle.pdf",
         )
         pages = barcodes.separate_pages(test_file, [1])
@@ -311,9 +311,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_separate_pages_no_list(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle.pdf",
         )
         with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
@@ -328,9 +326,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_save_to_dir(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t.pdf",
         )
         tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@@ -340,9 +336,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_save_to_dir2(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t.pdf",
         )
         nonexistingdir = "/nowhere"
@@ -360,9 +354,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_save_to_dir3(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t.pdf",
         )
         tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@@ -372,31 +364,36 @@ class TestBarcode(DirectoriesMixin, TestCase):
 
     def test_barcode_splitter(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle.pdf",
         )
         tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
-        separators = barcodes.scan_file_for_separating_barcodes(test_file)
-        self.assertTrue(separators)
-        document_list = barcodes.separate_pages(test_file, separators)
+
+        pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
+            test_file,
+        )
+
+        self.assertEqual(test_file, pdf_file)
+        self.assertTrue(len(separator_page_numbers) > 0)
+
+        document_list = barcodes.separate_pages(test_file, separator_page_numbers)
         self.assertTrue(document_list)
         for document in document_list:
             barcodes.save_to_dir(document, target_dir=tempdir)
+
         target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
         target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")
+
         self.assertTrue(os.path.isfile(target_file1))
         self.assertTrue(os.path.isfile(target_file2))
 
     @override_settings(CONSUMER_ENABLE_BARCODES=True)
     def test_consume_barcode_file(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle.pdf",
         )
+
         dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf")
         shutil.copy(test_file, dst)
 
@@ -408,9 +405,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
     )
     def test_consume_barcode_tiff_file(self):
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle.tiff",
         )
         dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff")
@@ -432,18 +427,17 @@ class TestBarcode(DirectoriesMixin, TestCase):
         and continue archiving the file as is.
         """
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
+            self.SAMPLE_DIR,
             "simple.jpg",
         )
         dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
         shutil.copy(test_file, dst)
-        with self.assertLogs("paperless.tasks", level="WARNING") as cm:
+        with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
             self.assertIn("Success", tasks.consume_file(dst))
         self.assertListEqual(
             cm.output,
             [
-                "WARNING:paperless.tasks:Unsupported file format for barcode reader: image/jpeg",
+                "WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg",
             ],
         )
         m.assert_called_once()
@@ -465,9 +459,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
         the user uploads a supported image file, but without extension
         """
         test_file = os.path.join(
-            os.path.dirname(__file__),
-            "samples",
-            "barcodes",
+            self.BARCODE_SAMPLE_DIR,
             "patch-code-t-middle.tiff",
         )
         dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")