]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Updates handling of barcodes to encapsulate logic, moving it out of tasks and into...
authorTrenton H <797416+stumpylog@users.noreply.github.com>
Fri, 19 May 2023 16:59:57 +0000 (09:59 -0700)
committerTrenton H <797416+stumpylog@users.noreply.github.com>
Mon, 22 May 2023 13:52:31 +0000 (06:52 -0700)
21 files changed:
src/documents/barcodes.py
src/documents/tasks.py
src/documents/tests/samples/barcodes/barcode-128-PATCHT.png [deleted file]
src/documents/tests/samples/barcodes/barcode-128-custom.png [deleted file]
src/documents/tests/samples/barcodes/barcode-39-PATCHT-distortion.png [deleted file]
src/documents/tests/samples/barcodes/barcode-39-PATCHT-distortion2.png [deleted file]
src/documents/tests/samples/barcodes/barcode-39-PATCHT-unreadable.png [deleted file]
src/documents/tests/samples/barcodes/barcode-39-PATCHT.png [deleted file]
src/documents/tests/samples/barcodes/barcode-39-asn-123.png [deleted file]
src/documents/tests/samples/barcodes/barcode-39-asn-custom-prefix.png [deleted file]
src/documents/tests/samples/barcodes/barcode-39-asn-invalid.png [deleted file]
src/documents/tests/samples/barcodes/barcode-39-custom.png [deleted file]
src/documents/tests/samples/barcodes/barcode-qr-custom.png [deleted file]
src/documents/tests/samples/barcodes/patch-code-t-middle-alpha.tiff [new file with mode: 0644]
src/documents/tests/samples/barcodes/patch-code-t-middle-distorted.pdf [new file with mode: 0755]
src/documents/tests/samples/barcodes/patch-code-t-middle-fuzzy.pdf [new file with mode: 0755]
src/documents/tests/samples/barcodes/patch-code-t-middle-reverse.pdf [moved from src/documents/tests/samples/barcodes/patch-code-t-middle_reverse.pdf with 100% similarity]
src/documents/tests/samples/barcodes/patch-code-t-middle-unreadable.pdf [new file with mode: 0755]
src/documents/tests/samples/barcodes/patch-code-t.pbm [deleted file]
src/documents/tests/samples/barcodes/qr-code-PATCHT.png [deleted file]
src/documents/tests/test_barcodes.py

index 8d114d68c818db4edb06844e0ddd6f56b87981fd..79fa2746fba180839c413ca980443803e5a46c65 100644 (file)
@@ -1,12 +1,11 @@
 import logging
-import os
 import shutil
 import tempfile
 from dataclasses import dataclass
-from functools import lru_cache
 from pathlib import Path
 from subprocess import run
 from typing import Dict
+from typing import Final
 from typing import List
 from typing import Optional
 
@@ -18,11 +17,9 @@ from pikepdf import Page
 from pikepdf import Pdf
 from PIL import Image
 
-logger = logging.getLogger("paperless.barcodes")
-
+from documents.data_models import DocumentSource
 
-class BarcodeImageFormatError(Exception):
-    pass
+logger = logging.getLogger("paperless.barcodes")
 
 
 @dataclass(frozen=True)
@@ -51,56 +48,72 @@ class Barcode:
         return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX)
 
 
-@dataclass
-class DocumentBarcodeInfo:
-    """
-    Describes a single document's barcode status
-    """
+class BarcodeReader:
+    def __init__(self, filepath: Path, mime_type: str) -> None:
+        self.file: Final[Path] = filepath
+        self.mime: Final[str] = mime_type
+        self.pdf_file: Path = self.file
+        self.barcodes: List[Barcode] = []
+        self.temp_dir: Optional[Path] = None
 
-    pdf_path: Path
-    barcodes: List[Barcode]
+        if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
+            self.SUPPORTED_FILE_MIMES = {"application/pdf", "image/tiff"}
+        else:
+            self.SUPPORTED_FILE_MIMES = {"application/pdf"}
 
+    def __enter__(self):
+        if self.supported_mime_type:
+            self.temp_dir = tempfile.TemporaryDirectory(prefix="paperless-barcodes")
+        return self
 
-@lru_cache(maxsize=8)
-def supported_file_type(mime_type: str) -> bool:
-    """
-    Determines if the file is valid for barcode
-    processing, based on MIME type and settings
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.temp_dir is not None:
+            self.temp_dir.cleanup()
+            self.temp_dir = None
 
-    :return: True if the file is supported, False otherwise
-    """
-    supported_mime = ["application/pdf"]
-    if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
-        supported_mime += ["image/tiff"]
+    @property
+    def supported_mime_type(self) -> bool:
+        """
+        Return True if the given mime type is supported for barcodes, False otherwise
+        """
+        return self.mime in self.SUPPORTED_FILE_MIMES
 
-    return mime_type in supported_mime
+    @property
+    def asn(self) -> Optional[int]:
+        """
+        Search the parsed barcodes for any ASNs.
+        The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
+        is considered the ASN to be used.
+        Returns the detected ASN (or None)
+        """
+        asn = None
 
+        # Ensure the barcodes have been read
+        self.detect()
 
-def barcode_reader(image: Image) -> List[str]:
-    """
-    Read any barcodes contained in image
-    Returns a list containing all found barcodes
-    """
-    barcodes = []
+        # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
+        asn_text = next(
+            (x.value for x in self.barcodes if x.is_asn),
+            None,
+        )
 
-    if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
-        logger.debug("Scanning for barcodes using PYZBAR")
-        from pyzbar import pyzbar
+        if asn_text:
+            logger.debug(f"Found ASN Barcode: {asn_text}")
+            # remove the prefix and remove whitespace
+            asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
 
-        # Decode the barcode image
-        detected_barcodes = pyzbar.decode(image)
+            # now, try parsing the ASN number
+            try:
+                asn = int(asn_text)
+            except ValueError as e:
+                logger.warning(f"Failed to parse ASN number because: {e}")
+
+        return asn
+
+    @staticmethod
+    def read_barcodes_zxing(image: Image) -> List[str]:
+        barcodes = []
 
-        if detected_barcodes:
-            # Traverse through all the detected barcodes in image
-            for barcode in detected_barcodes:
-                if barcode.data:
-                    decoded_barcode = barcode.data.decode("utf-8")
-                    barcodes.append(decoded_barcode)
-                    logger.debug(
-                        f"Barcode of type {str(barcode.type)} found: {decoded_barcode}",
-                    )
-    elif settings.CONSUMER_BARCODE_SCANNER == "ZXING":
-        logger.debug("Scanning for barcodes using ZXING")
         import zxingcpp
 
         detected_barcodes = zxingcpp.read_barcodes(image)
@@ -111,74 +124,92 @@ def barcode_reader(image: Image) -> List[str]:
                     f"Barcode of type {str(barcode.format)} found: {barcode.text}",
                 )
 
-    return barcodes
+        return barcodes
 
+    @staticmethod
+    def read_barcodes_pyzbar(image: Image) -> List[str]:
+        barcodes = []
 
-def convert_from_tiff_to_pdf(filepath: Path) -> Path:
-    """
-    converts a given TIFF image file to pdf into a temporary directory.
+        from pyzbar import pyzbar
 
-    Returns the new pdf file.
-    """
-    tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
-    # use old file name with pdf extension
-    newpath = Path(tempdir) / Path(filepath.name).with_suffix(".pdf")
-
-    with Image.open(filepath) as im:
-        has_alpha_layer = im.mode in ("RGBA", "LA")
-    if has_alpha_layer:
-        run(
-            [
-                settings.CONVERT_BINARY,
-                "-alpha",
-                "off",
-                filepath,
-                filepath,
-            ],
+        # Decode the barcode image
+        detected_barcodes = pyzbar.decode(image)
+
+        # Traverse through all the detected barcodes in image
+        for barcode in detected_barcodes:
+            if barcode.data:
+                decoded_barcode = barcode.data.decode("utf-8")
+                barcodes.append(decoded_barcode)
+                logger.debug(
+                    f"Barcode of type {str(barcode.type)} found: {decoded_barcode}",
+                )
+
+        return barcodes
+
+    def convert_from_tiff_to_pdf(self):
+        """
+        May convert a TIFF image into a PDF, if the input is a TIFF
+        """
+        # Nothing to do, pdf_file is already assigned correctly
+        if self.mime != "image/tiff":
+            return
+
+        with Image.open(self.file) as im:
+            has_alpha_layer = im.mode in ("RGBA", "LA")
+        if has_alpha_layer:
+            # Note the save into the temp folder, so as not to trigger a new
+            # consume
+            scratch_image = Path(self.temp_dir.name) / Path(self.file.name)
+            run(
+                [
+                    settings.CONVERT_BINARY,
+                    "-alpha",
+                    "off",
+                    self.file,
+                    scratch_image,
+                ],
+            )
+        else:
+            # Not modifying the original, safe to use in place
+            scratch_image = self.file
+
+        self.pdf_file = Path(self.temp_dir.name) / Path(self.file.name).with_suffix(
+            ".pdf",
         )
-    with filepath.open("rb") as img_file, newpath.open("wb") as pdf_file:
-        pdf_file.write(img2pdf.convert(img_file))
-    return newpath
 
+        with scratch_image.open("rb") as img_file, self.pdf_file.open("wb") as pdf_file:
+            pdf_file.write(img2pdf.convert(img_file))
 
-def scan_file_for_barcodes(
-    filepath: Path,
-    mime_type: str,
-) -> DocumentBarcodeInfo:
-    """
-    Scan the provided pdf file for any barcodes
-    Returns a PDF filepath and a list of
-    (page_number, barcode_text) tuples
-    """
+    def detect(self) -> None:
+        """
+        Scan all pages of the PDF as images, recording each barcode found
+        together with the page it was found on as we go
+        """
+        # Bail if barcodes already exist
+        if self.barcodes:
+            return
+
+        # Choose the library for reading
+        if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
+            reader = self.read_barcodes_pyzbar
+            logger.debug("Scanning for barcodes using PYZBAR")
+        else:
+            reader = self.read_barcodes_zxing
+            logger.debug("Scanning for barcodes using ZXING")
 
-    def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
-        detected_barcodes = []
-        # use a temporary directory in case the file is too big to handle in memory
-        with tempfile.TemporaryDirectory() as path:
+        try:
             pages_from_path = convert_from_path(
-                pdf_filepath,
+                self.pdf_file,
                 dpi=300,
-                output_folder=path,
+                output_folder=self.temp_dir.name,
             )
+
             for current_page_number, page in enumerate(pages_from_path):
-                for barcode_value in barcode_reader(page):
-                    detected_barcodes.append(
+                for barcode_value in reader(page):
+                    self.barcodes.append(
                         Barcode(current_page_number, barcode_value),
                     )
-        return detected_barcodes
-
-    pdf_filepath = None
-    barcodes = []
-
-    if supported_file_type(mime_type):
-        pdf_filepath = filepath
-        if mime_type == "image/tiff":
-            pdf_filepath = convert_from_tiff_to_pdf(filepath)
 
-        # Always try pikepdf first, it's usually fine, faster and
-        # uses less memory
-        try:
-            barcodes = _pdf2image_barcode_scan(pdf_filepath)
         # Password protected files can't be checked
         # This is the exception raised for those
         except PDFPageCountError as e:
@@ -191,141 +222,130 @@ def scan_file_for_barcodes(
             logger.warning(
                 f"Exception during barcode scanning: {e}",
             )
-    else:
-        logger.warning(
-            f"Unsupported file format for barcode reader: {str(mime_type)}",
-        )
-
-    return DocumentBarcodeInfo(pdf_filepath, barcodes)
-
-
-def get_separating_barcodes(barcodes: List[Barcode]) -> Dict[int, bool]:
-    """
-    Search the parsed barcodes for separators
-    and returns a dict of page numbers, which
-    separate the file into new files, together
-    with the information whether to keep the page.
-    """
-    # filter all barcodes for the separator string
-    # get the page numbers of the separating barcodes
-    separator_pages = {bc.page: False for bc in barcodes if bc.is_separator}
-    if not settings.CONSUMER_ENABLE_ASN_BARCODE:
-        return separator_pages
-
-    # add the page numbers of the ASN barcodes
-    # (except for first page, that might lead to infinite loops).
-    return {
-        **separator_pages,
-        **{bc.page: True for bc in barcodes if bc.is_asn and bc.page != 0},
-    }
-
-
-def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
-    """
-    Search the parsed barcodes for any ASNs.
-    The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
-    is considered the ASN to be used.
-    Returns the detected ASN (or None)
-    """
-    asn = None
-
-    # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
-    asn_text = next(
-        (x.value for x in barcodes if x.is_asn),
-        None,
-    )
-
-    if asn_text:
-        logger.debug(f"Found ASN Barcode: {asn_text}")
-        # remove the prefix and remove whitespace
-        asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
-
-        # now, try parsing the ASN number
-        try:
-            asn = int(asn_text)
-        except ValueError as e:
-            logger.warning(f"Failed to parse ASN number because: {e}")
-
-    return asn
 
+    def get_separation_pages(self) -> Dict[int, bool]:
+        """
+        Search the parsed barcodes for separators and returns a dict of page
+        numbers, which separate the file into new files, together with the
+        information whether to keep the page.
+        """
+        # filter all barcodes for the separator string
+        # get the page numbers of the separating barcodes
+        separator_pages = {bc.page: False for bc in self.barcodes if bc.is_separator}
+        if not settings.CONSUMER_ENABLE_ASN_BARCODE:
+            return separator_pages
+
+        # add the page numbers of the ASN barcodes
+        # (except for first page, that might lead to infinite loops).
+        return {
+            **separator_pages,
+            **{bc.page: True for bc in self.barcodes if bc.is_asn and bc.page != 0},
+        }
+
+    def separate_pages(self, pages_to_split_on: Dict[int, bool]) -> List[Path]:
+        """
+        Separate the provided pdf file on the pages_to_split_on.
+        The pages which are defined by the keys in pages_to_split_on
+        will be removed if the corresponding value is False.
+        Returns a list of (temporary) filepaths to consume.
+        These live in temp_dir and are removed when it is cleaned up.
+        """
 
-def separate_pages(filepath: Path, pages_to_split_on: Dict[int, bool]) -> List[Path]:
-    """
-    Separate the provided pdf file on the pages_to_split_on.
-    The pages which are defined by the keys in page_numbers
-    will be removed if the corresponding value is false.
-    Returns a list of (temporary) filepaths to consume.
-    These will need to be deleted later.
-    """
+        document_paths = []
+        fname = self.file.with_suffix("").name
+        with Pdf.open(self.pdf_file) as input_pdf:
+            # Start with an empty document
+            current_document: List[Page] = []
+            # A list of documents, ie a list of lists of pages
+            documents: List[List[Page]] = [current_document]
+
+            for idx, page in enumerate(input_pdf.pages):
+                # Keep building the new PDF as long as it is not a
+                # separator index
+                if idx not in pages_to_split_on:
+                    current_document.append(page)
+                    continue
+
+                # This is a split index
+                # Start a new destination page listing
+                logger.debug(f"Starting new document at idx {idx}")
+                current_document = []
+                documents.append(current_document)
+                keep_page = pages_to_split_on[idx]
+                if keep_page:
+                    # Keep the page
+                    # (new document is started by asn barcode)
+                    current_document.append(page)
+
+            documents = [x for x in documents if len(x)]
+
+            logger.debug(f"Split into {len(documents)} new documents")
+
+            # Write the new documents out
+            for doc_idx, document in enumerate(documents):
+                dst = Pdf.new()
+                dst.pages.extend(document)
+
+                output_filename = f"{fname}_document_{doc_idx}.pdf"
+
+                logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
+                savepath = Path(self.temp_dir.name) / output_filename
+                with open(savepath, "wb") as out:
+                    dst.save(out)
+                document_paths.append(savepath)
+
+            return document_paths
+
+    def separate(
+        self,
+        source: DocumentSource,
+        override_name: Optional[str] = None,
+    ) -> bool:
+        """
+        Separates the document, based on barcodes and configuration, creating new
+        documents as required in the appropriate location.
 
-    document_paths = []
-
-    if not pages_to_split_on:
-        logger.warning("No pages to split on!")
-        return document_paths
-
-    os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
-    tempdir = Path(tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR))
-    fname = filepath.with_suffix("").name
-    pdf = Pdf.open(filepath)
-
-    # Start with an empty document
-    current_document: List[Page] = []
-    # A list of documents, ie a list of lists of pages
-    documents: List[List[Page]] = [current_document]
-
-    for idx, page in enumerate(pdf.pages):
-        # Keep building the new PDF as long as it is not a
-        # separator index
-        if idx not in pages_to_split_on:
-            current_document.append(page)
-            continue
-
-        # This is a split index
-        # Start a new destination page listing
-        logger.debug(f"Starting new document at idx {idx}")
-        current_document = []
-        documents.append(current_document)
-        keep_page = pages_to_split_on[idx]
-        if keep_page:
-            # Keep the page
-            # (new document is started by asn barcode)
-            current_document.append(page)
-
-    documents = [x for x in documents if len(x)]
-
-    logger.debug(f"Split into {len(documents)} new documents")
-
-    # Write the new documents out
-    for doc_idx, document in enumerate(documents):
-        dst = Pdf.new()
-        dst.pages.extend(document)
-
-        output_filename = f"{fname}_document_{doc_idx}.pdf"
-
-        logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
-        savepath = tempdir / output_filename
-        with open(savepath, "wb") as out:
-            dst.save(out)
-        document_paths.append(savepath)
-
-    return document_paths
-
-
-def save_to_dir(
-    filepath: Path,
-    newname: str = None,
-    target_dir: Path = settings.CONSUMPTION_DIR,
-):
-    """
-    Copies filepath to target_dir.
-    Optionally rename the file.
-    """
-    if filepath.is_file() and target_dir.is_dir():
-        dest = target_dir
-        if newname is not None:
-            dest = dest / newname
-        shutil.copy(filepath, dest)
-        logging.debug(f"saved {str(filepath)} to {str(dest)}")
-    else:
-        logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
+        Returns True if a split happened, False otherwise
+        """
+        # Do nothing
+        if not self.supported_mime_type:
+            logger.warning(f"Unsupported file format for barcode reader: {self.mime}")
+            return False
+
+        # Does nothing unless needed
+        self.convert_from_tiff_to_pdf()
+
+        # Actually read the codes, if any
+        self.detect()
+
+        separator_pages = self.get_separation_pages()
+
+        # Also do nothing
+        if not separator_pages:
+            logger.warning("No pages to split on!")
+            return False
+
+        # Create the split documents
+        doc_paths = self.separate_pages(separator_pages)
+
+        # Save the new documents to correct folder
+        if source != DocumentSource.ConsumeFolder:
+            # The given file is somewhere in SCRATCH_DIR,
+            # and new documents must be moved to the CONSUMPTION_DIR
+            # for the consumer to notice them
+            save_to_dir = settings.CONSUMPTION_DIR
+        else:
+            # The given file is somewhere in CONSUMPTION_DIR,
+            # and may be some levels down for recursive tagging
+            # so use the file's parent to preserve any metadata
+            save_to_dir = self.file.parent
+
+        for idx, document_path in enumerate(doc_paths):
+            if override_name is not None:
+                newname = f"{str(idx)}_{override_name}"
+                dest = save_to_dir / newname
+            else:
+                dest = save_to_dir
+            logger.info(f"Saving {document_path} to {dest}")
+            shutil.copy2(document_path, dest)
+        return True
index f51fa9828e22a77d79c5f1bf36cbc97d2202883e..1603a13590a3c4293cfa1740d05be4539fd55777 100644 (file)
@@ -16,16 +16,15 @@ from filelock import FileLock
 from redis.exceptions import ConnectionError
 from whoosh.writing import AsyncWriter
 
-from documents import barcodes
 from documents import index
 from documents import sanity_checker
+from documents.barcodes import BarcodeReader
 from documents.classifier import DocumentClassifier
 from documents.classifier import load_classifier
 from documents.consumer import Consumer
 from documents.consumer import ConsumerError
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
-from documents.data_models import DocumentSource
 from documents.file_handling import create_source_path_directory
 from documents.file_handling import generate_unique_filename
 from documents.models import Correspondent
@@ -96,95 +95,39 @@ def consume_file(
 
     # read all barcodes in the current document
     if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE:
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            input_doc.original_file,
-            input_doc.mime_type,
-        )
-
-        # split document by separator pages, if enabled
-        if settings.CONSUMER_ENABLE_BARCODES:
-            separators = barcodes.get_separating_barcodes(doc_barcode_info.barcodes)
-
-            if len(separators) > 0:
-                logger.debug(
-                    f"Pages with separators found in: {input_doc.original_file}",
-                )
-                document_list = barcodes.separate_pages(
-                    doc_barcode_info.pdf_path,
-                    separators,
-                )
-
-                if document_list:
-                    # If the file is an upload, it's in the scratch directory
-                    # Move it to consume directory to be picked up
-                    # Otherwise, use the current parent to keep possible tags
-                    # from subdirectories
-                    if input_doc.source != DocumentSource.ConsumeFolder:
-                        save_to_dir = settings.CONSUMPTION_DIR
-                    else:
-                        # Note this uses the original file, because it's in the
-                        # consume folder already and may include additional path
-                        # components for tagging
-                        # the .path is somewhere in scratch in this case
-                        save_to_dir = input_doc.original_file.parent
-
-                    for n, document in enumerate(document_list):
-                        # save to consumption dir
-                        # rename it to the original filename  with number prefix
-                        if overrides.filename is not None:
-                            newname = f"{str(n)}_{overrides.filename}"
-                        else:
-                            newname = None
-
-                        barcodes.save_to_dir(
-                            document,
-                            newname=newname,
-                            target_dir=save_to_dir,
-                        )
-
-                        # Split file has been copied safely, remove it
-                        document.unlink()
-
-                    # And clean up the directory as well, now it's empty
-                    shutil.rmtree(document_list[0].parent)
-
-                    # This file has been split into multiple files without issue
-                    # remove the original and working copy
-                    input_doc.original_file.unlink()
-
-                    # If the original file was a TIFF, remove the PDF generated from it
-                    if input_doc.mime_type == "image/tiff":
-                        logger.debug(
-                            f"Deleting file {doc_barcode_info.pdf_path}",
-                        )
-                        doc_barcode_info.pdf_path.unlink()
-
-                    # notify the sender, otherwise the progress bar
-                    # in the UI stays stuck
-                    payload = {
-                        "filename": overrides.filename or input_doc.original_file.name,
-                        "task_id": None,
-                        "current_progress": 100,
-                        "max_progress": 100,
-                        "status": "SUCCESS",
-                        "message": "finished",
-                    }
-                    try:
-                        async_to_sync(get_channel_layer().group_send)(
-                            "status_updates",
-                            {"type": "status_update", "data": payload},
-                        )
-                    except ConnectionError as e:
-                        logger.warning(f"ConnectionError on status send: {str(e)}")
-                    # consuming stops here, since the original document with
-                    # the barcodes has been split and will be consumed separately
-                    return "File successfully split"
-
-        # try reading the ASN from barcode
-        if settings.CONSUMER_ENABLE_ASN_BARCODE:
-            overrides.asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
-            if overrides.asn:
-                logger.info(f"Found ASN in barcode: {overrides.asn}")
+        with BarcodeReader(input_doc.original_file, input_doc.mime_type) as reader:
+            if settings.CONSUMER_ENABLE_BARCODES and reader.separate(
+                input_doc.source,
+                overrides.filename,
+            ):
+                # notify the sender, otherwise the progress bar
+                # in the UI stays stuck
+                payload = {
+                    "filename": overrides.filename or input_doc.original_file.name,
+                    "task_id": None,
+                    "current_progress": 100,
+                    "max_progress": 100,
+                    "status": "SUCCESS",
+                    "message": "finished",
+                }
+                try:
+                    async_to_sync(get_channel_layer().group_send)(
+                        "status_updates",
+                        {"type": "status_update", "data": payload},
+                    )
+                except ConnectionError as e:
+                    logger.warning(f"ConnectionError on status send: {str(e)}")
+                # consuming stops here, since the original document with
+                # the barcodes has been split and will be consumed separately
+
+                input_doc.original_file.unlink()
+                return "File successfully split"
+
+            # try reading the ASN from barcode
+            if settings.CONSUMER_ENABLE_ASN_BARCODE:
+                overrides.asn = reader.asn
+                if overrides.asn:
+                    logger.info(f"Found ASN in barcode: {overrides.asn}")
 
     # continue with consumption if no barcode was found
     document = Consumer().try_consume_file(
diff --git a/src/documents/tests/samples/barcodes/barcode-128-PATCHT.png b/src/documents/tests/samples/barcodes/barcode-128-PATCHT.png
deleted file mode 100644 (file)
index 80517d5..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-128-PATCHT.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-128-custom.png b/src/documents/tests/samples/barcodes/barcode-128-custom.png
deleted file mode 100644 (file)
index c3f1b80..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-128-custom.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-39-PATCHT-distortion.png b/src/documents/tests/samples/barcodes/barcode-39-PATCHT-distortion.png
deleted file mode 100644 (file)
index 3f858f6..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-39-PATCHT-distortion.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-39-PATCHT-distortion2.png b/src/documents/tests/samples/barcodes/barcode-39-PATCHT-distortion2.png
deleted file mode 100644 (file)
index cc81f8e..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-39-PATCHT-distortion2.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-39-PATCHT-unreadable.png b/src/documents/tests/samples/barcodes/barcode-39-PATCHT-unreadable.png
deleted file mode 100644 (file)
index 1e24b4d..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-39-PATCHT-unreadable.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-39-PATCHT.png b/src/documents/tests/samples/barcodes/barcode-39-PATCHT.png
deleted file mode 100644 (file)
index 0078026..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-39-PATCHT.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-39-asn-123.png b/src/documents/tests/samples/barcodes/barcode-39-asn-123.png
deleted file mode 100644 (file)
index e0f7359..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-39-asn-123.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-39-asn-custom-prefix.png b/src/documents/tests/samples/barcodes/barcode-39-asn-custom-prefix.png
deleted file mode 100644 (file)
index 5712c69..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-39-asn-custom-prefix.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-39-asn-invalid.png b/src/documents/tests/samples/barcodes/barcode-39-asn-invalid.png
deleted file mode 100644 (file)
index cc7f0d4..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-39-asn-invalid.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-39-custom.png b/src/documents/tests/samples/barcodes/barcode-39-custom.png
deleted file mode 100644 (file)
index 5c2d7b4..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-39-custom.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/barcode-qr-custom.png b/src/documents/tests/samples/barcodes/barcode-qr-custom.png
deleted file mode 100644 (file)
index 6574638..0000000
Binary files a/src/documents/tests/samples/barcodes/barcode-qr-custom.png and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/patch-code-t-middle-alpha.tiff b/src/documents/tests/samples/barcodes/patch-code-t-middle-alpha.tiff
new file mode 100644 (file)
index 0000000..c8ee6f0
Binary files /dev/null and b/src/documents/tests/samples/barcodes/patch-code-t-middle-alpha.tiff differ
diff --git a/src/documents/tests/samples/barcodes/patch-code-t-middle-distorted.pdf b/src/documents/tests/samples/barcodes/patch-code-t-middle-distorted.pdf
new file mode 100755 (executable)
index 0000000..1a88b5a
Binary files /dev/null and b/src/documents/tests/samples/barcodes/patch-code-t-middle-distorted.pdf differ
diff --git a/src/documents/tests/samples/barcodes/patch-code-t-middle-fuzzy.pdf b/src/documents/tests/samples/barcodes/patch-code-t-middle-fuzzy.pdf
new file mode 100755 (executable)
index 0000000..01f4c08
Binary files /dev/null and b/src/documents/tests/samples/barcodes/patch-code-t-middle-fuzzy.pdf differ
diff --git a/src/documents/tests/samples/barcodes/patch-code-t-middle-unreadable.pdf b/src/documents/tests/samples/barcodes/patch-code-t-middle-unreadable.pdf
new file mode 100755 (executable)
index 0000000..a675806
Binary files /dev/null and b/src/documents/tests/samples/barcodes/patch-code-t-middle-unreadable.pdf differ
diff --git a/src/documents/tests/samples/barcodes/patch-code-t.pbm b/src/documents/tests/samples/barcodes/patch-code-t.pbm
deleted file mode 100644 (file)
index 7e72140..0000000
Binary files a/src/documents/tests/samples/barcodes/patch-code-t.pbm and /dev/null differ
diff --git a/src/documents/tests/samples/barcodes/qr-code-PATCHT.png b/src/documents/tests/samples/barcodes/qr-code-PATCHT.png
deleted file mode 100644 (file)
index 6f1d587..0000000
Binary files a/src/documents/tests/samples/barcodes/qr-code-PATCHT.png and /dev/null differ
index 5a6c3edf91ec4044a32520a4b8702ca985248c9a..8386712560db6ba0066d52041024d8e24f0c2ebd 100644 (file)
@@ -6,10 +6,9 @@ import pytest
 from django.conf import settings
 from django.test import TestCase
 from django.test import override_settings
-from PIL import Image
 
-from documents import barcodes
 from documents import tasks
+from documents.barcodes import BarcodeReader
 from documents.consumer import ConsumerError
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentSource
@@ -30,178 +29,6 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 
     BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
 
-    def test_barcode_reader_png(self):
-        """
-        GIVEN:
-            - PNG file with separator barcode
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - The barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT.png"
-        img = Image.open(test_file)
-        separator_barcode = settings.CONSUMER_BARCODE_STRING
-        self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
-
-    def test_barcode_reader_pbm(self):
-        """
-        GIVEN:
-            - Netpbm bitmap file with separator barcode
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - The barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pbm"
-
-        img = Image.open(test_file)
-        separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
-        self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
-
-    def test_barcode_reader_distortion_scratchy(self):
-        """
-        GIVEN:
-            - Image containing high noise
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - The barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion.png"
-        img = Image.open(test_file)
-        separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
-        self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
-
-    def test_barcode_reader_distortion_stretched(self):
-        """
-        GIVEN:
-            - Image with a stretched barcode
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - The barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion2.png"
-        img = Image.open(test_file)
-        separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
-        self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
-
-    def test_barcode_reader_unreadable(self):
-        """
-        GIVEN:
-            - Image with a truly unreadable barcode
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - No barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-unreadable.png"
-        img = Image.open(test_file)
-        self.assertEqual(barcodes.barcode_reader(img), [])
-
-    def test_barcode_reader_qr(self):
-        """
-        GIVEN:
-            - Image file with QR separator barcode
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - The barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "qr-code-PATCHT.png"
-        img = Image.open(test_file)
-        separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
-        self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
-
-    def test_barcode_reader_128(self):
-        """
-        GIVEN:
-            - Image file with 128 style separator barcode
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - The barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-PATCHT.png"
-
-        img = Image.open(test_file)
-        separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
-        self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
-
-    def test_barcode_reader_no_barcode(self):
-        """
-        GIVEN:
-            - Image file with no barcode
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - No barcode is detected
-        """
-        test_file = self.SAMPLE_DIR / "simple.png"
-        img = Image.open(test_file)
-        self.assertListEqual(barcodes.barcode_reader(img), [])
-
-    def test_barcode_reader_custom_separator(self):
-        """
-        GIVEN:
-            - Image file with custom separator barcode value
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - The barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.png"
-
-        img = Image.open(test_file)
-        self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
-
-    def test_barcode_reader_custom_qr_separator(self):
-        """
-        GIVEN:
-            - Image file with custom separator barcode value as a QR code
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - The barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.png"
-
-        img = Image.open(test_file)
-        self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
-
-    def test_barcode_reader_custom_128_separator(self):
-        """
-        GIVEN:
-            - Image file with custom separator 128 barcode value
-        WHEN:
-            - Image is scanned for codes
-        THEN:
-            - The barcode is detected
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png"
-
-        img = Image.open(test_file)
-        self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
-
-    def test_convert_from_tiff_to_pdf(self):
-        """
-        GIVEN:
-            - Multi-page TIFF image
-        WHEN:
-            - Conversion to PDF
-        THEN:
-            - The file converts without error
-        """
-        test_file = self.SAMPLE_DIR / "simple.tiff"
-
-        dst = settings.SCRATCH_DIR / "simple.tiff"
-        shutil.copy(test_file, dst)
-        target_file = barcodes.convert_from_tiff_to_pdf(dst)
-
-        self.assertIsFile(target_file)
-        self.assertEqual(target_file.suffix, ".pdf")
-
     def test_scan_file_for_separating_barcodes(self):
         """
         GIVEN:
@@ -213,16 +40,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {0: False})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {0: False})
 
     def test_scan_file_for_separating_barcodes_none_present(self):
         """
@@ -235,16 +58,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             - No pages to split on
         """
         test_file = self.SAMPLE_DIR / "simple.pdf"
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {})
 
     def test_scan_file_for_separating_barcodes_middle_page(self):
         """
@@ -257,16 +76,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {1: False})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {1: False})
 
     def test_scan_file_for_separating_barcodes_multiple_pages(self):
         """
@@ -279,39 +94,56 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {2: False, 5: False})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {2: False, 5: False})
 
-    def test_scan_file_for_separating_barcodes_upside_down(self):
+    def test_scan_file_for_separating_barcodes_hard_to_detect(self):
         """
         GIVEN:
             - PDF file containing a separator on page 1 (zero indexed)
-            - The barcode is upside down
+            - The barcode is upside down, fuzzy or distorted
         WHEN:
             - File is scanned for barcodes
         THEN:
             - Barcode is detected on page 1 (zero indexed)
         """
-        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle_reverse.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        for test_file in [
+            "patch-code-t-middle-reverse.pdf",
+            "patch-code-t-middle-distorted.pdf",
+            "patch-code-t-middle-fuzzy.pdf",
+        ]:
+            test_file = self.BARCODE_SAMPLE_DIR / test_file
+
+            with BarcodeReader(test_file, "application/pdf") as reader:
+                reader.detect()
+                separator_page_numbers = reader.get_separation_pages()
+
+                self.assertEqual(reader.pdf_file, test_file)
+                self.assertDictEqual(separator_page_numbers, {1: False})
+
+    def test_scan_file_for_separating_barcodes_unreadable(self):
+        """
+        GIVEN:
+            - PDF file containing a separator on page 1 (zero indexed)
+            - The barcode is not readable
+        WHEN:
+            - File is scanned for barcodes
+        THEN:
+            - No barcode is detected, so there are no pages to split on
+        """
+        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf"
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {1: False})
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
+
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {})
 
     def test_scan_file_for_separating_barcodes_fax_decode(self):
         """
@@ -324,16 +156,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {1: False})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {1: False})
 
     def test_scan_file_for_separating_qr_barcodes(self):
         """
@@ -347,16 +175,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {0: False})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {0: False})
 
     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
     def test_scan_file_for_separating_custom_barcodes(self):
@@ -371,16 +195,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {0: False})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {0: False})
 
     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
     def test_scan_file_for_separating_custom_qr_barcodes(self):
@@ -396,16 +216,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {0: False})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {0: False})
 
     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
     def test_scan_file_for_separating_custom_128_barcodes(self):
@@ -421,16 +237,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {0: False})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {0: False})
 
     def test_scan_file_for_separating_wrong_qr_barcodes(self):
         """
@@ -445,16 +257,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(separator_page_numbers, {})
 
     @override_settings(CONSUMER_BARCODE_STRING="ADAR-NEXTDOC")
     def test_scan_file_qr_barcodes_was_problem(self):
@@ -468,16 +276,13 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertGreater(len(doc_barcode_info.barcodes), 0)
-        self.assertDictEqual(separator_page_numbers, {1: False})
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertGreater(len(reader.barcodes), 0)
+            self.assertDictEqual(separator_page_numbers, {1: False})
 
     def test_separate_pages(self):
         """
@@ -490,9 +295,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
 
-        documents = barcodes.separate_pages(test_file, {1: False})
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            documents = reader.separate_pages({1: False})
 
-        self.assertEqual(len(documents), 2)
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertEqual(len(documents), 2)
 
     def test_separate_pages_double_code(self):
         """
@@ -505,9 +312,10 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf"
 
-        pages = barcodes.separate_pages(test_file, {1: False, 2: False})
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            documents = reader.separate_pages({1: False, 2: False})
 
-        self.assertEqual(len(pages), 2)
+            self.assertEqual(len(documents), 2)
 
     def test_separate_pages_no_list(self):
         """
@@ -519,111 +327,137 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             - No new documents are produced
             - A warning is logged
         """
-        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
+        test_file = self.SAMPLE_DIR / "simple.pdf"
 
         with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
-            pages = barcodes.separate_pages(test_file, {})
-            self.assertEqual(pages, [])
-            self.assertEqual(
-                cm.output,
-                [
-                    "WARNING:paperless.barcodes:No pages to split on!",
-                ],
-            )
+            with BarcodeReader(test_file, "application/pdf") as reader:
+                success = reader.separate(DocumentSource.ApiUpload)
+                self.assertFalse(success)
+                self.assertEqual(
+                    cm.output,
+                    [
+                        "WARNING:paperless.barcodes:No pages to split on!",
+                    ],
+                )
 
-    def test_save_to_dir(self):
+    def test_save_to_dir_given_name(self):
         """
         GIVEN:
             - File to save to a directory
+            - There is a name override
         WHEN:
             - The file is saved
         THEN:
             - The file exists
         """
-        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
+        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.separate(DocumentSource.ApiUpload, "newname.pdf")
 
-        barcodes.save_to_dir(test_file, target_dir=settings.SCRATCH_DIR)
-        target_file = settings.SCRATCH_DIR / "patch-code-t.pdf"
-        self.assertIsFile(target_file)
+            self.assertEqual(reader.pdf_file, test_file)
+            target_file1 = settings.CONSUMPTION_DIR / "0_newname.pdf"
+            target_file2 = settings.CONSUMPTION_DIR / "1_newname.pdf"
+            self.assertIsFile(target_file1)
+            self.assertIsFile(target_file2)
 
-    def test_save_to_dir_not_existing(self):
+    def test_barcode_splitter_api_upload(self):
         """
         GIVEN:
-            - File to save to a directory
-            - The directory doesn't exist
+            - Input file containing barcodes
         WHEN:
-            - The file is saved
+            - Input file is split on barcodes
         THEN:
-            - The file exists
+            - Correct number of files produced
         """
-        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
+        sample_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
+        test_file = settings.SCRATCH_DIR / "patch-code-t-middle.pdf"
+        shutil.copy(sample_file, test_file)
 
-        nonexistingdir = Path("/nowhere")
-        self.assertIsNotDir(nonexistingdir)
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.separate(DocumentSource.ApiUpload)
 
-        with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
-            barcodes.save_to_dir(test_file, target_dir=nonexistingdir)
-        self.assertEqual(
-            cm.output,
-            [
-                f"WARNING:paperless.barcodes:{str(test_file)} or {str(nonexistingdir)} don't exist.",
-            ],
-        )
+            self.assertEqual(reader.pdf_file, test_file)
 
-    def test_save_to_dir_given_name(self):
+            target_file1 = (
+                settings.CONSUMPTION_DIR / "patch-code-t-middle_document_0.pdf"
+            )
+
+            target_file2 = (
+                settings.CONSUMPTION_DIR / "patch-code-t-middle_document_1.pdf"
+            )
+
+            self.assertIsFile(target_file1)
+            self.assertIsFile(target_file2)
+
+    def test_barcode_splitter_consume_dir(self):
         """
         GIVEN:
-            - File to save to a directory
-            - There is a name override
+            - Input file containing barcodes
         WHEN:
-            - The file is saved
+            - Input file is split on barcodes
         THEN:
-            - The file exists
+            - Correct number of files produced
         """
-        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
+        sample_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
+        test_file = settings.CONSUMPTION_DIR / "patch-code-t-middle.pdf"
+        shutil.copy(sample_file, test_file)
 
-        barcodes.save_to_dir(
-            test_file,
-            newname="newname.pdf",
-            target_dir=settings.SCRATCH_DIR,
-        )
-        target_file = settings.SCRATCH_DIR / "newname.pdf"
-        self.assertIsFile(target_file)
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            reader.separate(DocumentSource.ConsumeFolder)
 
-    def test_barcode_splitter(self):
+            self.assertEqual(reader.pdf_file, test_file)
+
+            target_file1 = (
+                settings.CONSUMPTION_DIR / "patch-code-t-middle_document_0.pdf"
+            )
+
+            target_file2 = (
+                settings.CONSUMPTION_DIR / "patch-code-t-middle_document_1.pdf"
+            )
+
+            self.assertIsFile(target_file1)
+            self.assertIsFile(target_file2)
+
+    def test_barcode_splitter_consume_dir_recursive(self):
         """
         GIVEN:
             - Input file containing barcodes
+            - Input file is within a directory structure of the consume folder
         WHEN:
             - Input file is split on barcodes
         THEN:
             - Correct number of files produced
+            - Output files are within the same directory structure
         """
-        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
-
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
+        sample_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
+        test_file = (
+            settings.CONSUMPTION_DIR / "tag1" / "tag2" / "patch-code-t-middle.pdf"
         )
+        test_file.parent.mkdir(parents=True)
+        shutil.copy(sample_file, test_file)
 
-        self.assertEqual(test_file, doc_barcode_info.pdf_path)
-        self.assertTrue(len(separator_page_numbers) > 0)
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.separate(DocumentSource.ConsumeFolder)
 
-        document_list = barcodes.separate_pages(test_file, separator_page_numbers)
-        self.assertGreater(len(document_list), 0)
+            self.assertEqual(reader.pdf_file, test_file)
 
-        for document in document_list:
-            barcodes.save_to_dir(document, target_dir=settings.SCRATCH_DIR)
-
-        target_file1 = settings.SCRATCH_DIR / "patch-code-t-middle_document_0.pdf"
+            target_file1 = (
+                settings.CONSUMPTION_DIR
+                / "tag1"
+                / "tag2"
+                / "patch-code-t-middle_document_0.pdf"
+            )
 
-        target_file2 = settings.SCRATCH_DIR / "patch-code-t-middle_document_1.pdf"
+            target_file2 = (
+                settings.CONSUMPTION_DIR
+                / "tag1"
+                / "tag2"
+                / "patch-code-t-middle_document_1.pdf"
+            )
 
-        self.assertIsFile(target_file1)
-        self.assertIsFile(target_file2)
+            self.assertIsFile(target_file1)
+            self.assertIsFile(target_file2)
 
     @override_settings(CONSUMER_ENABLE_BARCODES=True)
     def test_consume_barcode_file(self):
@@ -681,7 +515,39 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
                 ),
                 "File successfully split",
             )
-        self.assertFalse(dst.exists())
+        self.assertIsNotFile(dst)
+
+    @override_settings(
+        CONSUMER_ENABLE_BARCODES=True,
+        CONSUMER_BARCODE_TIFF_SUPPORT=True,
+    )
+    def test_consume_barcode_tiff_file_with_alpha(self):
+        """
+        GIVEN:
+            - TIFF image containing barcodes
+            - TIFF image has an alpha layer
+        WHEN:
+            - Consume task handles the alpha layer and returns
+        THEN:
+            - The file was split without issue
+        """
+        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff"
+
+        dst = settings.SCRATCH_DIR / "patch-code-t-middle.tiff"
+        shutil.copy(test_file, dst)
+
+        with mock.patch("documents.tasks.async_to_sync"):
+            self.assertEqual(
+                tasks.consume_file(
+                    ConsumableDocument(
+                        source=DocumentSource.ConsumeFolder,
+                        original_file=dst,
+                    ),
+                    None,
+                ),
+                "File successfully split",
+            )
+        self.assertIsNotFile(dst)
 
     @override_settings(
         CONSUMER_ENABLE_BARCODES=True,
@@ -760,7 +626,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
                 ),
                 "File successfully split",
             )
-        self.assertFalse(dst.exists())
+        self.assertIsNotFile(dst)
 
     def test_scan_file_for_separating_barcodes_password(self):
         """
@@ -773,20 +639,16 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.SAMPLE_DIR / "password-is-test.pdf"
         with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
-            doc_barcode_info = barcodes.scan_file_for_barcodes(
-                test_file,
-                "application/pdf",
-            )
-            warning = cm.output[0]
-            expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
-            self.assertTrue(warning.startswith(expected_str))
+            with BarcodeReader(test_file, "application/pdf") as reader:
+                reader.detect()
+                warning = cm.output[0]
+                expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
+                self.assertTrue(warning.startswith(expected_str))
 
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+                separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertDictEqual(separator_page_numbers, {})
+                self.assertEqual(reader.pdf_file, test_file)
+                self.assertDictEqual(separator_page_numbers, {})
 
     @override_settings(
         CONSUMER_ENABLE_BARCODES=True,
@@ -803,28 +665,27 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
 
-        self.assertEqual(test_file, doc_barcode_info.pdf_path)
-        self.assertDictEqual(
-            separator_page_numbers,
-            {
-                2: False,
-                4: True,
-                5: True,
-                8: True,
-                10: True,
-            },
-        )
+            self.assertEqual(
+                reader.pdf_file,
+                test_file,
+            )
+            self.assertDictEqual(
+                separator_page_numbers,
+                {
+                    2: False,
+                    4: True,
+                    5: True,
+                    8: True,
+                    10: True,
+                },
+            )
 
-        document_list = barcodes.separate_pages(test_file, separator_page_numbers)
-        self.assertEqual(len(document_list), 6)
+            document_list = reader.separate_pages(separator_page_numbers)
+            self.assertEqual(len(document_list), 6)
 
     @override_settings(
         CONSUMER_ENABLE_BARCODES=True,
@@ -841,27 +702,23 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        separator_page_numbers = barcodes.get_separating_barcodes(
-            doc_barcode_info.barcodes,
-        )
-
-        self.assertEqual(test_file, doc_barcode_info.pdf_path)
-        self.assertDictEqual(
-            separator_page_numbers,
-            {
-                2: True,
-                3: True,
-                6: True,
-                8: True,
-            },
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            reader.detect()
+            separator_page_numbers = reader.get_separation_pages()
+
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertDictEqual(
+                separator_page_numbers,
+                {
+                    2: True,
+                    3: True,
+                    6: True,
+                    8: True,
+                },
+            )
 
-        document_list = barcodes.separate_pages(test_file, separator_page_numbers)
-        self.assertEqual(len(document_list), 5)
+            document_list = reader.separate_pages(separator_page_numbers)
+            self.assertEqual(len(document_list), 5)
 
 
 class TestAsnBarcode(DirectoriesMixin, TestCase):
@@ -869,74 +726,60 @@ class TestAsnBarcode(DirectoriesMixin, TestCase):
 
     BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
 
-    def test_barcode_reader_asn_normal(self):
+    @override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
+    def test_scan_file_for_asn_custom_prefix(self):
         """
         GIVEN:
-            - Image containing standard ASNxxxxx barcode
+            - PDF containing an ASN barcode with custom prefix
+            - The ASN value is 123
         WHEN:
-            - Image is scanned for barcodes
+            - File is scanned for barcodes
         THEN:
-            - The barcode is located
-            - The barcode value is correct
+            - The ASN is located
+            - The ASN integer value is correct
         """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.png"
+        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            asn = reader.asn
 
-        img = Image.open(test_file)
-        self.assertEqual(barcodes.barcode_reader(img), ["ASN00123"])
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertEqual(asn, 123)
 
-    def test_barcode_reader_asn_invalid(self):
+    def test_scan_file_for_asn_barcode(self):
         """
         GIVEN:
-            - Image containing invalid ASNxxxxx barcode
-            - The number portion of the ASN is not a number
+            - PDF containing an ASN barcode
+            - The ASN value is 123
         WHEN:
-            - Image is scanned for barcodes
+            - File is scanned for barcodes
         THEN:
-            - The barcode is located
-            - The barcode value is correct
+            - The ASN is located
+            - The ASN integer value is correct
         """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.png"
+        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
 
-        img = Image.open(test_file)
-        self.assertEqual(barcodes.barcode_reader(img), ["ASNXYZXYZ"])
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            asn = reader.asn
 
-    def test_barcode_reader_asn_custom_prefix(self):
-        """
-        GIVEN:
-            - Image containing custom prefix barcode
-        WHEN:
-            - Image is scanned for barcodes
-        THEN:
-            - The barcode is located
-            - The barcode value is correct
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.png"
-
-        img = Image.open(test_file)
-        self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM-PREFIX-00123"])
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertEqual(asn, 123)
 
-    @override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
-    def test_scan_file_for_asn_custom_prefix(self):
+    def test_scan_file_for_asn_not_existing(self):
         """
         GIVEN:
-            - PDF containing an ASN barcode with custom prefix
-            - The ASN value is 123
+            - PDF without an ASN barcode
         WHEN:
             - File is scanned for barcodes
         THEN:
-            - The ASN is located
-            - The ASN integer value is correct
+            - No ASN is retrieved from the document
         """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
+        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            asn = reader.asn
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertEqual(asn, 123)
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertEqual(asn, None)
 
     def test_scan_file_for_asn_barcode_invalid(self):
         """
@@ -951,15 +794,13 @@ class TestAsnBarcode(DirectoriesMixin, TestCase):
         """
         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf"
 
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
+        with BarcodeReader(test_file, "application/pdf") as reader:
+            asn = reader.asn
 
-        asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
+            self.assertEqual(reader.pdf_file, test_file)
 
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertEqual(asn, None)
+            self.assertEqual(reader.pdf_file, test_file)
+            self.assertEqual(asn, None)
 
     @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
     def test_consume_barcode_file_asn_assignment(self):
@@ -992,48 +833,6 @@ class TestAsnBarcode(DirectoriesMixin, TestCase):
 
             self.assertEqual(kwargs["override_asn"], 123)
 
-    def test_scan_file_for_asn_barcode(self):
-        """
-        GIVEN:
-            - PDF containing an ASN barcode
-            - The ASN value is 123
-        WHEN:
-            - File is scanned for barcodes
-        THEN:
-            - The ASN is located
-            - The ASN integer value is correct
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
-
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
-
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertEqual(asn, 123)
-
-    def test_scan_file_for_asn_not_existing(self):
-        """
-        GIVEN:
-            - PDF without an ASN barcode
-        WHEN:
-            - File is scanned for barcodes
-        THEN:
-            - No ASN is retrieved from the document
-        """
-        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
-
-        doc_barcode_info = barcodes.scan_file_for_barcodes(
-            test_file,
-            "application/pdf",
-        )
-        asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
-
-        self.assertEqual(doc_barcode_info.pdf_path, test_file)
-        self.assertEqual(asn, None)
-
     @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
     def test_asn_too_large(self):
         """