git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Feature: Parse ASN from barcode
author Peter Kappelt <kappelt.peter@gmail.com>
Sun, 15 Jan 2023 14:55:00 +0000 (15:55 +0100)
committer Trenton H <797416+stumpylog@users.noreply.github.com>
Tue, 24 Jan 2023 17:43:52 +0000 (09:43 -0800)
ASN-Barcodes are identified by a configurable prefix

src/documents/barcodes.py
src/documents/consumer.py
src/documents/tasks.py
src/paperless/settings.py

index 43c48046a3baddc850eb3812aa0fc5efa486eb87..638dfed6e6c34c2c7bea794d314c1d5e28c4c80e 100644 (file)
@@ -293,3 +293,36 @@ def save_to_dir(
             os.rename(dst, dst_new)
     else:
         logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
+
+
+def scan_file_for_asn_barcode(filepath: str) -> Tuple[Optional[str], Optional[int]]:
+    """
+    Scan the provided pdf file for barcodes that contain the ASN
+    for this document.
+    The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
+    is considered the ASN to be used.
+    Returns a PDF filepath and the detected ASN (or None)
+    """
+    asn = None
+
+    pdf_filepath, barcodes = scan_file_for_barcodes(filepath)
+    # only the barcode text is important here -> discard the page number
+    barcodes = [text for _, text in barcodes]
+    # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
+    asn_text = next(
+        (x for x in barcodes if x.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX))
+    )
+
+    logger.debug(f"Found ASN Barcode: {asn_text}")
+
+    if asn_text:
+        # remove the prefix and remove whitespace
+        asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
+
+        # now, try parsing the ASN number
+        try:
+            asn = int(asn_text)
+        except ValueError as e:
+            logger.warn(f"Failed to parse ASN number because: {e}")
+
+    return pdf_filepath, asn
index b46b3a6839b68862be8114e6c52fe6b72b0fe62e..6f42a692c79fdf906f9da53b9233447cac61e821 100644 (file)
@@ -98,6 +98,7 @@ class Consumer(LoggingMixin):
         self.override_correspondent_id = None
         self.override_tag_ids = None
         self.override_document_type_id = None
+        self.override_asn = None
         self.task_id = None
 
         self.channel_layer = get_channel_layer()
@@ -130,6 +131,20 @@ class Consumer(LoggingMixin):
         os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
         os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
 
+    def pre_check_asn_unique(self):
+        """
+        Check that if override_asn is given, it is unique
+        """
+        if not self.override_asn:
+            # check not necessary in case no ASN gets set
+            return
+        if Document.objects.filter(archive_serial_number=self.override_asn).exists():
+            self.log(
+                "warning",
+                f"A document with ASN {self.override_asn} already exists. No ASN will be set!",
+            )
+            self.override_asn = None
+
     def run_pre_consume_script(self):
         if not settings.PRE_CONSUME_SCRIPT:
             return
@@ -255,6 +270,7 @@ class Consumer(LoggingMixin):
         override_tag_ids=None,
         task_id=None,
         override_created=None,
+        override_asn=None,
     ) -> Document:
         """
         Return the document object if it was successfully created.
@@ -268,6 +284,7 @@ class Consumer(LoggingMixin):
         self.override_tag_ids = override_tag_ids
         self.task_id = task_id or str(uuid.uuid4())
         self.override_created = override_created
+        self.override_asn = override_asn
 
         self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
 
@@ -281,6 +298,7 @@ class Consumer(LoggingMixin):
         self.pre_check_file_exists()
         self.pre_check_directories()
         self.pre_check_duplicate()
+        self.pre_check_asn_unique()
 
         self.log("info", f"Consuming {self.filename}")
 
@@ -526,6 +544,9 @@ class Consumer(LoggingMixin):
             for tag_id in self.override_tag_ids:
                 document.tags.add(Tag.objects.get(pk=tag_id))
 
+        if self.override_asn:
+            document.archive_serial_number = self.override_asn
+
     def _write(self, storage_type, source, target):
         with open(source, "rb") as read_file:
             with open(target, "wb") as write_file:
index 0168b42ba13cd126c7f4a03aa26ea35e27f002bd..1b7f15d5a2256ba50ec1b38d295e67363920a315 100644 (file)
@@ -175,6 +175,13 @@ def consume_file(
                 # the barcodes has been split and will be consumed separately
                 return "File successfully split"
 
+    # try reading ASN barcodes
+    asn = None
+    if settings.CONSUMER_ENABLE_ASN_BARCODE:
+        _, asn = barcodes.scan_file_for_asn_barcode(path)
+        if asn:
+            logger.info(f"Using ASN {asn} from barcode")
+
     # continue with consumption if no barcode was found
     document = Consumer().try_consume_file(
         path,
@@ -185,6 +192,7 @@ def consume_file(
         override_tag_ids=override_tag_ids,
         task_id=task_id,
         override_created=override_created,
+        override_asn=asn
     )
 
     if document:
index c5bb4801c01b964e8608cec3ca5c81f9aa181644..cf119ea8a0ad6520cfe449c6e8da951463d4d1e4 100644 (file)
@@ -657,6 +657,16 @@ CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
     "PATCHT",
 )
 
+CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
+    "PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",  # gates the ASN barcode scan in consume_file
+)
+
+CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
+    "PAPERLESS_CONSUMER_ASN_BARCODE_PREFIX",  # text an ASN barcode must start with
+    "ASN",  # prefix used when the environment variable is unset
+)
+
+
 OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
 
 # The default language that tesseract will attempt to use when parsing