os.rename(dst, dst_new)
else:
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
+
+
def scan_file_for_asn_barcode(filepath: str) -> Tuple[Optional[str], Optional[int]]:
    """
    Scan the provided file for a barcode that carries the ASN of this document.

    The first barcode whose text starts with
    settings.CONSUMER_ASN_BARCODE_PREFIX is considered the ASN to be used.
    The prefix and any surrounding whitespace are removed and the remainder
    is parsed as an integer.

    Returns a tuple of the PDF filepath reported by scan_file_for_barcodes
    and the detected ASN. The ASN is None when no barcode starts with the
    prefix, or when the text after the prefix is not a valid integer.
    """
    asn = None

    pdf_filepath, barcodes = scan_file_for_barcodes(filepath)
    prefix = settings.CONSUMER_ASN_BARCODE_PREFIX

    # Only the barcode text is important here -> the page number is discarded.
    # The explicit None default is required: without it next() raises
    # StopIteration for every file that has no ASN barcode.
    asn_text = next(
        (text for _, text in barcodes if text.startswith(prefix)),
        None,
    )

    if not asn_text:
        # nothing to parse; this is the normal case for most documents
        return pdf_filepath, asn

    logger.debug(f"Found ASN Barcode: {asn_text}")

    # remove the prefix and remove whitespace
    asn_text = asn_text[len(prefix) :].strip()

    # now, try parsing the ASN number
    try:
        asn = int(asn_text)
    except ValueError as e:
        # best effort: a malformed ASN must not abort the scan
        logger.warning(f"Failed to parse ASN number because: {e}")

    return pdf_filepath, asn
self.override_correspondent_id = None
self.override_tag_ids = None
self.override_document_type_id = None
+ self.override_asn = None
self.task_id = None
self.channel_layer = get_channel_layer()
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
def pre_check_asn_unique(self):
    """
    Drop a requested ASN override that is already in use.

    Does nothing when override_asn is unset (or otherwise falsy). If a
    Document with that archive_serial_number already exists, a warning is
    logged and override_asn is reset to None.
    """
    requested_asn = self.override_asn
    if requested_asn:
        already_used = Document.objects.filter(
            archive_serial_number=requested_asn,
        ).exists()
        if already_used:
            self.log(
                "warning",
                f"A document with ASN {self.override_asn} already exists. No ASN will be set!",
            )
            self.override_asn = None
+
def run_pre_consume_script(self):
if not settings.PRE_CONSUME_SCRIPT:
return
override_tag_ids=None,
task_id=None,
override_created=None,
+ override_asn=None,
) -> Document:
"""
Return the document object if it was successfully created.
self.override_tag_ids = override_tag_ids
self.task_id = task_id or str(uuid.uuid4())
self.override_created = override_created
+ self.override_asn = override_asn
self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
self.pre_check_file_exists()
self.pre_check_directories()
self.pre_check_duplicate()
+ self.pre_check_asn_unique()
self.log("info", f"Consuming {self.filename}")
for tag_id in self.override_tag_ids:
document.tags.add(Tag.objects.get(pk=tag_id))
+ if self.override_asn:
+ document.archive_serial_number = self.override_asn
+
def _write(self, storage_type, source, target):
with open(source, "rb") as read_file:
with open(target, "wb") as write_file:
# the barcodes has been split and will be consumed separately
return "File successfully split"
+ # try reading ASN barcodes
+ asn = None
+ if settings.CONSUMER_ENABLE_ASN_BARCODE:
+ _, asn = barcodes.scan_file_for_asn_barcode(path)
+ if asn:
+ logger.info(f"Using ASN {asn} from barcode")
+
# continue with consumption if no barcode was found
document = Consumer().try_consume_file(
path,
override_tag_ids=override_tag_ids,
task_id=task_id,
override_created=override_created,
+ override_asn=asn
)
if document:
"PATCHT",
)
# Toggle for reading the archive serial number (ASN) from a barcode.
CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = __get_boolean(
    "PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE"
)

# Text a barcode must start with to be treated as an ASN barcode; falls back
# to "ASN" when the environment variable is not set.
CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.environ.get(
    "PAPERLESS_CONSUMER_ASN_BARCODE_PREFIX", "ASN"
)
+
+
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
# The default language that tesseract will attempt to use when parsing