import tempfile
from dataclasses import dataclass
from functools import lru_cache
+from math import ceil
from pathlib import Path
from typing import List
from typing import Optional
# raise an exception, triggering fallback
pillow_img = pdfimage.as_pil_image()
+ # Scale the image down
+ # See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
+ # TLDR: zbar has issues with larger images
+ width, height = pillow_img.size
+ if width > 512:
+ scaler = ceil(width / 512)
+ new_width = int(width / scaler)
+ new_height = int(height / scaler)
+ pillow_img = pillow_img.resize((new_width, new_height))
+
+ width, height = pillow_img.size
+
+ if height > 1024:
+ scaler = ceil(height / 1024)
+ new_width = int(width / scaler)
+ new_height = int(height / scaler)
+ pillow_img = pillow_img.resize((new_width, new_height))
+
for barcode_value in barcode_reader(pillow_img):
detected_barcodes.append(Barcode(page_num, barcode_value))
"""
Search the parsed barcodes for separators
and returns a list of page numbers, which
- separate the file into new files
+ separate the file into new files.
"""
# filter all barcodes for the separator string
# get the page numbers of the separating barcodes
- return [bc.page for bc in barcodes if bc.is_separator]
+ return list({bc.page for bc in barcodes if bc.is_separator})
def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
try:
asn = int(asn_text)
except ValueError as e:
- logger.warn(f"Failed to parse ASN number because: {e}")
+ logger.warning(f"Failed to parse ASN number because: {e}")
return asn
self.assertEqual(doc_barcode_info.pdf_path, test_file)
self.assertListEqual(separator_page_numbers, [])
+ @override_settings(CONSUMER_BARCODE_STRING="ADAR-NEXTDOC")
+ def test_scan_file_for_separating_qr_barcodes(self):
+ """
+ GIVEN:
+ - Input PDF with certain QR codes that aren't detected at current size
+ WHEN:
+ - The input file is scanned for barcodes
+ THEN:
+ - QR codes are detected
+ """
+ test_file = os.path.join(
+ self.BARCODE_SAMPLE_DIR,
+ "many-qr-codes.pdf",
+ )
+
+ doc_barcode_info = barcodes.scan_file_for_barcodes(
+ test_file,
+ )
+ separator_page_numbers = barcodes.get_separating_barcodes(
+ doc_barcode_info.barcodes,
+ )
+
+ self.assertGreater(len(doc_barcode_info.barcodes), 0)
+ self.assertListEqual(separator_page_numbers, [1])
+
def test_separate_pages(self):
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,