Defaults to "300"
+#### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE}
+
+: Enables the detection of barcodes in the scanned document and
+assigns or creates tags if a properly formatted barcode is detected.
+
+ The barcode must match one of the (configurable) regular expressions.
+ If the barcode text contains ',' (comma), it is split into multiple
+ barcodes which are individually processed for tagging.
+
+ Matching is case-insensitive.
+
+ Defaults to false.
+
+#### [`PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING=<json dict>`](#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING) {#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING}
+
+: Defines a dictionary of filter regex and substitute expressions.
+
+ Syntax: {"<regex>": "<substitute>" [, ...]}
+
+ A barcode is considered for tagging if the barcode text matches
+ at least one of the provided <regex> patterns.
+
+ If a match is found, the <substitute> rule is applied. This allows very
+ versatile reformatting and mapping of barcode patterns to tag values.
+
+ If a tag is not found it will be created.
+
+ Defaults to:
+
+ {"TAG:(.*)": "\\g<1>"} which defines
+ - a regex TAG:(.*) which matches barcodes beginning with TAG:
+ followed by any text, which gets stored into match group #1, and
+ - a substitute \\g<1> that replaces the original barcode text
+ by the content in match group #1.
+ Consequently, the tag is the barcode text without its TAG: prefix.
+
+ More examples:
+
+ {"ASN12.*": "JOHN", "ASN13.*": "SMITH"} for example maps
+ - ASN12nnnn barcodes to the tag JOHN and
+ - ASN13nnnn barcodes to the tag SMITH.
+
+ {"T-J": "JOHN", "T-S": "SMITH", "T-D": "DOE"} directly maps
+ - T-J barcodes to the tag JOHN,
+ - T-S barcodes to the tag SMITH and
+ - T-D barcodes to the tag DOE.
+
+ Please refer to the Python regex documentation for more information.
+
## Audit Trail
#### [`PAPERLESS_AUDIT_LOG_ENABLED=<bool>`](#PAPERLESS_AUDIT_LOG_ENABLED) {#PAPERLESS_AUDIT_LOG_ENABLED}
#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
#PAPERLESS_CONSUMER_BARCODE_UPSCALE=0.0
#PAPERLESS_CONSUMER_BARCODE_DPI=300
+#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=false
+#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING={"TAG:(.*)": "\\g<1>"}
#PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED=false
#PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME=double-sided
#PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT=false
from documents.converters import convert_from_tiff_to_pdf
from documents.data_models import ConsumableDocument
+from documents.models import Tag
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import StopConsumeTaskError
from documents.plugins.helpers import ProgressStatusOptions
supported_mimes = {"application/pdf"}
return (
- settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES
+ settings.CONSUMER_ENABLE_ASN_BARCODE
+ or settings.CONSUMER_ENABLE_BARCODES
+ or settings.CONSUMER_ENABLE_TAG_BARCODE
) and self.input_doc.mime_type in supported_mimes
def setup(self):
logger.info(f"Found ASN in barcode: {located_asn}")
self.metadata.asn = located_asn
+ # try reading tags from barcodes
+ if settings.CONSUMER_ENABLE_TAG_BARCODE:
+ tags = self.tags
+ if tags is not None and len(tags) > 0:
+ if self.metadata.tag_ids:
+ self.metadata.tag_ids += tags
+ else:
+ self.metadata.tag_ids = tags
+ logger.info(f"Found tags in barcode: {tags}")
+
separator_pages = self.get_separation_pages()
if not separator_pages:
return "No pages to split on!"
return asn
+ @property
+ def tags(self) -> Optional[list[int]]:
+ """
+ Search the parsed barcodes for any tags.
+ Returns the detected tag ids (or empty list)
+ """
+ tags = []
+
+ # Ensure the barcodes have been read
+ self.detect()
+
+ for x in self.barcodes:
+ tag_texts = x.value
+
+ for raw in tag_texts.split(","):
+ try:
+ tag = None
+ for regex in settings.CONSUMER_TAG_BARCODE_MAPPING:
+ if re.match(regex, raw, flags=re.IGNORECASE):
+ sub = settings.CONSUMER_TAG_BARCODE_MAPPING[regex]
+ tag = (
+ re.sub(regex, sub, raw, flags=re.IGNORECASE)
+ if sub
+ else raw
+ )
+ break
+
+ if tag:
+ tag = Tag.objects.get_or_create(
+ name__iexact=tag,
+ defaults={"name": tag},
+ )[0]
+
+ logger.debug(
+ f"Found Tag Barcode '{raw}', substituted "
+ f"to '{tag}' and mapped to "
+ f"tag #{tag.pk}.",
+ )
+ tags.append(tag.pk)
+
+ except Exception as e:
+ logger.error(
+ f"Failed to find or create TAG '{raw}' because: {e}",
+ )
+
+ return tags
+
def get_separation_pages(self) -> dict[int, bool]:
"""
Search the parsed barcodes for separators and returns a dict of page
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
+from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import DocumentConsumeDelayMixin
from documents.tests.utils import DummyProgressManager
# Re-runs the test cases inherited from TestAsnBarcode with the
# CONSUMER_BARCODE_SCANNER setting overridden to "ZXING".
@override_settings(CONSUMER_BARCODE_SCANNER="ZXING")
class TestAsnBarcodesZxing(TestAsnBarcode):
pass
+
+
+class TestTagBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase):
+ @contextmanager
+ def get_reader(self, filepath: Path) -> BarcodePlugin:
+ reader = BarcodePlugin(
+ ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
+ DocumentMetadataOverrides(),
+ DummyProgressManager(filepath.name, None),
+ self.dirs.scratch_dir,
+ "task-id",
+ )
+ reader.setup()
+ yield reader
+ reader.cleanup()
+
+ @override_settings(CONSUMER_ENABLE_TAG_BARCODE=True)
+ def test_scan_file_without_matching_barcodes(self):
+ """
+ GIVEN:
+ - PDF containing tag barcodes but none with matching prefix (default "TAG:")
+ WHEN:
+ - File is scanned for barcodes
+ THEN:
+ - No TAG has been created
+ """
+ test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
+ with self.get_reader(test_file) as reader:
+ reader.run()
+ tags = reader.metadata.tag_ids
+ self.assertEqual(tags, None)
+
+ @override_settings(
+ CONSUMER_ENABLE_TAG_BARCODE=False,
+ CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<1>"},
+ )
+ def test_scan_file_with_matching_barcode_but_function_disabled(self):
+ """
+ GIVEN:
+ - PDF containing a tag barcode with matching custom prefix
+ - The tag barcode functionality is disabled
+ WHEN:
+ - File is scanned for barcodes
+ THEN:
+ - No TAG has been created
+ """
+ test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
+ with self.get_reader(test_file) as reader:
+ reader.run()
+ tags = reader.metadata.tag_ids
+ self.assertEqual(tags, None)
+
+ @override_settings(
+ CONSUMER_ENABLE_TAG_BARCODE=True,
+ CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<1>"},
+ )
+ def test_scan_file_for_tag_custom_prefix(self):
+ """
+ GIVEN:
+ - PDF containing a tag barcode with custom prefix
+ - The barcode mapping accepts this prefix and removes it from the mapped tag value
+ - The created tag is the non-prefixed values
+ WHEN:
+ - File is scanned for barcodes
+ THEN:
+ - The TAG is located
+ - One TAG has been created
+ """
+ test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
+ with self.get_reader(test_file) as reader:
+ reader.metadata.tag_ids = [99]
+ reader.run()
+ self.assertEqual(reader.pdf_file, test_file)
+ tags = reader.metadata.tag_ids
+ self.assertEqual(len(tags), 2)
+ self.assertEqual(tags[0], 99)
+ self.assertEqual(Tag.objects.get(name__iexact="00123").pk, tags[1])
+
+ @override_settings(
+ CONSUMER_ENABLE_TAG_BARCODE=True,
+ CONSUMER_TAG_BARCODE_MAPPING={"ASN(.*)": "\\g<1>"},
+ )
+ def test_scan_file_for_many_custom_tags(self):
+ """
+ GIVEN:
+ - PDF containing multiple tag barcode with custom prefix
+ - The barcode mapping accepts this prefix and removes it from the mapped tag value
+ - The created tags are the non-prefixed values
+ WHEN:
+ - File is scanned for barcodes
+ THEN:
+ - The TAG is located
+ - File Tags have been created
+ """
+ test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
+ with self.get_reader(test_file) as reader:
+ reader.run()
+ tags = reader.metadata.tag_ids
+ self.assertEqual(len(tags), 5)
+ self.assertEqual(Tag.objects.get(name__iexact="00123").pk, tags[0])
+ self.assertEqual(Tag.objects.get(name__iexact="00124").pk, tags[1])
+ self.assertEqual(Tag.objects.get(name__iexact="00125").pk, tags[2])
+ self.assertEqual(Tag.objects.get(name__iexact="00126").pk, tags[3])
+ self.assertEqual(Tag.objects.get(name__iexact="00127").pk, tags[4])
+
+ @override_settings(
+ CONSUMER_ENABLE_TAG_BARCODE=True,
+ CONSUMER_TAG_BARCODE_MAPPING={"CUSTOM-PREFIX-(.*)": "\\g<3>"},
+ )
+ def test_scan_file_for_tag_raises_value_error(self):
+ """
+ GIVEN:
+ - Any error occurs during tag barcode processing
+ THEN:
+ - The processing should be skipped and not break the import
+ """
+ test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
+ with self.get_reader(test_file) as reader:
+ reader.run()
+ # expect error to be caught and logged only
+ tags = reader.metadata.tag_ids
+ self.assertEqual(tags, None)
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
+CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
+ "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
+)
+
+CONSUMER_TAG_BARCODE_MAPPING = dict(
+ json.loads(
+ os.getenv(
+ "PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING",
+ '{"TAG:(.*)": "\\\\g<1>"}',
+ ),
+ ),
+)
+
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
)