]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
add config options and documentation
authorflorian on nixos (Florian Brandes) <florian.brandes@posteo.de>
Sat, 26 Mar 2022 09:16:23 +0000 (10:16 +0100)
committerFlorian Brandes <florian.brandes@posteo.de>
Thu, 7 Apr 2022 09:14:29 +0000 (11:14 +0200)
Signed-off-by: florian on nixos (Florian Brandes) <florian.brandes@posteo.de>
docs/configuration.rst
paperless.conf.example
src/documents/tasks.py
src/documents/tests/test_tasks.py
src/paperless/settings.py

index f53266481023a79b480a6d2c1c5575092456a00f..a5db8fffa58035cd271cd6946cfaac764e0f2aba 100644 (file)
@@ -588,6 +588,27 @@ PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=<bool>
 
     Defaults to false.
 
+PAPERLESS_CONSUMER_ENABLE_BARCODES=<bool>
+    Enables the scanning and page separation based on detected barcodes.
+    This allows for scanning and adding multiple documents per uploaded
+    file, which are separated by one or multiple barcode pages.
+
+    For ease of use, it is suggested to use a standardized separation page,
+    e.g. `here <https://www.alliancegroup.co.uk/patch-codes.htm>`_.
+
+    If no barcodes are detected in the uploaded file, no page separation
+    will happen.
+
+    Defaults to true.
+
+
+PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
+    Defines the string to be detected as a separator barcode.
+    If paperless is used with the PATCH-T separator pages, users
+    shouldn't change this.
+
+    Defaults to "PATCHT".
+
 
 PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
     On smaller systems, or even in the case of Very Large Documents, the consumer
index de24bde7439571a963f8111c803d5d56fe095b57..cad13257c09275b69482474cb66d1f3a49cdfe26 100644 (file)
@@ -60,6 +60,8 @@
 #PAPERLESS_CONSUMER_RECURSIVE=false
 #PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]
 #PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
+#PAPERLESS_CONSUMER_ENABLE_BARCODES=true
+#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
 #PAPERLESS_OPTIMIZE_THUMBNAILS=true
 #PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
index e7d20eff8688454b5a1ee1a77881c5a1952bbddc..3abb3227bb5c5312ba430c450e9105163596cec6 100644 (file)
@@ -24,8 +24,6 @@ from pikepdf import Pdf
 from pyzbar import pyzbar
 from whoosh.writing import AsyncWriter
 
-# barcode decoder
-
 logger = logging.getLogger("paperless.tasks")
 
 
@@ -100,12 +98,13 @@ def scan_file_for_separating_barcodes(filepath: str) -> list:
     Returns a list of pagenumbers, which separate the file
     """
     separator_page_numbers = []
+    separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
     # use a temporary directory in case the file is too big to handle in memory
     with tempfile.TemporaryDirectory() as path:
         pages_from_path = convert_from_path(filepath, output_folder=path)
         for current_page_number, page in enumerate(pages_from_path):
             current_barcodes = barcode_reader(page)
-            if "b'PATCHT'" in current_barcodes:
+            if separator_barcode in current_barcodes:
                 separator_page_numbers = separator_page_numbers + [current_page_number]
     return separator_page_numbers
 
@@ -163,13 +162,12 @@ def save_to_dir(filepath, newname=None, target_dir=settings.CONSUMPTION_DIR):
     Copies filepath to target_dir.
     Optionally rename the file.
     """
-    logger.debug(f"filepath: {str(filepath)}")
-    logger.debug(f"newname: {str(newname)}")
-    logger.debug(f"target_dir: {str(target_dir)}")
     if os.path.isfile(filepath) and os.path.isdir(target_dir):
         dst = shutil.copy(filepath, target_dir)
+        logging.debug(f"saved {str(filepath)} to {str(dst)}")
         if newname:
             dst_new = os.path.join(target_dir, newname)
+            logger.debug(f"moving {str(dst)} to {str(dst_new)}")
             os.rename(dst, dst_new)
     else:
         logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
@@ -186,7 +184,9 @@ def consume_file(
 ):
 
     # check for separators in current document
-    separators = scan_file_for_separating_barcodes(path)
+    separators = []
+    if settings.CONSUMER_ENABLE_BARCODES:
+        separators = scan_file_for_separating_barcodes(path)
     document_list = []
     if separators == []:
         pass
index b5eabdd1a3045e1783232a41790af342d9448bc9..3e019b51ec45794d37515f409584706df0e2e18e 100644 (file)
@@ -98,7 +98,8 @@ class TestTasks(DirectoriesMixin, TestCase):
             "patch-code-t.pbm",
         )
         img = Image.open(test_file)
-        self.assertEqual(tasks.barcode_reader(img), ["b'PATCHT'"])
+        separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
+        self.assertEqual(tasks.barcode_reader(img), [separator_barcode])
 
     def test_barcode_reader2(self):
         test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
index 39b850813bbf9384c8bac7599d139f8e11feb4e7..934d19910e1906bf9fb0bd2c382940ff538c60e2 100644 (file)
@@ -462,6 +462,13 @@ CONSUMER_IGNORE_PATTERNS = list(
 
 CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
 
+CONSUMER_ENABLE_BARCODES = __get_boolean(
+    "PAPERLESS_CONSUMER_ENABLE_BARCODES",
+    default="YES",
+)
+
+CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
+
 OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
 
 OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))