add config options and documentation

author florian on nixos (Florian Brandes) <florian.brandes@posteo.de>

Sat, 26 Mar 2022 09:16:23 +0000 (10:16 +0100)

committer Florian Brandes <florian.brandes@posteo.de>

Thu, 7 Apr 2022 09:14:29 +0000 (11:14 +0200)
author florian on nixos (Florian Brandes) <florian.brandes@posteo.de>
Sat, 26 Mar 2022 09:16:23 +0000 (10:16 +0100)
committer Florian Brandes <florian.brandes@posteo.de>
Thu, 7 Apr 2022 09:14:29 +0000 (11:14 +0200)
diff --git a/docs/configuration.rst b/docs/configuration.rst

index f53266481023a79b480a6d2c1c5575092456a00f..a5db8fffa58035cd271cd6946cfaac764e0f2aba 100644 (file)
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -588,6 +588,27 @@ PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=<bool>
  
      Defaults to false.
  
+PAPERLESS_CONSUMER_ENABLE_BARCODES=<bool>
+    Enables the scanning and page separation based on detected barcodes.
+    This allows for scanning and adding multiple documents per uploaded
+    file, which are separated by one or multiple barcode pages.
+
+    For ease of use, it is suggested to use a standardized separation page,
+    e.g. `here <https://www.alliancegroup.co.uk/patch-codes.htm>`_.
+
+    If no barcodes are detected in the uploaded file, no page separation
+    will happen.
+
+    Defaults to true.
+
+
+PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
+    Defines the string to be detected as a separator barcode.
+    If paperless is used with the PATCH-T separator pages, users
+    shouldn't change this.
+
+    Defaults to "PATCHT".
+
  
  PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
      On smaller systems, or even in the case of Very Large Documents, the consumer
diff --git a/paperless.conf.example b/paperless.conf.example

index de24bde7439571a963f8111c803d5d56fe095b57..cad13257c09275b69482474cb66d1f3a49cdfe26 100644 (file)
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -60,6 +60,8 @@
  #PAPERLESS_CONSUMER_RECURSIVE=false
  #PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]
  #PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
+#PAPERLESS_CONSUMER_ENABLE_BARCODES=true
+#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
  #PAPERLESS_OPTIMIZE_THUMBNAILS=true
  #PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
  #PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
diff --git a/src/documents/tasks.py b/src/documents/tasks.py

index e7d20eff8688454b5a1ee1a77881c5a1952bbddc..3abb3227bb5c5312ba430c450e9105163596cec6 100644 (file)
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -24,8 +24,6 @@ from pikepdf import Pdf
  from pyzbar import pyzbar
  from whoosh.writing import AsyncWriter
  
-# barcode decoder
-
  logger = logging.getLogger("paperless.tasks")
  
  
@@ -100,12 +98,13 @@ def scan_file_for_separating_barcodes(filepath: str) -> list:
      Returns a list of pagenumbers, which separate the file
      """
      separator_page_numbers = []
+    separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
      # use a temporary directory in case the file is too big to handle in memory
      with tempfile.TemporaryDirectory() as path:
          pages_from_path = convert_from_path(filepath, output_folder=path)
          for current_page_number, page in enumerate(pages_from_path):
              current_barcodes = barcode_reader(page)
-            if "b'PATCHT'" in current_barcodes:
+            if separator_barcode in current_barcodes:
                  separator_page_numbers = separator_page_numbers + [current_page_number]
      return separator_page_numbers
  
@@ -163,13 +162,12 @@ def save_to_dir(filepath, newname=None, target_dir=settings.CONSUMPTION_DIR):
      Copies filepath to target_dir.
      Optionally rename the file.
      """
-    logger.debug(f"filepath: {str(filepath)}")
-    logger.debug(f"newname: {str(newname)}")
-    logger.debug(f"target_dir: {str(target_dir)}")
      if os.path.isfile(filepath) and os.path.isdir(target_dir):
          dst = shutil.copy(filepath, target_dir)
+        logging.debug(f"saved {str(filepath)} to {str(dst)}")
          if newname:
              dst_new = os.path.join(target_dir, newname)
+            logger.debug(f"moving {str(dst)} to {str(dst_new)}")
              os.rename(dst, dst_new)
      else:
          logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
@@ -186,7 +184,9 @@ def consume_file(
  ):
  
      # check for separators in current document
-    separators = scan_file_for_separating_barcodes(path)
+    separators = []
+    if settings.CONSUMER_ENABLE_BARCODES:
+        separators = scan_file_for_separating_barcodes(path)
      document_list = []
      if separators == []:
          pass
diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py

index b5eabdd1a3045e1783232a41790af342d9448bc9..3e019b51ec45794d37515f409584706df0e2e18e 100644 (file)
--- a/src/documents/tests/test_tasks.py
+++ b/src/documents/tests/test_tasks.py
@@ -98,7 +98,8 @@ class TestTasks(DirectoriesMixin, TestCase):
              "patch-code-t.pbm",
          )
          img = Image.open(test_file)
-        self.assertEqual(tasks.barcode_reader(img), ["b'PATCHT'"])
+        separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
+        self.assertEqual(tasks.barcode_reader(img), [separator_barcode])
  
      def test_barcode_reader2(self):
          test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
diff --git a/src/paperless/settings.py b/src/paperless/settings.py

index 39b850813bbf9384c8bac7599d139f8e11feb4e7..934d19910e1906bf9fb0bd2c382940ff538c60e2 100644 (file)
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -462,6 +462,13 @@ CONSUMER_IGNORE_PATTERNS = list(
  
  CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
  
+CONSUMER_ENABLE_BARCODES = __get_boolean(
+    "PAPERLESS_CONSUMER_ENABLE_BARCODES",
+    default="YES",
+)
+
+CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
+
  OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
  
  OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
author	florian on nixos (Florian Brandes) <florian.brandes@posteo.de>
	Sat, 26 Mar 2022 09:16:23 +0000 (10:16 +0100)
committer	Florian Brandes <florian.brandes@posteo.de>
	Thu, 7 Apr 2022 09:14:29 +0000 (11:14 +0200)
docs/configuration.rst		patch \| blob \| blame \| history
paperless.conf.example		patch \| blob \| blame \| history
src/documents/tasks.py		patch \| blob \| blame \| history
src/documents/tests/test_tasks.py		patch \| blob \| blame \| history
src/paperless/settings.py		patch \| blob \| blame \| history