add split logic to consume_file

author Florian Brandes <florian.brandes@posteo.de>

Wed, 6 Apr 2022 19:22:07 +0000 (21:22 +0200)

committer Florian Brandes <florian.brandes@posteo.de>

Thu, 7 Apr 2022 09:14:17 +0000 (11:14 +0200)
author Florian Brandes <florian.brandes@posteo.de>
Wed, 6 Apr 2022 19:22:07 +0000 (21:22 +0200)
committer Florian Brandes <florian.brandes@posteo.de>
Thu, 7 Apr 2022 09:14:17 +0000 (11:14 +0200)
diff --git a/requirements.txt b/requirements.txt

index afc8b9f5184afb8a3ac067f5a725c9c3fd7f3eb1..26ec003f8508957dc26d63ec99aaa4ae48f06dbd 100644 (file)
--- a/requirements.txt
+++ b/requirements.txt
@@ -61,6 +61,7 @@ ocrmypdf==13.4.2
  packaging==21.3; python_version >= '3.6'
  pathvalidate==2.5.0
  pdfminer.six==20220319
+pdf2image==1.16.0
  pikepdf==5.1.1
  pillow==9.1.0
  pluggy==1.0.0; python_version >= '3.6'
diff --git a/src/documents/tasks.py b/src/documents/tasks.py

index 5161fcc0109b9c9e186b9409991e38a248009225..eff46436e67599b3b0f0f98580b569e4aab59add 100644 (file)
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -177,10 +177,26 @@ def consume_file(
  ):
  
      # check for separators in current document
-    separator_page_numbers = scan_file_for_separating_barcodes(path)
-    if separator_page_numbers != []:
-        logger.debug(f"Pages with separators found: {str(separator_page_numbers)}")
-
+    separators = scan_file_for_separating_barcodes(path)
+    document_list = []
+    if separators == []:
+        pass
+    else:
+        logger.debug(f"Pages with separators found in: {str(path)}")
+        document_list = separate_pages(path, separators)
+    if document_list == []:
+        pass
+    else:
+        for document in document_list:
+            # save to consumption dir
+            save_to_dir(document)
+        # if we got here, the document was successfully split
+        # and can safely be deleted
+        logger.debug("Deleting file {}".format(path))
+        os.unlink(path)
+        return "File successfully split"
+
+    # continue with consumption if no barcode was found
      document = Consumer().try_consume_file(
          path,
          override_filename=override_filename,
author	Florian Brandes <florian.brandes@posteo.de>
	Wed, 6 Apr 2022 19:22:07 +0000 (21:22 +0200)
committer	Florian Brandes <florian.brandes@posteo.de>
	Thu, 7 Apr 2022 09:14:17 +0000 (11:14 +0200)
requirements.txt		patch | blob | blame | history
src/documents/tasks.py		patch | blob | blame | history