git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
add split logic to consume_file
author Florian Brandes <florian.brandes@posteo.de>
Wed, 6 Apr 2022 19:22:07 +0000 (21:22 +0200)
committer Florian Brandes <florian.brandes@posteo.de>
Thu, 7 Apr 2022 09:14:17 +0000 (11:14 +0200)
Signed-off-by: florian on nixos (Florian Brandes) <florian.brandes@posteo.de>
requirements.txt
src/documents/tasks.py

index afc8b9f5184afb8a3ac067f5a725c9c3fd7f3eb1..26ec003f8508957dc26d63ec99aaa4ae48f06dbd 100644 (file)
@@ -61,6 +61,7 @@ ocrmypdf==13.4.2
 packaging==21.3; python_version >= '3.6'
 pathvalidate==2.5.0
 pdfminer.six==20220319
+pdf2image==1.16.0
 pikepdf==5.1.1
 pillow==9.1.0
 pluggy==1.0.0; python_version >= '3.6'
index 5161fcc0109b9c9e186b9409991e38a248009225..eff46436e67599b3b0f0f98580b569e4aab59add 100644 (file)
@@ -177,10 +177,26 @@ def consume_file(
 ):
 
     # check for separators in current document
-    separator_page_numbers = scan_file_for_separating_barcodes(path)
-    if separator_page_numbers != []:
-        logger.debug(f"Pages with separators found: {str(separator_page_numbers)}")
-
+    separators = scan_file_for_separating_barcodes(path)
+    document_list = []
+    if separators == []:
+        pass
+    else:
+        logger.debug(f"Pages with separators found in: {str(path)}")
+        document_list = separate_pages(path, separators)
+    if document_list == []:
+        pass
+    else:
+        for document in document_list:
+            # save to consumption dir
+            save_to_dir(document)
+        # if we got here, the document was successfully split
+        # and can safely be deleted
+        logger.debug("Deleting file {}".format(path))
+        os.unlink(path)
+        return "File successfully split"
+
+    # continue with consumption if no barcode was found
     document = Consumer().try_consume_file(
         path,
         override_filename=override_filename,