git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Updates following testing of command
author: Trenton Holmes <holmes.trenton@gmail.com>
Fri, 10 Jun 2022 18:23:24 +0000 (11:23 -0700)
committer: Michael Shamoon <4887959+shamoon@users.noreply.github.com>
Sat, 2 Jul 2022 15:39:09 +0000 (08:39 -0700)
src/documents/management/commands/document_redo_ocr.py

index c76218d9938defda5bf34c88b0d3918a365cf8d5..3ead5a4851cf46333ab7449c11b091f13fda417d 100644 (file)
@@ -1,4 +1,3 @@
-import logging
 import shutil
 from pathlib import Path
 from typing import Type
@@ -38,13 +37,14 @@ class Command(BaseCommand):
 
     def handle(self, *args, **options):
 
-        logging.getLogger().handlers[0].level = logging.ERROR
-
         all_docs = Document.objects.all()
 
-        for doc_pk in tqdm.tqdm(args.documents, disable=options["no_progress_bar"]):
+        for doc_pk in tqdm.tqdm(
+            options["documents"],
+            disable=options["no_progress_bar"],
+        ):
             try:
-                self.stdout.write(self.style.INFO(f"Parsing document {doc_pk}"))
+                self.stdout.write(f"Parsing document {doc_pk}")
                 doc: Document = all_docs.get(pk=doc_pk)
             except ObjectDoesNotExist:
                 self.stdout.write(self.style.ERROR(f"Document {doc_pk} does not exist"))
@@ -64,11 +64,15 @@ class Command(BaseCommand):
             shutil.copy(doc.source_path, temp_file)
 
             try:
+                self.stdout.write(
+                    f"Using {type(document_parser).__name__} for document",
+                )
                 # Try to re-parse the document into text
                 document_parser.parse(str(temp_file), doc.mime_type)
 
                 doc.content = document_parser.get_text()
                 doc.save()
+                self.stdout.write("Document OCR updated")
 
             except ParseError as e:
                 self.stdout.write(self.style.ERROR(f"Error parsing document: {e}"))