git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
When PDF/A rendering fails, add a warning that the user may want to allow it to continue
author Trenton H <797416+stumpylog@users.noreply.github.com>
Tue, 29 Aug 2023 00:43:59 +0000 (17:43 -0700)
committer Trenton H <797416+stumpylog@users.noreply.github.com>
Tue, 29 Aug 2023 01:10:11 +0000 (18:10 -0700)
src/paperless_tesseract/parsers.py

index c6d066fbe197c5bc9ba19cc10d7f1d1352593bba..4dbebb589e9a1753afacb58e0cedeab5243f1245 100644 (file)
@@ -304,6 +304,7 @@ class RasterisedDocumentParser(DocumentParser):
         import ocrmypdf
         from ocrmypdf import EncryptedPdfError
         from ocrmypdf import InputFileError
+        from ocrmypdf import SubprocessOutputError
 
         archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
         sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
@@ -333,6 +334,13 @@ class RasterisedDocumentParser(DocumentParser):
             )
             if original_has_text:
                 self.text = text_original
+        except SubprocessOutputError as e:
+            if "Ghostscript PDF/A rendering" in str(e):
+                self.log.warning(
+                    "Ghostscript PDF/A rendering failed, consider setting "
+                    "PAPERLESS_OCR_USER_ARGS: '{\"continue_on_soft_render_error\": true}'",  # noqa: E501
+                )
+            raise e
         except (NoTextFoundException, InputFileError) as e:
             self.log.warning(
                 f"Encountered an error while running OCR: {e!s}. "