Fix: ghostscript rendering error doesn't trigger frontend failure message (#4092)

author shamoon <4887959+shamoon@users.noreply.github.com>

Fri, 1 Sep 2023 02:49:00 +0000 (19:49 -0700)

committer GitHub <noreply@github.com>

Fri, 1 Sep 2023 02:49:00 +0000 (19:49 -0700)
author shamoon <4887959+shamoon@users.noreply.github.com>
Fri, 1 Sep 2023 02:49:00 +0000 (19:49 -0700)
committer GitHub <noreply@github.com>
Fri, 1 Sep 2023 02:49:00 +0000 (19:49 -0700)
diff --git a/src/documents/consumer.py b/src/documents/consumer.py

index 0ec6090c2ead434f576c8c08d98cdcb1262b9f94..59c4b7d859b892869f79e4bbbb6b5f9462554656 100644 (file)
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -450,11 +450,18 @@ class Consumer(LoggingMixin):
              archive_path = document_parser.get_archive_path()
  
          except ParseError as e:
+            self._fail(
+                str(e),
+                f"Error occurred while consuming document {self.filename}: {e}",
+                exc_info=True,
+                exception=e,
+            )
+        except Exception as e:
              document_parser.cleanup()
              tempdir.cleanup()
              self._fail(
                  str(e),
-                f"Error while consuming document {self.filename}: {e}",
+                f"Unexpected error while consuming document {self.filename}: {e}",
                  exc_info=True,
                  exception=e,
              )
@@ -544,8 +551,8 @@ class Consumer(LoggingMixin):
          except Exception as e:
              self._fail(
                  str(e),
-                f"The following error occurred while consuming "
-                f"{self.filename}: {e}",
+                f"The following error occurred while storing document "
+                f"{self.filename} after consuming: {e}",
                  exc_info=True,
                  exception=e,
              )
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py

index a8f427c37f36df7e9bbb3d759eeb9c063d2cd866..a9cb887de2e5e5cc0640cbbdc3d26a719364539d 100644 (file)
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -211,6 +211,18 @@ class FaultyParser(DocumentParser):
          raise ParseError("Does not compute.")
  
  
+class FaultyGenericExceptionParser(DocumentParser):
+    def __init__(self, logging_group, scratch_dir):
+        super().__init__(logging_group)
+        _, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
+
+    def get_thumbnail(self, document_path, mime_type, file_name=None):
+        return self.fake_thumb
+
+    def parse(self, document_path, mime_type, file_name=None):
+        raise Exception("Generic exception.")
+
+
  def fake_magic_from_file(file, mime=False):
      if mime:
          if os.path.splitext(file)[1] == ".pdf":
@@ -260,6 +272,13 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
      def make_faulty_parser(self, logging_group, progress_callback=None):
          return FaultyParser(logging_group, self.dirs.scratch_dir)
  
+    def make_faulty_generic_exception_parser(
+        self,
+        logging_group,
+        progress_callback=None,
+    ):
+        return FaultyGenericExceptionParser(logging_group, self.dirs.scratch_dir)
+
      def setUp(self):
          super().setUp()
  
@@ -496,7 +515,29 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
  
          self.assertRaisesMessage(
              ConsumerError,
-            "sample.pdf: Error while consuming document sample.pdf: Does not compute.",
+            "sample.pdf: Error occurred while consuming document sample.pdf: Does not compute.",
+            self.consumer.try_consume_file,
+            self.get_test_file(),
+        )
+
+        self._assert_first_last_send_progress(last_status="FAILED")
+
+    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    def testGenericParserException(self, m):
+        m.return_value = [
+            (
+                None,
+                {
+                    "parser": self.make_faulty_generic_exception_parser,
+                    "mime_types": {"application/pdf": ".pdf"},
+                    "weight": 0,
+                },
+            ),
+        ]
+
+        self.assertRaisesMessage(
+            ConsumerError,
+            "sample.pdf: Unexpected error while consuming document sample.pdf: Generic exception.",
              self.consumer.try_consume_file,
              self.get_test_file(),
          )
@@ -510,7 +551,7 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
  
          self.assertRaisesMessage(
              ConsumerError,
-            "sample.pdf: The following error occurred while consuming sample.pdf: NO.",
+            "sample.pdf: The following error occurred while storing document sample.pdf after consuming: NO.",
              self.consumer.try_consume_file,
              filename,
          )
diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py

index 4dbebb589e9a1753afacb58e0cedeab5243f1245..6764d5031def664faa8ca87b265983cd623fffd4 100644 (file)
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -340,7 +340,10 @@ class RasterisedDocumentParser(DocumentParser):
                      "Ghostscript PDF/A rendering failed, consider setting "
                      "PAPERLESS_OCR_USER_ARGS: '{\"continue_on_soft_render_error\": true}'",  # noqa: E501
                  )
-            raise e
+
+            raise ParseError(
+                f"SubprocessOutputError: {e!s}. See logs for more information.",
+            ) from e
          except (NoTextFoundException, InputFileError) as e:
              self.log.warning(
                  f"Encountered an error while running OCR: {e!s}. "
diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py

index 8b3de5615c4a83011c41fcbfdf19bd84fe5dff7b..606453904696a22ba04c512384d043a2ceefe428 100644 (file)
--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py
@@ -8,6 +8,7 @@ from unittest import mock
  
  from django.test import TestCase
  from django.test import override_settings
+from ocrmypdf import SubprocessOutputError
  
  from documents.parsers import ParseError
  from documents.parsers import run_convert
@@ -827,6 +828,18 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
          # Copied from the PDF to here.  Don't even look at it
          self.assertIn("ةﯾﻠﺧﺎدﻻ ةرازو", parser.get_text())
  
+    @mock.patch("ocrmypdf.ocr")
+    def test_gs_rendering_error(self, m):
+        m.side_effect = SubprocessOutputError("Ghostscript PDF/A rendering failed")
+        parser = RasterisedDocumentParser(None)
+
+        self.assertRaises(
+            ParseError,
+            parser.parse,
+            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            "application/pdf",
+        )
+
  
  class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
      SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
author	shamoon <4887959+shamoon@users.noreply.github.com>
	Fri, 1 Sep 2023 02:49:00 +0000 (19:49 -0700)
committer	GitHub <noreply@github.com>
	Fri, 1 Sep 2023 02:49:00 +0000 (19:49 -0700)
src/documents/consumer.py		patch \| blob \| blame \| history
src/documents/tests/test_consumer.py		patch \| blob \| blame \| history
src/paperless_tesseract/parsers.py		patch \| blob \| blame \| history
src/paperless_tesseract/tests/test_parser.py		patch \| blob \| blame \| history