Improves the logging mixin and allows it to be typed better

author Trenton H <797416+stumpylog@users.noreply.github.com>

Fri, 19 May 2023 22:23:11 +0000 (15:23 -0700)

committer Trenton H <797416+stumpylog@users.noreply.github.com>

Wed, 24 May 2023 00:16:39 +0000 (17:16 -0700)
author Trenton H <797416+stumpylog@users.noreply.github.com>
Fri, 19 May 2023 22:23:11 +0000 (15:23 -0700)
committer Trenton H <797416+stumpylog@users.noreply.github.com>
Wed, 24 May 2023 00:16:39 +0000 (17:16 -0700)
diff --git a/src/documents/consumer.py b/src/documents/consumer.py

index b5ec584830f860e0d66231f380df276c60ed35e4..7c4ddf003970b0ff20a329b665e6a68f4c901e2b 100644 (file)
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -92,7 +92,7 @@ class Consumer(LoggingMixin):
          exception: Optional[Exception] = None,
      ):
          self._send_progress(100, 100, "FAILED", message)
-        self.log("error", log_message or message, exc_info=exc_info)
+        self.log.error(log_message or message, exc_info=exc_info)
          raise ConsumerError(f"{self.filename}: {log_message or message}") from exception
  
      def __init__(self):
@@ -175,7 +175,7 @@ class Consumer(LoggingMixin):
                  f"{settings.PRE_CONSUME_SCRIPT} does not exist.",
              )
  
-        self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
+        self.log.info(f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
  
          working_file_path = str(self.path)
          original_file_path = str(self.original_path)
@@ -218,8 +218,7 @@ class Consumer(LoggingMixin):
                  f"{settings.POST_CONSUME_SCRIPT} does not exist.",
              )
  
-        self.log(
-            "info",
+        self.log.info(
              f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}",
          )
  
@@ -311,11 +310,6 @@ class Consumer(LoggingMixin):
  
          self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
  
-        # this is for grouping logging entries for this particular file
-        # together.
-
-        self.renew_logging_group()
-
          # Make sure that preconditions for consuming the file are met.
  
          self.pre_check_file_exists()
@@ -323,7 +317,7 @@ class Consumer(LoggingMixin):
          self.pre_check_duplicate()
          self.pre_check_asn_value()
  
-        self.log("info", f"Consuming {self.filename}")
+        self.log.info(f"Consuming {self.filename}")
  
          # For the actual work, copy the file into a tempdir
          self.original_path = self.path
@@ -338,7 +332,7 @@ class Consumer(LoggingMixin):
  
          mime_type = magic.from_file(self.path, mime=True)
  
-        self.log("debug", f"Detected mime type: {mime_type}")
+        self.log.debug(f"Detected mime type: {mime_type}")
  
          # Based on the mime type, get the parser for that type
          parser_class: Optional[Type[DocumentParser]] = get_parser_class_for_mime_type(
@@ -370,7 +364,7 @@ class Consumer(LoggingMixin):
              progress_callback,
          )
  
-        self.log("debug", f"Parser: {type(document_parser).__name__}")
+        self.log.debug(f"Parser: {type(document_parser).__name__}")
  
          # However, this already created working directories which we have to
          # clean up.
@@ -384,10 +378,10 @@ class Consumer(LoggingMixin):
  
          try:
              self._send_progress(20, 100, "WORKING", MESSAGE_PARSING_DOCUMENT)
-            self.log("debug", f"Parsing {self.filename}...")
+            self.log.debug(f"Parsing {self.filename}...")
              document_parser.parse(self.path, mime_type, self.filename)
  
-            self.log("debug", f"Generating thumbnail for {self.filename}...")
+            self.log.debug(f"Generating thumbnail for {self.filename}...")
              self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
              thumbnail = document_parser.get_thumbnail(
                  self.path,
@@ -475,7 +469,7 @@ class Consumer(LoggingMixin):
                  document.save()
  
                  # Delete the file only if it was successfully consumed
-                self.log("debug", f"Deleting file {self.path}")
+                self.log.debug(f"Deleting file {self.path}")
                  os.unlink(self.path)
                  self.original_path.unlink()
  
@@ -486,7 +480,7 @@ class Consumer(LoggingMixin):
                  )
  
                  if os.path.isfile(shadow_file):
-                    self.log("debug", f"Deleting file {shadow_file}")
+                    self.log.debug(f"Deleting file {shadow_file}")
                      os.unlink(shadow_file)
  
          except Exception as e:
@@ -503,7 +497,7 @@ class Consumer(LoggingMixin):
  
          self.run_post_consume_script(document)
  
-        self.log("info", f"Document {document} consumption finished")
+        self.log.info(f"Document {document} consumption finished")
  
          self._send_progress(100, 100, "SUCCESS", MESSAGE_FINISHED, document.id)
  
@@ -522,26 +516,25 @@ class Consumer(LoggingMixin):
  
          file_info = FileInfo.from_filename(self.filename)
  
-        self.log("debug", "Saving record to database")
+        self.log.debug("Saving record to database")
  
          if self.override_created is not None:
              create_date = self.override_created
-            self.log(
-                "debug",
+            self.log.debug(
                  f"Creation date from post_documents parameter: {create_date}",
              )
          elif file_info.created is not None:
              create_date = file_info.created
-            self.log("debug", f"Creation date from FileInfo: {create_date}")
+            self.log.debug(f"Creation date from FileInfo: {create_date}")
          elif date is not None:
              create_date = date
-            self.log("debug", f"Creation date from parse_date: {create_date}")
+            self.log.debug(f"Creation date from parse_date: {create_date}")
          else:
              stats = os.stat(self.path)
              create_date = timezone.make_aware(
                  datetime.datetime.fromtimestamp(stats.st_mtime),
              )
-            self.log("debug", f"Creation date from st_mtime: {create_date}")
+            self.log.debug(f"Creation date from st_mtime: {create_date}")
  
          storage_type = Document.STORAGE_TYPE_UNENCRYPTED
  
@@ -595,8 +588,7 @@ class Consumer(LoggingMixin):
          Decodes a process stdout and stderr streams and logs them to the main log
          """
          # Log what the script exited as
-        self.log(
-            "info",
+        self.log.info(
              f"{completed_process.args[0]} exited {completed_process.returncode}",
          )
  
@@ -609,9 +601,9 @@ class Consumer(LoggingMixin):
                      "\n",
                  )
              )
-            self.log("info", "Script stdout:")
+            self.log.info("Script stdout:")
              for line in stdout_str:
-                self.log("info", line)
+                self.log.info(line)
  
          if len(completed_process.stderr):
              stderr_str = (
@@ -622,6 +614,6 @@ class Consumer(LoggingMixin):
                  )
              )
  
-            self.log("warning", "Script stderr:")
+            self.log.warning("Script stderr:")
              for line in stderr_str:
-                self.log("warning", line)
+                self.log.warning(line)
diff --git a/src/documents/loggers.py b/src/documents/loggers.py

index 0f03135d530ddc4a2448a25c46bb65b72d85b427..0fc5cc162abacb9ec8b1a9882209b31ab443c376 100644 (file)
--- a/src/documents/loggers.py
+++ b/src/documents/loggers.py
@@ -3,18 +3,16 @@ import uuid
  
  
  class LoggingMixin:
-    logging_group = None
-
-    logging_name = None
+    def __init__(self) -> None:
+        self.renew_logging_group()
  
      def renew_logging_group(self):
+        """
+        Creates a new UUID to group subsequent log calls together with
+        the extra data named group
+        """
          self.logging_group = uuid.uuid4()
-
-    def log(self, level, message, **kwargs):
-        if self.logging_name:
-            logger = logging.getLogger(self.logging_name)
-        else:
-            name = ".".join([self.__class__.__module__, self.__class__.__name__])
-            logger = logging.getLogger(name)
-
-        getattr(logger, level)(message, extra={"group": self.logging_group}, **kwargs)
+        self.log = logging.LoggerAdapter(
+            logging.getLogger(self.logging_name),
+            extra={"group": self.logging_group},
+        )
diff --git a/src/documents/parsers.py b/src/documents/parsers.py

index 93d78832e7526bbfc7f839821418cb13ca350bc5..265219412bccfbe32fac280d9d42b6846cd8a395 100644 (file)
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -328,7 +328,7 @@ class DocumentParser(LoggingMixin):
          try:
              text = filepath.read_text(encoding="utf-8")
          except UnicodeDecodeError as e:
-            self.log("warning", f"Unicode error during text reading, continuing: {e}")
+            self.log.warning(f"Unicode error during text reading, continuing: {e}")
              text = filepath.read_bytes().decode("utf-8", errors="replace")
          return text
  
@@ -354,5 +354,5 @@ class DocumentParser(LoggingMixin):
          return self.date
  
      def cleanup(self):
-        self.log("debug", f"Deleting directory {self.tempdir}")
+        self.log.debug(f"Deleting directory {self.tempdir}")
          shutil.rmtree(self.tempdir)
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py

index 8ab40f5ac426c8ea804f7db700cfc4f96572bc2d..441cffddfafcd26429c6af35e75a78aa1693a850 100644 (file)
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -832,8 +832,7 @@ class PreConsumeTestCase(TestCase):
                  }
                  self.assertDictEqual(environment, {**environment, **subset})
  
-    @mock.patch("documents.consumer.Consumer.log")
-    def test_script_with_output(self, mocked_log):
+    def test_script_with_output(self):
          """
          GIVEN:
              - A script which outputs to stdout and stderr
@@ -854,14 +853,19 @@ class PreConsumeTestCase(TestCase):
              os.chmod(script.name, st.st_mode | stat.S_IEXEC)
  
              with override_settings(PRE_CONSUME_SCRIPT=script.name):
-                c = Consumer()
-                c.path = "path-to-file"
-                c.run_pre_consume_script()
-
-                mocked_log.assert_called()
-
-                mocked_log.assert_any_call("info", "This message goes to stdout")
-                mocked_log.assert_any_call("warning", "This message goes to stderr")
+                with self.assertLogs("paperless.consumer", level="INFO") as cm:
+                    c = Consumer()
+                    c.path = "path-to-file"
+
+                    c.run_pre_consume_script()
+                    self.assertIn(
+                        "INFO:paperless.consumer:This message goes to stdout",
+                        cm.output,
+                    )
+                    self.assertIn(
+                        "WARNING:paperless.consumer:This message goes to stderr",
+                        cm.output,
+                    )
  
      def test_script_exit_non_zero(self):
          """
diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py

index de03a48616aeb5d24bfe3b861637faf4bc14ca7a..b525ef91d3f3dee87c384a4f69f846d5866863cf 100644 (file)
--- a/src/paperless_mail/mail.py
+++ b/src/paperless_mail/mail.py
@@ -422,7 +422,7 @@ class MailAccountHandler(LoggingMixin):
          try:
              return Correspondent.objects.get_or_create(name=name)[0]
          except DatabaseError as e:
-            self.log("error", f"Error while retrieving correspondent {name}: {e}")
+            self.log.error(f"Error while retrieving correspondent {name}: {e}")
              return None
  
      def _get_title(self, message, att, rule):
@@ -468,7 +468,7 @@ class MailAccountHandler(LoggingMixin):
  
          self.renew_logging_group()
  
-        self.log("debug", f"Processing mail account {account}")
+        self.log.debug(f"Processing mail account {account}")
  
          total_processed_files = 0
          try:
@@ -480,13 +480,12 @@ class MailAccountHandler(LoggingMixin):
                  supports_gmail_labels = "X-GM-EXT-1" in M.client.capabilities
                  supports_auth_plain = "AUTH=PLAIN" in M.client.capabilities
  
-                self.log("debug", f"GMAIL Label Support: {supports_gmail_labels}")
-                self.log("debug", f"AUTH=PLAIN Support: {supports_auth_plain}")
+                self.log.debug(f"GMAIL Label Support: {supports_gmail_labels}")
+                self.log.debug(f"AUTH=PLAIN Support: {supports_auth_plain}")
  
                  mailbox_login(M, account)
  
-                self.log(
-                    "debug",
+                self.log.debug(
                      f"Account {account}: Processing "
                      f"{account.rules.count()} rule(s)",
                  )
@@ -499,16 +498,13 @@ class MailAccountHandler(LoggingMixin):
                              supports_gmail_labels,
                          )
                      except Exception as e:
-                        self.log(
-                            "error",
+                        self.log.exception(
                              f"Rule {rule}: Error while processing rule: {e}",
-                            exc_info=True,
                          )
          except MailError:
              raise
          except Exception as e:
-            self.log(
-                "error",
+            self.log.error(
                  f"Error while retrieving mailbox {account}: {e}",
                  exc_info=False,
              )
@@ -521,21 +517,19 @@ class MailAccountHandler(LoggingMixin):
          rule: MailRule,
          supports_gmail_labels: bool,
      ):
-        self.log("debug", f"Rule {rule}: Selecting folder {rule.folder}")
+        self.log.debug(f"Rule {rule}: Selecting folder {rule.folder}")
  
          try:
              M.folder.set(rule.folder)
          except MailboxFolderSelectError as err:
-            self.log(
-                "error",
+            self.log.error(
                  f"Unable to access folder {rule.folder}, attempting folder listing",
              )
              try:
                  for folder_info in M.folder.list():
-                    self.log("info", f"Located folder: {folder_info.name}")
+                    self.log.info(f"Located folder: {folder_info.name}")
              except Exception as e:
-                self.log(
-                    "error",
+                self.log.error(
                      "Exception during folder listing, unable to provide list folders: "
                      + str(e),
                  )
@@ -547,8 +541,7 @@ class MailAccountHandler(LoggingMixin):
  
          criterias = make_criterias(rule, supports_gmail_labels)
  
-        self.log(
-            "debug",
+        self.log.debug(
              f"Rule {rule}: Searching folder with criteria {str(criterias)}",
          )
  
@@ -572,7 +565,7 @@ class MailAccountHandler(LoggingMixin):
                  uid=message.uid,
                  folder=rule.folder,
              ).exists():
-                self.log("debug", f"Skipping mail {message}, already processed.")
+                self.log.debug(f"Skipping mail {message}, already processed.")
                  continue
  
              try:
@@ -581,13 +574,11 @@ class MailAccountHandler(LoggingMixin):
                  total_processed_files += processed_files
                  mails_processed += 1
              except Exception as e:
-                self.log(
-                    "error",
+                self.log.exception(
                      f"Rule {rule}: Error while processing mail {message.uid}: {e}",
-                    exc_info=True,
                  )
  
-        self.log("debug", f"Rule {rule}: Processed {mails_processed} matching mail(s)")
+        self.log.debug(f"Rule {rule}: Processed {mails_processed} matching mail(s)")
  
          return total_processed_files
  
@@ -602,8 +593,7 @@ class MailAccountHandler(LoggingMixin):
          ):
              return processed_elements
  
-        self.log(
-            "debug",
+        self.log.debug(
              f"Rule {rule}: "
              f"Processing mail {message.subject} from {message.from_} with "
              f"{len(message.attachments)} attachment(s)",
@@ -657,8 +647,7 @@ class MailAccountHandler(LoggingMixin):
                  and rule.attachment_type
                  == MailRule.AttachmentProcessing.ATTACHMENTS_ONLY
              ):
-                self.log(
-                    "debug",
+                self.log.debug(
                      f"Rule {rule}: "
                      f"Skipping attachment {att.filename} "
                      f"with content disposition {att.content_disposition}",
@@ -688,8 +677,7 @@ class MailAccountHandler(LoggingMixin):
                  with open(temp_filename, "wb") as f:
                      f.write(att.payload)
  
-                self.log(
-                    "info",
+                self.log.info(
                      f"Rule {rule}: "
                      f"Consuming attachment {att.filename} from mail "
                      f"{message.subject} from {message.from_}",
@@ -717,8 +705,7 @@ class MailAccountHandler(LoggingMixin):
  
                  processed_attachments += 1
              else:
-                self.log(
-                    "debug",
+                self.log.debug(
                      f"Rule {rule}: "
                      f"Skipping attachment {att.filename} "
                      f"since guessed mime type {mime_type} is not supported "
@@ -784,8 +771,7 @@ class MailAccountHandler(LoggingMixin):
  
              f.write(message.obj.as_bytes())
  
-        self.log(
-            "info",
+        self.log.info(
              f"Rule {rule}: "
              f"Consuming eml from mail "
              f"{message.subject} from {message.from_}",
diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py

index b838290cb196d589560224c0f7ceb64bc4cdb957..ded4d6a9bd80f6100eaec8e00b665f8a15f95df4 100644 (file)
--- a/src/paperless_mail/parsers.py
+++ b/src/paperless_mail/parsers.py
@@ -64,8 +64,7 @@ class MailDocumentParser(DocumentParser):
          try:
              mail = self.get_parsed(document_path)
          except ParseError as e:
-            self.log(
-                "warning",
+            self.log.warning(
                  f"Error while fetching document metadata for {document_path}: {e}",
              )
              return result
@@ -146,7 +145,7 @@ class MailDocumentParser(DocumentParser):
          self.archive_path = self.generate_pdf(document_path)
  
      def tika_parse(self, html: str):
-        self.log("info", "Sending content to Tika server")
+        self.log.info("Sending content to Tika server")
  
          try:
              parsed = parser.from_buffer(html, self.tika_server)
@@ -251,7 +250,7 @@ class MailDocumentParser(DocumentParser):
  
      def generate_pdf_from_mail(self, mail):
          url = self.gotenberg_server + "/forms/chromium/convert/html"
-        self.log("info", "Converting mail to PDF")
+        self.log.info("Converting mail to PDF")
  
          css_file = os.path.join(os.path.dirname(__file__), "templates/output.css")
  
@@ -320,7 +319,7 @@ class MailDocumentParser(DocumentParser):
  
      def generate_pdf_from_html(self, orig_html, attachments):
          url = self.gotenberg_server + "/forms/chromium/convert/html"
-        self.log("info", "Converting html to PDF")
+        self.log.info("Converting html to PDF")
  
          files = {}
          for name, file in self.transform_inline_html(orig_html, attachments):
diff --git a/src/paperless_mail/tests/test_parsers.py b/src/paperless_mail/tests/test_parsers.py

index e6cae1121f2f6611a76ef4a1a0f7f55806dfc6c1..4288bfbae081aa491c8b65888db4e1f046dd959a 100644 (file)
--- a/src/paperless_mail/tests/test_parsers.py
+++ b/src/paperless_mail/tests/test_parsers.py
@@ -129,8 +129,7 @@ class TestParser(FileSystemAssertsMixin, TestCase):
          )
          self.assertEqual(mocked_return, thumb)
  
-    @mock.patch("documents.loggers.LoggingMixin.log")
-    def test_extract_metadata_fail(self, m: mock.MagicMock):
+    def test_extract_metadata_fail(self):
          """
          GIVEN:
              - Fresh start
@@ -140,8 +139,12 @@ class TestParser(FileSystemAssertsMixin, TestCase):
              - A log warning should be generated
          """
          # Validate if warning is logged when parsing fails
-        self.assertEqual([], self.parser.extract_metadata("na", "message/rfc822"))
-        self.assertEqual("warning", m.call_args[0][0])
+        with self.assertLogs("paperless.parsing.mail", level="WARNING") as cm:
+            self.assertEqual([], self.parser.extract_metadata("na", "message/rfc822"))
+            self.assertIn(
+                "WARNING:paperless.parsing.mail:Error while fetching document metadata for na",
+                cm.output[0],
+            )
  
      def test_extract_metadata(self):
          """
diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py

index 151af97dc5700aa950270b8a8e7566b078eaa4fd..b6a721b2f0e78636cd627a8d23416b4361b604c3 100644 (file)
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -54,8 +54,7 @@ class RasterisedDocumentParser(DocumentParser):
                          },
                      )
                  except Exception as e:
-                    self.log(
-                        "warning",
+                    self.log.warning(
                          f"Error while reading metadata {key}: {value}. Error: {e}",
                      )
          return result
@@ -98,7 +97,7 @@ class RasterisedDocumentParser(DocumentParser):
                  x, y = im.info["dpi"]
                  return round(x)
          except Exception as e:
-            self.log("warning", f"Error while getting DPI from image {image}: {e}")
+            self.log.warning(f"Error while getting DPI from image {image}: {e}")
              return None
  
      def calculate_a4_dpi(self, image):
@@ -107,11 +106,11 @@ class RasterisedDocumentParser(DocumentParser):
                  width, height = im.size
                  # divide image width by A4 width (210mm) in inches.
                  dpi = int(width / (21 / 2.54))
-                self.log("debug", f"Estimated DPI {dpi} based on image width {width}")
+                self.log.debug(f"Estimated DPI {dpi} based on image width {width}")
                  return dpi
  
          except Exception as e:
-            self.log("warning", f"Error while calculating DPI for image {image}: {e}")
+            self.log.warning(f"Error while calculating DPI for image {image}: {e}")
              return None
  
      def extract_text(self, sidecar_file: Optional[Path], pdf_file: Path):
@@ -127,10 +126,10 @@ class RasterisedDocumentParser(DocumentParser):
              if "[OCR skipped on page" not in text:
                  # This happens when there's already text in the input file.
                  # The sidecar file will only contain text for OCR'ed pages.
-                self.log("debug", "Using text from sidecar file")
+                self.log.debug("Using text from sidecar file")
                  return post_process_text(text)
              else:
-                self.log("debug", "Incomplete sidecar file: discarding.")
+                self.log.debug("Incomplete sidecar file: discarding.")
  
          # no success with the sidecar file, try PDF
  
@@ -160,8 +159,7 @@ class RasterisedDocumentParser(DocumentParser):
  
          except Exception:
              #  If pdftotext fails, fall back to OCR.
-            self.log(
-                "warning",
+            self.log.warning(
                  "Error while getting text from PDF document with pdftotext",
                  exc_info=True,
              )
@@ -227,15 +225,14 @@ class RasterisedDocumentParser(DocumentParser):
              a4_dpi = self.calculate_a4_dpi(input_file)
  
              if self.has_alpha(input_file):
-                self.log(
-                    "info",
+                self.log.info(
                      f"Removing alpha layer from {input_file} "
                      "for compatibility with img2pdf",
                  )
                  self.remove_alpha(input_file)
  
              if dpi:
-                self.log("debug", f"Detected DPI for image {input_file}: {dpi}")
+                self.log.debug(f"Detected DPI for image {input_file}: {dpi}")
                  ocrmypdf_args["image_dpi"] = dpi
              elif settings.OCR_IMAGE_DPI:
                  ocrmypdf_args["image_dpi"] = settings.OCR_IMAGE_DPI
@@ -253,8 +250,7 @@ class RasterisedDocumentParser(DocumentParser):
                  user_args = json.loads(settings.OCR_USER_ARGS)
                  ocrmypdf_args = {**ocrmypdf_args, **user_args}
              except Exception as e:
-                self.log(
-                    "warning",
+                self.log.warning(
                      f"There is an issue with PAPERLESS_OCR_USER_ARGS, so "
                      f"they will not be used. Error: {e}",
                  )
@@ -263,15 +259,13 @@ class RasterisedDocumentParser(DocumentParser):
              # Convert pixels to mega-pixels and provide to ocrmypdf
              max_pixels_mpixels = settings.OCR_MAX_IMAGE_PIXELS / 1_000_000.0
              if max_pixels_mpixels > 0:
-                self.log(
-                    "debug",
+                self.log.debug(
                      f"Calculated {max_pixels_mpixels} megapixels for OCR",
                  )
  
                  ocrmypdf_args["max_image_mpixels"] = max_pixels_mpixels
              else:
-                self.log(
-                    "warning",
+                self.log.warning(
                      "There is an issue with PAPERLESS_OCR_MAX_IMAGE_PIXELS, "
                      "this value must be at least 1 megapixel if set",
                  )
@@ -299,7 +293,7 @@ class RasterisedDocumentParser(DocumentParser):
              or settings.OCR_SKIP_ARCHIVE_FILE in ["with_text", "always"]
          )
          if skip_archive_for_text and original_has_text:
-            self.log("debug", "Document has text, skipping OCRmyPDF entirely.")
+            self.log.debug("Document has text, skipping OCRmyPDF entirely.")
              self.text = text_original
              return
  
@@ -322,7 +316,7 @@ class RasterisedDocumentParser(DocumentParser):
          )
  
          try:
-            self.log("debug", f"Calling OCRmyPDF with args: {args}")
+            self.log.debug(f"Calling OCRmyPDF with args: {args}")
              ocrmypdf.ocr(**args)
  
              if settings.OCR_SKIP_ARCHIVE_FILE != "always":
@@ -333,16 +327,14 @@ class RasterisedDocumentParser(DocumentParser):
              if not self.text:
                  raise NoTextFoundException("No text was found in the original document")
          except EncryptedPdfError:
-            self.log(
-                "warning",
+            self.log.warning(
                  "This file is encrypted, OCR is impossible. Using "
                  "any text present in the original file.",
              )
              if original_has_text:
                  self.text = text_original
          except (NoTextFoundException, InputFileError) as e:
-            self.log(
-                "warning",
+            self.log.warning(
                  f"Encountered an error while running OCR: {str(e)}. "
                  f"Attempting force OCR to get the text.",
              )
@@ -365,7 +357,7 @@ class RasterisedDocumentParser(DocumentParser):
              )
  
              try:
-                self.log("debug", f"Fallback: Calling OCRmyPDF with args: {args}")
+                self.log.debug(f"Fallback: Calling OCRmyPDF with args: {args}")
                  ocrmypdf.ocr(**args)
  
                  # Don't return the archived file here, since this file
@@ -390,8 +382,7 @@ class RasterisedDocumentParser(DocumentParser):
              if original_has_text:
                  self.text = text_original
              else:
-                self.log(
-                    "warning",
+                self.log.warning(
                      f"No text was found in {document_path}, the content will "
                      f"be empty.",
                  )
diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py

index 779abbe59bcb9f3aa27e3f3a9bad22b213b2267e..d71f038a3f68337ab68e81295076282c125b505e 100644 (file)
--- a/src/paperless_tika/parsers.py
+++ b/src/paperless_tika/parsers.py
@@ -38,8 +38,7 @@ class TikaDocumentParser(DocumentParser):
          try:
              parsed = parser.from_file(document_path, tika_server)
          except Exception as e:
-            self.log(
-                "warning",
+            self.log.warning(
                  f"Error while fetching document metadata for {document_path}: {e}",
              )
              return []
@@ -55,7 +54,7 @@ class TikaDocumentParser(DocumentParser):
          ]
  
      def parse(self, document_path: Path, mime_type, file_name=None):
-        self.log("info", f"Sending {document_path} to Tika server")
+        self.log.info(f"Sending {document_path} to Tika server")
          tika_server = settings.TIKA_ENDPOINT
  
          # tika does not support a PathLike, only strings
@@ -75,8 +74,7 @@ class TikaDocumentParser(DocumentParser):
          try:
              self.date = dateutil.parser.isoparse(parsed["metadata"]["Creation-Date"])
          except Exception as e:
-            self.log(
-                "warning",
+            self.log.warning(
                  f"Unable to extract date for document {document_path}: {e}",
              )
  
@@ -87,7 +85,7 @@ class TikaDocumentParser(DocumentParser):
          gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT
          url = gotenberg_server + "/forms/libreoffice/convert"
  
-        self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
+        self.log.info(f"Converting {document_path} to PDF as {pdf_path}")
          with open(document_path, "rb") as document_handle:
              files = {
                  "files": (
author	Trenton H <797416+stumpylog@users.noreply.github.com>
	Fri, 19 May 2023 22:23:11 +0000 (15:23 -0700)
committer	Trenton H <797416+stumpylog@users.noreply.github.com>
	Wed, 24 May 2023 00:16:39 +0000 (17:16 -0700)
src/documents/consumer.py		patch \| blob \| blame \| history
src/documents/loggers.py		patch \| blob \| blame \| history
src/documents/parsers.py		patch \| blob \| blame \| history
src/documents/tests/test_consumer.py		patch \| blob \| blame \| history
src/paperless_mail/mail.py		patch \| blob \| blame \| history
src/paperless_mail/parsers.py		patch \| blob \| blame \| history
src/paperless_mail/tests/test_parsers.py		patch \| blob \| blame \| history
src/paperless_tesseract/parsers.py		patch \| blob \| blame \| history
src/paperless_tika/parsers.py		patch \| blob \| blame \| history