Small improvement to the consumer status with stronger typing

author Trenton Holmes <797416+stumpylog@users.noreply.github.com>

Sat, 22 Jul 2023 23:51:05 +0000 (16:51 -0700)

committer Trenton H <797416+stumpylog@users.noreply.github.com>

Wed, 26 Jul 2023 14:03:43 +0000 (07:03 -0700)
author Trenton Holmes <797416+stumpylog@users.noreply.github.com>
Sat, 22 Jul 2023 23:51:05 +0000 (16:51 -0700)
committer Trenton H <797416+stumpylog@users.noreply.github.com>
Wed, 26 Jul 2023 14:03:43 +0000 (07:03 -0700)
diff --git a/src-ui/src/app/services/consumer-status.service.ts b/src-ui/src/app/services/consumer-status.service.ts

index 51c4292b4fb06f7e6acf78fbfdaa973aee0f6f5a..090d8043345b3b6247408465de9e9449e966d6c5 100644 (file)
--- a/src-ui/src/app/services/consumer-status.service.ts
+++ b/src-ui/src/app/services/consumer-status.service.ts
@@ -164,7 +164,7 @@ export class ConsumerStatusService {
        }
        status.documentId = statusMessage.document_id
  
-      if (created && statusMessage.status == 'STARTING') {
+      if (created && statusMessage.status == 'STARTED') {
          this.documentDetectedSubject.next(status)
        }
        if (statusMessage.status == 'SUCCESS') {
diff --git a/src/documents/consumer.py b/src/documents/consumer.py

index c2669c00a247f082c9050475bacc0bc442b34226..6fa830101784e04e6babdcb3198edcac1f34c430 100644 (file)
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -3,6 +3,7 @@ import hashlib
  import os
  import tempfile
  import uuid
+from enum import Enum
  from pathlib import Path
  from subprocess import CompletedProcess
  from subprocess import run
@@ -44,21 +45,30 @@ class ConsumerError(Exception):
      pass
  
  
-MESSAGE_DOCUMENT_ALREADY_EXISTS = "document_already_exists"
-MESSAGE_ASN_ALREADY_EXISTS = "asn_already_exists"
-MESSAGE_ASN_RANGE = "asn_value_out_of_range"
-MESSAGE_FILE_NOT_FOUND = "file_not_found"
-MESSAGE_PRE_CONSUME_SCRIPT_NOT_FOUND = "pre_consume_script_not_found"
-MESSAGE_PRE_CONSUME_SCRIPT_ERROR = "pre_consume_script_error"
-MESSAGE_POST_CONSUME_SCRIPT_NOT_FOUND = "post_consume_script_not_found"
-MESSAGE_POST_CONSUME_SCRIPT_ERROR = "post_consume_script_error"
-MESSAGE_NEW_FILE = "new_file"
-MESSAGE_UNSUPPORTED_TYPE = "unsupported_type"
-MESSAGE_PARSING_DOCUMENT = "parsing_document"
-MESSAGE_GENERATING_THUMBNAIL = "generating_thumbnail"
-MESSAGE_PARSE_DATE = "parse_date"
-MESSAGE_SAVE_DOCUMENT = "save_document"
-MESSAGE_FINISHED = "finished"
+class ConsumerStatusShortMessage(str, Enum):
+    DOCUMENT_ALREADY_EXISTS = "document_already_exists"
+    ASN_ALREADY_EXISTS = "asn_already_exists"
+    ASN_RANGE = "asn_value_out_of_range"
+    FILE_NOT_FOUND = "file_not_found"
+    PRE_CONSUME_SCRIPT_NOT_FOUND = "pre_consume_script_not_found"
+    PRE_CONSUME_SCRIPT_ERROR = "pre_consume_script_error"
+    POST_CONSUME_SCRIPT_NOT_FOUND = "post_consume_script_not_found"
+    POST_CONSUME_SCRIPT_ERROR = "post_consume_script_error"
+    NEW_FILE = "new_file"
+    UNSUPPORTED_TYPE = "unsupported_type"
+    PARSING_DOCUMENT = "parsing_document"
+    GENERATING_THUMBNAIL = "generating_thumbnail"
+    PARSE_DATE = "parse_date"
+    SAVE_DOCUMENT = "save_document"
+    FINISHED = "finished"
+    FAILED = "failed"
+
+
+class ConsumerFilePhase(str, Enum):
+    STARTED = "STARTED"
+    WORKING = "WORKING"
+    SUCCESS = "SUCCESS"
+    FAILED = "FAILED"
  
  
  class Consumer(LoggingMixin):
@@ -66,10 +76,10 @@ class Consumer(LoggingMixin):
  
      def _send_progress(
          self,
-        current_progress,
-        max_progress,
-        status,
-        message=None,
+        current_progress: int,
+        max_progress: int,
+        status: ConsumerFilePhase,
+        message: Optional[ConsumerStatusShortMessage] = None,
          document_id=None,
      ):  # pragma: no cover
          payload = {
@@ -88,12 +98,12 @@ class Consumer(LoggingMixin):
  
      def _fail(
          self,
-        message,
-        log_message=None,
+        message: ConsumerStatusShortMessage,
+        log_message: Optional[str] = None,
          exc_info=None,
          exception: Optional[Exception] = None,
      ):
-        self._send_progress(100, 100, "FAILED", message)
+        self._send_progress(100, 100, ConsumerFilePhase.FAILED, message)
          self.log.error(log_message or message, exc_info=exc_info)
          raise ConsumerError(f"{self.filename}: {log_message or message}") from exception
  
@@ -113,13 +123,19 @@ class Consumer(LoggingMixin):
          self.channel_layer = get_channel_layer()
  
      def pre_check_file_exists(self):
+        """
+        Confirm the input file still exists where it should
+        """
          if not os.path.isfile(self.path):
              self._fail(
-                MESSAGE_FILE_NOT_FOUND,
+                ConsumerStatusShortMessage.FILE_NOT_FOUND,
                  f"Cannot consume {self.path}: File not found.",
              )
  
      def pre_check_duplicate(self):
+        """
+        Using the MD5 of the file, check this exact file doesn't already exist
+        """
          with open(self.path, "rb") as f:
              checksum = hashlib.md5(f.read()).hexdigest()
          existing_doc = Document.objects.filter(
@@ -129,12 +145,15 @@ class Consumer(LoggingMixin):
              if settings.CONSUMER_DELETE_DUPLICATES:
                  os.unlink(self.path)
              self._fail(
-                MESSAGE_DOCUMENT_ALREADY_EXISTS,
+                ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS,
                  f"Not consuming {self.filename}: It is a duplicate of"
                  f" {existing_doc.get().title} (#{existing_doc.get().pk})",
              )
  
      def pre_check_directories(self):
+        """
+        Ensure all required directories exist before attempting to use them
+        """
          os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
          os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
          os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
@@ -154,7 +173,7 @@ class Consumer(LoggingMixin):
              or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
          ):
              self._fail(
-                MESSAGE_ASN_RANGE,
+                ConsumerStatusShortMessage.ASN_RANGE,
                  f"Not consuming {self.filename}: "
                  f"Given ASN {self.override_asn} is out of range "
                  f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
@@ -162,17 +181,21 @@ class Consumer(LoggingMixin):
              )
          if Document.objects.filter(archive_serial_number=self.override_asn).exists():
              self._fail(
-                MESSAGE_ASN_ALREADY_EXISTS,
+                ConsumerStatusShortMessage.ASN_ALREADY_EXISTS,
                  f"Not consuming {self.filename}: Given ASN already exists!",
              )
  
      def run_pre_consume_script(self):
+        """
+        If one is configured and exists, run the pre-consume script and
+        handle its output and/or errors
+        """
          if not settings.PRE_CONSUME_SCRIPT:
              return
  
          if not os.path.isfile(settings.PRE_CONSUME_SCRIPT):
              self._fail(
-                MESSAGE_PRE_CONSUME_SCRIPT_NOT_FOUND,
+                ConsumerStatusShortMessage.PRE_CONSUME_SCRIPT_NOT_FOUND,
                  f"Configured pre-consume script "
                  f"{settings.PRE_CONSUME_SCRIPT} does not exist.",
              )
@@ -203,19 +226,23 @@ class Consumer(LoggingMixin):
  
          except Exception as e:
              self._fail(
-                MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
+                ConsumerStatusShortMessage.PRE_CONSUME_SCRIPT_ERROR,
                  f"Error while executing pre-consume script: {e}",
                  exc_info=True,
                  exception=e,
              )
  
      def run_post_consume_script(self, document: Document):
+        """
+        If one is configured and exists, run the pre-consume script and
+        handle its output and/or errors
+        """
          if not settings.POST_CONSUME_SCRIPT:
              return
  
          if not os.path.isfile(settings.POST_CONSUME_SCRIPT):
              self._fail(
-                MESSAGE_POST_CONSUME_SCRIPT_NOT_FOUND,
+                ConsumerStatusShortMessage.POST_CONSUME_SCRIPT_NOT_FOUND,
                  f"Configured post-consume script "
                  f"{settings.POST_CONSUME_SCRIPT} does not exist.",
              )
@@ -276,7 +303,7 @@ class Consumer(LoggingMixin):
  
          except Exception as e:
              self._fail(
-                MESSAGE_POST_CONSUME_SCRIPT_ERROR,
+                ConsumerStatusShortMessage.POST_CONSUME_SCRIPT_ERROR,
                  f"Error while executing post-consume script: {e}",
                  exc_info=True,
                  exception=e,
@@ -310,7 +337,12 @@ class Consumer(LoggingMixin):
          self.override_asn = override_asn
          self.override_owner_id = override_owner_id
  
-        self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
+        self._send_progress(
+            0,
+            100,
+            ConsumerFilePhase.STARTED,
+            ConsumerStatusShortMessage.NEW_FILE,
+        )
  
          # Make sure that preconditions for consuming the file are met.
  
@@ -342,7 +374,10 @@ class Consumer(LoggingMixin):
          )
          if not parser_class:
              tempdir.cleanup()
-            self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
+            self._fail(
+                ConsumerStatusShortMessage.UNSUPPORTED_TYPE,
+                f"Unsupported mime type {mime_type}",
+            )
  
          # Notify all listeners that we're going to do some work.
  
@@ -357,7 +392,7 @@ class Consumer(LoggingMixin):
          def progress_callback(current_progress, max_progress):  # pragma: no cover
              # recalculate progress to be within 20 and 80
              p = int((current_progress / max_progress) * 50 + 20)
-            self._send_progress(p, 100, "WORKING")
+            self._send_progress(p, 100, ConsumerFilePhase.WORKING)
  
          # This doesn't parse the document yet, but gives us a parser.
  
@@ -379,12 +414,22 @@ class Consumer(LoggingMixin):
          archive_path = None
  
          try:
-            self._send_progress(20, 100, "WORKING", MESSAGE_PARSING_DOCUMENT)
+            self._send_progress(
+                20,
+                100,
+                ConsumerFilePhase.WORKING,
+                ConsumerStatusShortMessage.PARSING_DOCUMENT,
+            )
              self.log.debug(f"Parsing {self.filename}...")
              document_parser.parse(self.path, mime_type, self.filename)
  
              self.log.debug(f"Generating thumbnail for {self.filename}...")
-            self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
+            self._send_progress(
+                70,
+                100,
+                ConsumerFilePhase.WORKING,
+                ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
+            )
              thumbnail = document_parser.get_thumbnail(
                  self.path,
                  mime_type,
@@ -394,7 +439,12 @@ class Consumer(LoggingMixin):
              text = document_parser.get_text()
              date = document_parser.get_date()
              if date is None:
-                self._send_progress(90, 100, "WORKING", MESSAGE_PARSE_DATE)
+                self._send_progress(
+                    90,
+                    100,
+                    ConsumerFilePhase.WORKING,
+                    ConsumerStatusShortMessage.PARSE_DATE,
+                )
                  date = parse_date(self.filename, text)
              archive_path = document_parser.get_archive_path()
  
@@ -416,7 +466,12 @@ class Consumer(LoggingMixin):
  
          classifier = load_classifier()
  
-        self._send_progress(95, 100, "WORKING", MESSAGE_SAVE_DOCUMENT)
+        self._send_progress(
+            95,
+            100,
+            ConsumerFilePhase.WORKING,
+            ConsumerStatusShortMessage.SAVE_DOCUMENT,
+        )
          # now that everything is done, we can start to store the document
          # in the system. This will be a transaction and reasonably fast.
          try:
@@ -501,7 +556,13 @@ class Consumer(LoggingMixin):
  
          self.log.info(f"Document {document} consumption finished")
  
-        self._send_progress(100, 100, "SUCCESS", MESSAGE_FINISHED, document.id)
+        self._send_progress(
+            100,
+            100,
+            ConsumerFilePhase.SUCCESS,
+            ConsumerStatusShortMessage.FINISHED,
+            document.id,
+        )
  
          # Return the most up to date fields
          document.refresh_from_db()
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py

index 13806635582430a07e5f832712e966a54bfb3063..a8f427c37f36df7e9bbb3d759eeb9c063d2cd866 100644 (file)
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -21,6 +21,7 @@ from django.utils import timezone
  
  from documents.consumer import Consumer
  from documents.consumer import ConsumerError
+from documents.consumer import ConsumerFilePhase
  from documents.models import Correspondent
  from documents.models import Document
  from documents.models import DocumentType
@@ -228,8 +229,8 @@ def fake_magic_from_file(file, mime=False):
  class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
      def _assert_first_last_send_progress(
          self,
-        first_status="STARTING",
-        last_status="SUCCESS",
+        first_status=ConsumerFilePhase.STARTED,
+        last_status=ConsumerFilePhase.SUCCESS,
          first_progress=0,
          first_progress_max=100,
          last_progress=100,
author	Trenton Holmes <797416+stumpylog@users.noreply.github.com>
	Sat, 22 Jul 2023 23:51:05 +0000 (16:51 -0700)
committer	Trenton H <797416+stumpylog@users.noreply.github.com>
	Wed, 26 Jul 2023 14:03:43 +0000 (07:03 -0700)
src-ui/src/app/services/consumer-status.service.ts		patch \| blob \| blame \| history
src/documents/consumer.py		patch \| blob \| blame \| history
src/documents/tests/test_consumer.py		patch \| blob \| blame \| history