FAILED = "failed"
-class ConsumerPlugin(
- AlwaysRunPluginMixin,
- NoSetupPluginMixin,
- NoCleanupPluginMixin,
- LoggingMixin,
- ConsumeTaskPlugin,
-):
- logging_name = "paperless.consumer"
-
+class ConsumerPluginMixin:
def __init__(
self,
input_doc: ConsumableDocument,
self.log.error(log_message or message, exc_info=exc_info)
raise ConsumerError(f"{self.filename}: {log_message or message}") from exception
- def pre_check_file_exists(self):
- """
- Confirm the input file still exists where it should
- """
- if TYPE_CHECKING:
- assert isinstance(self.input_doc.original_file, Path), (
- self.input_doc.original_file
- )
- if not self.input_doc.original_file.is_file():
- self._fail(
- ConsumerStatusShortMessage.FILE_NOT_FOUND,
- f"Cannot consume {self.input_doc.original_file}: File not found.",
- )
- def pre_check_duplicate(self):
- """
- Using the MD5 of the file, check this exact file doesn't already exist
- """
- with Path(self.input_doc.original_file).open("rb") as f:
- checksum = hashlib.md5(f.read()).hexdigest()
- existing_doc = Document.global_objects.filter(
- Q(checksum=checksum) | Q(archive_checksum=checksum),
- )
- if existing_doc.exists():
- msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
- log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})."
-
- if existing_doc.first().deleted_at is not None:
- msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
- log_msg += " Note: existing document is in the trash."
-
- if settings.CONSUMER_DELETE_DUPLICATES:
- Path(self.input_doc.original_file).unlink()
- self._fail(
- msg,
- log_msg,
- )
-
- def pre_check_directories(self):
- """
- Ensure all required directories exist before attempting to use them
- """
- settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
- settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
- settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
- settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
-
- def pre_check_asn_value(self):
- """
- Check that if override_asn is given, it is unique and within a valid range
- """
- if self.metadata.asn is None:
- # check not necessary in case no ASN gets set
- return
- # Validate the range is above zero and less than uint32_t max
- # otherwise, Whoosh can't handle it in the index
- if (
- self.metadata.asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
- or self.metadata.asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
- ):
- self._fail(
- ConsumerStatusShortMessage.ASN_RANGE,
- f"Not consuming {self.filename}: "
- f"Given ASN {self.metadata.asn} is out of range "
- f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
- f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
- )
- existing_asn_doc = Document.global_objects.filter(
- archive_serial_number=self.metadata.asn,
- )
- if existing_asn_doc.exists():
- msg = ConsumerStatusShortMessage.ASN_ALREADY_EXISTS
- log_msg = f"Not consuming {self.filename}: Given ASN {self.metadata.asn} already exists!"
-
- if existing_asn_doc.first().deleted_at is not None:
- msg = ConsumerStatusShortMessage.ASN_ALREADY_EXISTS_IN_TRASH
- log_msg += " Note: existing document is in the trash."
-
- self._fail(
- msg,
- log_msg,
- )
+class ConsumerPlugin(
+ AlwaysRunPluginMixin,
+ NoSetupPluginMixin,
+ NoCleanupPluginMixin,
+ LoggingMixin,
+ ConsumerPluginMixin,
+ ConsumeTaskPlugin,
+):
+ logging_name = "paperless.consumer"
def run_pre_consume_script(self):
"""
tempdir = None
try:
- self._send_progress(
- 0,
- 100,
- ProgressStatusOptions.STARTED,
- ConsumerStatusShortMessage.NEW_FILE,
- )
-
- # Make sure that preconditions for consuming the file are met.
-
- self.pre_check_file_exists()
- self.pre_check_directories()
- self.pre_check_duplicate()
- self.pre_check_asn_value()
-
+ # Preflight has already run including progress update to 0%
self.log.info(f"Consuming {self.filename}")
# For the actual work, copy the file into a tempdir
copy_basic_file_stats(source, target)
except Exception: # pragma: no cover
pass
+
+
+class ConsumerPreflightPlugin(
+ NoCleanupPluginMixin,
+ NoSetupPluginMixin,
+ AlwaysRunPluginMixin,
+ LoggingMixin,
+ ConsumerPluginMixin,
+ ConsumeTaskPlugin,
+):
+ NAME: str = "ConsumerPreflightPlugin"
+ logging_name = "paperless.consumer"
+
+ def pre_check_file_exists(self):
+ """
+ Confirm the input file still exists where it should
+ """
+ if TYPE_CHECKING:
+ assert isinstance(self.input_doc.original_file, Path), (
+ self.input_doc.original_file
+ )
+ if not self.input_doc.original_file.is_file():
+ self._fail(
+ ConsumerStatusShortMessage.FILE_NOT_FOUND,
+ f"Cannot consume {self.input_doc.original_file}: File not found.",
+ )
+
+ def pre_check_duplicate(self):
+ """
+ Using the MD5 of the file, check this exact file doesn't already exist
+ """
+ with Path(self.input_doc.original_file).open("rb") as f:
+ checksum = hashlib.md5(f.read()).hexdigest()
+ existing_doc = Document.global_objects.filter(
+ Q(checksum=checksum) | Q(archive_checksum=checksum),
+ )
+ if existing_doc.exists():
+ msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
+ log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})."
+
+ if existing_doc.first().deleted_at is not None:
+ msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
+ log_msg += " Note: existing document is in the trash."
+
+ if settings.CONSUMER_DELETE_DUPLICATES:
+ Path(self.input_doc.original_file).unlink()
+ self._fail(
+ msg,
+ log_msg,
+ )
+
+ def pre_check_directories(self):
+ """
+ Ensure all required directories exist before attempting to use them
+ """
+ settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
+ settings.THUMBNAIL_DIR.mkdir(parents=True, exist_ok=True)
+ settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
+ settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
+
+ def pre_check_asn_value(self):
+ """
+ Check that if override_asn is given, it is unique and within a valid range
+ """
+ if self.metadata.asn is None:
+ # check not necessary in case no ASN gets set
+ return
+ # Validate the range is above zero and less than uint32_t max
+ # otherwise, Whoosh can't handle it in the index
+ if (
+ self.metadata.asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
+ or self.metadata.asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
+ ):
+ self._fail(
+ ConsumerStatusShortMessage.ASN_RANGE,
+ f"Not consuming {self.filename}: "
+ f"Given ASN {self.metadata.asn} is out of range "
+ f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
+ f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
+ )
+ existing_asn_doc = Document.global_objects.filter(
+ archive_serial_number=self.metadata.asn,
+ )
+ if existing_asn_doc.exists():
+ msg = ConsumerStatusShortMessage.ASN_ALREADY_EXISTS
+ log_msg = f"Not consuming {self.filename}: Given ASN {self.metadata.asn} already exists!"
+
+ if existing_asn_doc.first().deleted_at is not None:
+ msg = ConsumerStatusShortMessage.ASN_ALREADY_EXISTS_IN_TRASH
+ log_msg += " Note: existing document is in the trash."
+
+ self._fail(
+ msg,
+ log_msg,
+ )
+
+ def run(self) -> None:
+ self._send_progress(
+ 0,
+ 100,
+ ProgressStatusOptions.STARTED,
+ ConsumerStatusShortMessage.NEW_FILE,
+ )
+
+ # Make sure that preconditions for consuming the file are met.
+
+ self.pre_check_file_exists()
+ self.pre_check_duplicate()
+ self.pre_check_directories()
+ self.pre_check_asn_value()
self._assert_first_last_send_progress()
def testNotAFile(self):
- with self.get_consumer(Path("non-existing-file")) as consumer:
- with self.assertRaisesMessage(ConsumerError, "File not found"):
+ with self.assertRaisesMessage(ConsumerError, "File not found"):
+ with self.get_consumer(Path("non-existing-file")) as consumer:
consumer.run()
self._assert_first_last_send_progress(last_status="FAILED")
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
- with self.get_consumer(self.get_test_file()) as consumer:
- with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
+ with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
+ with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self._assert_first_last_send_progress(last_status="FAILED")
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
- with self.get_consumer(self.get_test_archive_file()) as consumer:
- with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
+ with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
+ with self.get_consumer(self.get_test_archive_file()) as consumer:
consumer.run()
self._assert_first_last_send_progress(last_status="FAILED")
Document.objects.all().delete()
- with self.get_consumer(self.get_test_file()) as consumer:
- with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
+ with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
+ with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
def testAsnExists(self):
) as consumer:
consumer.run()
- with self.get_consumer(
- self.get_test_file2(),
- DocumentMetadataOverrides(asn=123),
- ) as consumer:
- with self.assertRaisesMessage(ConsumerError, "ASN 123 already exists"):
+ with self.assertRaisesMessage(ConsumerError, "ASN 123 already exists"):
+ with self.get_consumer(
+ self.get_test_file2(),
+ DocumentMetadataOverrides(asn=123),
+ ) as consumer:
consumer.run()
def testAsnExistsInTrash(self):
document = Document.objects.first()
document.delete()
- with self.get_consumer(
- self.get_test_file2(),
- DocumentMetadataOverrides(asn=123),
- ) as consumer:
- with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
+ with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
+ with self.get_consumer(
+ self.get_test_file2(),
+ DocumentMetadataOverrides(asn=123),
+ ) as consumer:
consumer.run()
@mock.patch("documents.parsers.document_consumer_declaration.send")
def testNoParsers(self, m):
m.return_value = []
- with self.get_consumer(self.get_test_file()) as consumer:
- with self.assertRaisesMessage(
- ConsumerError,
- "sample.pdf: Unsupported mime type application/pdf",
- ):
+ with self.assertRaisesMessage(
+ ConsumerError,
+ "sample.pdf: Unsupported mime type application/pdf",
+ ):
+ with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self._assert_first_last_send_progress(last_status="FAILED")
dst = self.get_test_file()
self.assertIsFile(dst)
- with self.get_consumer(dst) as consumer:
- with self.assertRaises(ConsumerError):
+ with self.assertRaises(ConsumerError):
+ with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsNotFile(dst)
dst = self.get_test_file()
self.assertIsFile(dst)
- with self.get_consumer(dst) as consumer:
- with self.assertRaisesRegex(
- ConsumerError,
- r"sample\.pdf: Not consuming sample\.pdf: It is a duplicate of sample \(#\d+\)",
- ):
+ with self.assertRaisesRegex(
+ ConsumerError,
+ r"sample\.pdf: Not consuming sample\.pdf: It is a duplicate of sample \(#\d+\)",
+ ):
+ with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsFile(dst)