import logging
import re
from fnmatch import fnmatch
+from fnmatch import translate as fnmatch_translate
from typing import TYPE_CHECKING
from documents.data_models import ConsumableDocument
from documents.permissions import get_objects_for_user_owner_aware
if TYPE_CHECKING:
+ from django.db.models import QuerySet
+
from documents.classifier import DocumentClassifier
logger = logging.getLogger("paperless.matching")
return (trigger_matched, reason)
def prefilter_documents_by_workflowtrigger(
    documents: QuerySet[Document],
    trigger: WorkflowTrigger,
) -> QuerySet[Document]:
    """
    Narrow a document queryset using the filters configured on a trigger.

    To prevent scheduled workflows checking every document, we prefilter the
    documents by the workflow trigger filters. This is done before e.g.
    document_matches_workflow in run_workflows.

    Only filters that are actually set on the trigger are applied; a trigger
    with no filters returns the queryset unchanged.

    Args:
        documents: Candidate documents to narrow down.
        trigger: Workflow trigger whose ``filter_has_tags``,
            ``filter_has_correspondent``, ``filter_has_document_type`` and
            ``filter_filename`` values are used as filters.

    Returns:
        The queryset restricted by every filter set on the trigger.
    """

    required_tags = trigger.filter_has_tags.all()
    # exists() only asks the database whether any row is present, which is
    # all this check needs; a full COUNT is unnecessary.
    if required_tags.exists():
        # tags__in keeps documents carrying at least one of the tags.
        # distinct() is kept from the original; presumably it guards against
        # duplicate rows from the tag join when several tags match.
        documents = documents.filter(tags__in=required_tags).distinct()

    if trigger.filter_has_correspondent is not None:
        documents = documents.filter(
            correspondent=trigger.filter_has_correspondent,
        )

    if trigger.filter_has_document_type is not None:
        documents = documents.filter(
            document_type=trigger.filter_has_document_type,
        )

    # Truthiness covers both None and the empty string.
    if trigger.filter_filename:
        # the true fnmatch will actually run later so we just want a loose filter here
        regex = fnmatch_translate(trigger.filter_filename).lstrip("^").rstrip("$")
        # Inline flag makes the database-side match case-insensitive.
        regex = f"(?i){regex}"
        documents = documents.filter(original_filename__regex=regex)

    return documents
+
+
def document_matches_workflow(
document: ConsumableDocument | Document,
workflow: Workflow,
from documents.double_sided import CollatePlugin
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
+from documents.matching import prefilter_documents_by_workflowtrigger
from documents.models import Correspondent
from documents.models import CustomFieldInstance
from documents.models import Document
documents = Document.objects.filter(id__in=matched_ids)
+ if documents.count() > 0:
+ documents = prefilter_documents_by_workflowtrigger(
+ documents,
+ trigger,
+ )
+
if documents.count() > 0:
logger.debug(
f"Found {documents.count()} documents for trigger {trigger}",
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource
from documents.matching import document_matches_workflow
+from documents.matching import prefilter_documents_by_workflowtrigger
from documents.models import Correspondent
from documents.models import CustomField
from documents.models import CustomFieldInstance
doc2.refresh_from_db()
self.assertIsNone(doc2.owner) # has not triggered yet
def test_workflow_scheduled_filters_queryset(self):
    """
    GIVEN:
        - A workflow whose scheduled trigger filters on filename, tag,
          correspondent and document type
        - Ten documents, of which only every second one has the document type
    WHEN:
        - The documents are prefiltered for that trigger
    THEN:
        - Only the documents satisfying the trigger filters remain
    """
    scheduled_trigger = WorkflowTrigger.objects.create(
        type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
        schedule_offset_days=-7,
        schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
        filter_filename="*sample*",
        filter_has_document_type=self.dt,
        filter_has_correspondent=self.c,
    )
    scheduled_trigger.filter_has_tags.set([self.t1])
    scheduled_trigger.save()
    owner_action = WorkflowAction.objects.create(assign_owner=self.user2)
    workflow = Workflow.objects.create(name="Workflow 1", order=0)
    workflow.triggers.add(scheduled_trigger)
    workflow.actions.add(owner_action)
    workflow.save()

    # Even-numbered documents get the document type, odd-numbered ones do not,
    # so exactly half of them can pass the document type filter.
    for index in range(10):
        doc_type = self.dt if index % 2 == 0 else None
        document = Document.objects.create(
            title=f"sample test {index}",
            checksum=f"checksum{index}",
            correspondent=self.c,
            original_filename=f"sample_{index}.pdf",
            document_type=doc_type,
        )
        document.tags.set([self.t1])
        document.save()

    filtered_docs = prefilter_documents_by_workflowtrigger(
        Document.objects.all(),
        scheduled_trigger,
    )
    self.assertEqual(filtered_docs.count(), 5)

+
def test_workflow_enabled_disabled(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,