tools for it.
```
-document_retagger [-h] [-c] [-T] [-t] [-i] [--id-range] [--use-first] [-f]
+document_retagger [-h] [-c] [-T] [-t] [-s] [-cf] [-i] [--id-range] [--use-first] [-f] [--suggest]
optional arguments:
-c, --correspondent
-T, --tags
-t, --document_type
-s, --storage_path
+-cf, --custom_fields
-i, --inbox-only
--id-range
--use-first
-f, --overwrite
+--suggest
```
Run this after changing or adding matching rules. It'll loop over all
it finds. This option does not apply to tags, since any amount of tags
can be applied to a document.
+If you only want to see the suggested changes without applying them, specify `--suggest`.
+
Finally, `-f` specifies that you wish to overwrite already assigned
correspondents, types and/or tags. The default behavior is to not assign
correspondents and types to documents that have this data already
from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated
from documents.signals.handlers import set_correspondent
+ from documents.signals.handlers import set_custom_fields
from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_storage_path
from documents.signals.handlers import set_tags
document_consumption_finished.connect(set_document_type)
document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_storage_path)
+ document_consumption_finished.connect(set_custom_fields)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_workflows_added)
document_updated.connect(run_workflows_updated)
from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document
from documents.signals.handlers import set_correspondent
+from documents.signals.handlers import set_custom_fields
from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_storage_path
from documents.signals.handlers import set_tags
class Command(ProgressBarMixin, BaseCommand):
help = (
- "Using the current classification model, assigns correspondents, tags "
- "and document types to all documents, effectively allowing you to "
- "back-tag all previously indexed documents with metadata created (or "
- "modified) after their initial import."
+ # Adjacent string literals are concatenated verbatim, so every fragment
+ # has to carry its own trailing separator.
+ "Using the current classification model, assigns correspondents, tags, "
+ "document types, storage paths and custom fields to all documents, effectively "
+ "allowing you to back-tag all previously indexed documents with metadata created "
+ "(or modified) after their initial import."
)
def add_arguments(self, parser):
parser.add_argument("-T", "--tags", default=False, action="store_true")
parser.add_argument("-t", "--document_type", default=False, action="store_true")
parser.add_argument("-s", "--storage_path", default=False, action="store_true")
+ parser.add_argument(
+ "-cf",
+ "--custom_fields",
+ default=False,
+ action="store_true",
+ )
parser.add_argument("-i", "--inbox-only", default=False, action="store_true")
parser.add_argument(
"--use-first",
stdout=self.stdout,
style_func=self.style,
)
+
+ if options["custom_fields"]:
+ set_custom_fields(
+ sender=None,
+ document=document,
+ classifier=classifier,
+ replace=options["overwrite"],
+ use_first=options["use_first"],
+ suggest=options["suggest"],
+ base_url=options["base_url"],
+ stdout=self.stdout,
+ style_func=self.style,
+ )
)
+def match_custom_fields(document: Document, classifier: DocumentClassifier, user=None):
+ predicted_custom_field_ids = (
+ classifier.predict_custom_fields(document.content) if classifier else []
+ )
+
+ fields = [instance.field for instance in document.custom_fields.all()]
+
+ return list(
+ filter(
+ lambda o: matches(o, document)
+ or (
+ o.matching_algorithm == MatchingModel.MATCH_AUTO
+ and o.pk in predicted_custom_field_ids
+ ),
+ fields,
+ ),
+ )
+
+
def matches(matching_model: MatchingModel, document: Document):
search_kwargs = {}
document.save(update_fields=("storage_path",))
+def set_custom_fields(
+ document: Document,
+ logging_group=None,
+ classifier: DocumentClassifier | None = None,
+ replace=False,
+ suggest=False,
+ base_url=None,
+ stdout=None,
+ style_func=None,
+ **kwargs,
+):
+ if replace:
+ CustomFieldInstance.objects.filter(document=document).exclude(
+ Q(field__match="") & ~Q(field__matching_algorithm=CustomField.MATCH_AUTO),
+ ).delete()
+
+ current_fields = set([instance.field for instance in document.custom_fields.all()])
+
+ matched_fields = matching.match_custom_fields(document, classifier)
+
+ relevant_fields = set(matched_fields) - current_fields
+
+ if suggest:
+ extra_fields = current_fields - set(matched_fields)
+ extra_fields = [
+ f for f in extra_fields if f.matching_algorithm == MatchingModel.MATCH_AUTO
+ ]
+ if not relevant_fields and not extra_fields:
+ return
+ doc_str = style_func.SUCCESS(str(document))
+ if base_url:
+ stdout.write(doc_str)
+ stdout.write(f"{base_url}/documents/{document.pk}")
+ else:
+ stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
+ if relevant_fields:
+ stdout.write(
+ "Suggest custom fields: "
+ + ", ".join([f.name for f in relevant_fields]),
+ )
+ if extra_fields:
+ stdout.write(
+ "Extra custom fields: " + ", ".join([f.name for f in extra_fields]),
+ )
+ else:
+ if not relevant_fields:
+ return
+
+ message = 'Assigning custom fields "{}" to "{}"'
+ logger.info(
+ message.format(document, ", ".join([f.name for f in relevant_fields])),
+ extra={"group": logging_group},
+ )
+
+ for field in relevant_fields:
+ CustomFieldInstance.objects.create(
+ field=field,
+ document=document,
+ )
+
+
# see empty_trash in documents/tasks.py for signal handling
def cleanup_document_deletion(sender, instance, **kwargs):
with FileLock(settings.MEDIA_LOCK):