tools for it.
```
-document_retagger [-h] [-c] [-T] [-t] [-i] [--use-first] [-f]
+document_retagger [-h] [-c] [-T] [-t] [-i] [--id-range] [--use-first] [-f]
optional arguments:
-c, --correspondent
-t, --document_type
-s, --storage_path
-i, --inbox-only
+--id-range
--use-first
-f, --overwrite
```
inbox tags only. This is useful when you don't want to mess with your
already processed documents.
+Specify `--id-range 1 100` to have the document retagger work only on a
+specific range of document IDs (both bounds are inclusive). This can be
+useful if you have a lot of documents and want to test the matching rules
+only on a subset of documents.
+
When multiple document types or correspondents match a single document,
the retagger won't assign these to the document. Specify `--use-first`
to override this behavior and just use the first correspondent or type
"--base-url",
help="The base URL to use to build the link to the documents.",
)
+ parser.add_argument(
+ "--id-range",
+ help="A range of document ids on which the retagging should be applied.",
+ nargs=2,
+ type=int,
+ )
def handle(self, *args, **options):
# Detect if we support color
queryset = Document.objects.filter(tags__is_inbox_tag=True)
else:
queryset = Document.objects.all()
+
+ if options["id_range"]:
+ queryset = queryset.filter(
+ id__range=(options["id_range"][0], options["id_range"][1]),
+ )
+
documents = queryset.distinct()
classifier = load_classifier()
from django.core.management import call_command
+from django.core.management.base import CommandError
from django.test import TestCase
from documents.models import Correspondent
self.assertEqual(d_auto.storage_path, self.sp1)
self.assertIsNone(d_second.storage_path)
self.assertEqual(d_unrelated.storage_path, self.sp2)
+
+    def test_id_range_parameter(self):
+        """
+        Exercise the ``--id-range`` option of ``document_retagger``.
+
+        Covers three things:
+        - a narrow range keeps the retagger away from a newly created document,
+        - a missing or non-integer range is rejected with ``CommandError``,
+        - a wide range lets the retagger reach the new document as well.
+
+        NOTE(review): the expected counts assume the fixture leaves exactly one
+        document tagged with ``self.tag_first`` before this test runs and that
+        the new document's id falls outside 1-2 -- confirm against ``setUp``.
+        """
+        Document.objects.create(
+            checksum="E",
+            title="E",
+            content="NOT the first document",
+        )
+        call_command("document_retagger", "--tags", "--id-range", "1", "2")
+        # The retagger shouldn't apply the 'first' tag to our new document
+        self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 1)
+
+        # call_command() raises CommandError for argument parsing failures, so
+        # the error has to be asserted on the exception itself; nothing is
+        # ever returned from the failing call.
+        with self.assertRaises(CommandError) as missing_values:
+            call_command("document_retagger", "--tags", "--id-range")
+        self.assertIn(
+            "argument --id-range: expected 2 arguments",
+            str(missing_values.exception),
+        )
+
+        with self.assertRaises(CommandError) as non_integer_values:
+            call_command(
+                "document_retagger",
+                "--tags",
+                "--id-range",
+                "a",
+                "b",
+            )
+        self.assertIn(
+            "argument --id-range: invalid int value:",
+            str(non_integer_values.exception),
+        )
+
+        call_command("document_retagger", "--tags", "--id-range", "1", "9999")
+        # Now we should have 2 documents
+        self.assertEqual(Document.objects.filter(tags__id=self.tag_first.id).count(), 2)