git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fix: Enforce classifier training ordering to prevent extra training (#8822)
author: Trenton H <797416+stumpylog@users.noreply.github.com>
Sun, 19 Jan 2025 20:52:03 +0000 (12:52 -0800)
committer: GitHub <noreply@github.com>
Sun, 19 Jan 2025 20:52:03 +0000 (20:52 +0000)
src/documents/classifier.py

index 4c36dc5e0257f720bae7ffe98c2ddbc1925ecff0..b3e71711c84e774ef72e031b092e8af2a7baa6df 100644 (file)
@@ -170,6 +170,7 @@ class DocumentClassifier:
             )
             .select_related("document_type", "correspondent", "storage_path")
             .prefetch_related("tags")
+            .order_by("pk")
         )
 
         # No documents exist to train against
@@ -199,11 +200,10 @@ class DocumentClassifier:
             hasher.update(y.to_bytes(4, "little", signed=True))
             labels_correspondent.append(y)
 
-            tags: list[int] = sorted(
-                tag.pk
-                for tag in doc.tags.filter(
-                    matching_algorithm=MatchingModel.MATCH_AUTO,
-                )
+            tags: list[int] = list(
+                doc.tags.filter(matching_algorithm=MatchingModel.MATCH_AUTO)
+                .order_by("pk")
+                .values_list("pk", flat=True),
             )
             for tag in tags:
                 hasher.update(tag.to_bytes(4, "little", signed=True))