git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Revert "Tweak: more accurate classifier last trained time (#9004)"
author shamoon <4887959+shamoon@users.noreply.github.com>
Fri, 14 Feb 2025 01:54:08 +0000 (17:54 -0800)
committer shamoon <4887959+shamoon@users.noreply.github.com>
Mon, 17 Feb 2025 16:19:11 +0000 (08:19 -0800)
This reverts commit 3314c5982859609eea1635bfdb8545b7df1a7c07.

src/documents/classifier.py
src/documents/views.py

index 5bc8be2c6c125f036bf0f97708158895fb77185f..72bf1f16c82b5c867ee6815fd22505e31fb92f54 100644 (file)
@@ -1,7 +1,6 @@
 import logging
 import pickle
 import re
-import time
 import warnings
 from collections.abc import Iterator
 from hashlib import sha256
@@ -142,19 +141,6 @@ class DocumentClassifier:
                 ):
                     raise IncompatibleClassifierVersionError("sklearn version update")
 
-    def set_last_checked(self) -> None:
-        # save a timestamp of the last time we checked for retraining to a file
-        with Path(settings.MODEL_FILE.with_suffix(".last_checked")).open("w") as f:
-            f.write(str(time.time()))
-
-    def get_last_checked(self) -> float | None:
-        # load the timestamp of the last time we checked for retraining
-        try:
-            with Path(settings.MODEL_FILE.with_suffix(".last_checked")).open("r") as f:
-                return float(f.read())
-        except FileNotFoundError:  # pragma: no cover
-            return None
-
     def save(self) -> None:
         target_file: Path = settings.MODEL_FILE
         target_file_temp: Path = target_file.with_suffix(".pickle.part")
@@ -175,7 +161,6 @@ class DocumentClassifier:
             pickle.dump(self.storage_path_classifier, f)
 
         target_file_temp.rename(target_file)
-        self.set_last_checked()
 
     def train(self) -> bool:
         # Get non-inbox documents
@@ -244,7 +229,6 @@ class DocumentClassifier:
             and self.last_doc_change_time >= latest_doc_change
         ) and self.last_auto_type_hash == hasher.digest():
             logger.info("No updates since last training")
-            self.set_last_checked()
             # Set the classifier information into the cache
             # Caching for 50 minutes, so slightly less than the normal retrain time
             cache.set(
index 8193dc621e601aa32f40376ec7e8da261a0c56bb..00d2f6f29b11016ed6b47a40d54cdb55a7db8de6 100644 (file)
@@ -15,6 +15,7 @@ from urllib.parse import quote
 from urllib.parse import urlparse
 
 import pathvalidate
+from django.apps import apps
 from django.conf import settings
 from django.contrib.auth.models import Group
 from django.contrib.auth.models import User
@@ -2680,14 +2681,18 @@ class SystemStatusView(PassUserMixin):
                     classifier_status = "WARNING"
                     raise FileNotFoundError(classifier_error)
             classifier_status = "OK"
-            classifier_last_trained = (
-                make_aware(
-                    datetime.fromtimestamp(classifier.get_last_checked()),
+            task_result_model = apps.get_model("django_celery_results", "taskresult")
+            result = (
+                task_result_model.objects.filter(
+                    task_name="documents.tasks.train_classifier",
+                    status="SUCCESS",
                 )
-                if settings.MODEL_FILE.exists()
-                and classifier.get_last_checked() is not None
-                else None
+                .order_by(
+                    "-date_done",
+                )
+                .first()
             )
+            classifier_last_trained = result.date_done if result else None
         except Exception as e:
             if classifier_status is None:
                 classifier_status = "ERROR"