git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Use PaperlessTask for llmindex
author shamoon <4887959+shamoon@users.noreply.github.com>
Wed, 30 Apr 2025 02:40:05 +0000 (19:40 -0700)
committer shamoon <4887959+shamoon@users.noreply.github.com>
Wed, 2 Jul 2025 18:04:04 +0000 (11:04 -0700)
src-ui/src/app/data/paperless-task.ts
src/documents/management/commands/document_llmindex.py
src/documents/migrations/1066_alter_paperlesstask_task_name.py [new file with mode: 0644]
src/documents/models.py
src/documents/tasks.py
src/paperless/migrations/0004_applicationconfiguration_ai_enabled_and_more.py
src/paperless/settings.py
src/paperless/tests/test_settings.py
src/paperless_ai/indexing.py

index 1bec277eb09d372d2d14289fb9214d538820a286..b30af7cdd4b5d3e1bdbb5cb46b0d5548dd713a02 100644 (file)
@@ -11,6 +11,7 @@ export enum PaperlessTaskName {
   TrainClassifier = 'train_classifier',
   SanityCheck = 'check_sanity',
   IndexOptimize = 'index_optimize',
+  LLMIndexUpdate = 'llmindex_update',
 }
 
 export enum PaperlessTaskStatus {
index 74c5c4d69e885cce0acf858b8a68f7927facdf5f..d2df02ed9ea9c49b205b8e1fb108a692602c7938 100644 (file)
@@ -18,4 +18,5 @@ class Command(ProgressBarMixin, BaseCommand):
             llmindex_index(
                 progress_bar_disable=self.no_progress_bar,
                 rebuild=options["command"] == "rebuild",
+                scheduled=False,
             )
diff --git a/src/documents/migrations/1066_alter_paperlesstask_task_name.py b/src/documents/migrations/1066_alter_paperlesstask_task_name.py
new file mode 100644 (file)
index 0000000..38fa5d4
--- /dev/null
@@ -0,0 +1,30 @@
+# Generated by Django 5.1.8 on 2025-04-30 02:38
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("documents", "1065_workflowaction_assign_custom_fields_values"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="paperlesstask",
+            name="task_name",
+            field=models.CharField(
+                choices=[
+                    ("consume_file", "Consume File"),
+                    ("train_classifier", "Train Classifier"),
+                    ("check_sanity", "Check Sanity"),
+                    ("index_optimize", "Index Optimize"),
+                    ("llmindex_update", "LLM Index Update"),
+                ],
+                help_text="Name of the task that was run",
+                max_length=255,
+                null=True,
+                verbose_name="Task Name",
+            ),
+        ),
+    ]
index e93f140543403579c02f7962192ad374768a5484..43dd5bde478cb83b9dd87c675261746013659f5e 100644 (file)
@@ -543,6 +543,7 @@ class PaperlessTask(ModelWithOwner):
         TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
         CHECK_SANITY = ("check_sanity", _("Check Sanity"))
         INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
+        LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
 
     task_id = models.CharField(
         max_length=255,
index 155b4abc23fed014deeceb6458c9bab378eaca38..73dd11a79ae6ad994ac00c51a10be2c5606a2d0a 100644 (file)
@@ -535,13 +535,29 @@ def check_scheduled_workflows():
 
 
 @shared_task
-def llmindex_index(*, progress_bar_disable=False, rebuild=False):
+def llmindex_index(*, progress_bar_disable=True, rebuild=False, scheduled=True):
     ai_config = AIConfig()
     if ai_config.llm_index_enabled():
-        update_llm_index(
+        task = PaperlessTask.objects.create(
+            type=PaperlessTask.TaskType.SCHEDULED_TASK
+            if scheduled
+            else PaperlessTask.TaskType.MANUAL_TASK,
+            task_id=uuid.uuid4(),
+            task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
+            status=states.STARTED,
+            date_created=timezone.now(),
+            date_started=timezone.now(),
+        )
+        from paperless_ai.indexing import update_llm_index
+
+        result = update_llm_index(
             progress_bar_disable=progress_bar_disable,
             rebuild=rebuild,
         )
+        task.status = states.SUCCESS
+        task.result = result
+        task.date_done = timezone.now()
+        task.save(update_fields=["status", "result", "date_done"])
 
 
 @shared_task
@@ -552,11 +568,3 @@ def update_document_in_llm_index(document):
 @shared_task
 def remove_document_from_llm_index(document):
     llm_index_remove_document(document)
-
-
-# TODO: schedule to run periodically
-@shared_task
-def rebuild_llm_index_task():
-    from paperless_ai.indexing import update_llm_index
-
-    update_llm_index(rebuild=True)
index da5180bf2795653c27dc437e13ac934512a5f317..28350e3b1cf4ae36d6a6fbd161387576e4bf8f2c 100644 (file)
@@ -1,4 +1,4 @@
-# Generated by Django 5.1.7 on 2025-04-24 02:09
+# Generated by Django 5.1.8 on 2025-04-30 02:38
 
 from django.db import migrations
 from django.db import models
@@ -21,44 +21,44 @@ class Migration(migrations.Migration):
         ),
         migrations.AddField(
             model_name="applicationconfiguration",
-            name="llm_embedding_backend",
+            name="llm_api_key",
             field=models.CharField(
                 blank=True,
-                choices=[("openai", "OpenAI"), ("local", "Local")],
-                max_length=32,
+                max_length=128,
                 null=True,
-                verbose_name="Sets the LLM Embedding backend",
+                verbose_name="Sets the LLM API key",
             ),
         ),
         migrations.AddField(
             model_name="applicationconfiguration",
-            name="llm_embedding_model",
+            name="llm_backend",
             field=models.CharField(
                 blank=True,
+                choices=[("openai", "OpenAI"), ("ollama", "Ollama")],
                 max_length=32,
                 null=True,
-                verbose_name="Sets the LLM Embedding model",
+                verbose_name="Sets the LLM backend",
             ),
         ),
         migrations.AddField(
             model_name="applicationconfiguration",
-            name="llm_api_key",
+            name="llm_embedding_backend",
             field=models.CharField(
                 blank=True,
-                max_length=128,
+                choices=[("openai", "OpenAI"), ("huggingface", "Huggingface")],
+                max_length=32,
                 null=True,
-                verbose_name="Sets the LLM API key",
+                verbose_name="Sets the LLM embedding backend",
             ),
         ),
         migrations.AddField(
             model_name="applicationconfiguration",
-            name="llm_backend",
+            name="llm_embedding_model",
             field=models.CharField(
                 blank=True,
-                choices=[("openai", "OpenAI"), ("ollama", "Ollama")],
                 max_length=32,
                 null=True,
-                verbose_name="Sets the LLM backend",
+                verbose_name="Sets the LLM embedding model",
             ),
         ),
         migrations.AddField(
index f060fa89e6067563c333c496e0a34ebbe151ebde..159793cdb69b10ba8174c1139eb8a9386b23ecbd 100644 (file)
@@ -243,9 +243,6 @@ def _parse_beat_schedule() -> dict:
             "options": {
                 # 1 hour before default schedule sends again
                 "expires": 23.0 * 60.0 * 60.0,
-                "kwargs": {
-                    "progress_bar_disable": True,
-                },
             },
         },
     ]
index c0b75958269444efba45d7b4dc07b25868d51708..0727ceaeddbc5113e097c85852fbffa1ffa0ca42 100644 (file)
@@ -211,9 +211,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
@@ -273,9 +270,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
@@ -327,9 +321,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
index afc0abb460b39d9056c6d65889d6f84511fb0c80..548b6ba51b78babea9f3cdbdb645373e7d22b777 100644 (file)
@@ -115,7 +115,7 @@ def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
         index.docstore.delete_document(node_id)
 
 
-def update_llm_index(*, progress_bar_disable=False, rebuild=False):
+def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
     """
     Rebuild or update the LLM index.
     """
@@ -123,8 +123,9 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
 
     documents = Document.objects.all()
     if not documents.exists():
-        logger.warning("No documents found to index.")
-        return
+        msg = "No documents found to index."
+        logger.warning(msg)
+        return msg
 
     if (
         rebuild
@@ -145,6 +146,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
             embed_model=embed_model,
             show_progress=not progress_bar_disable,
         )
+        msg = "LLM index rebuilt successfully."
     else:
         # Update existing index
         index = load_or_build_index()
@@ -173,15 +175,18 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
                 nodes.extend(build_document_node(document))
 
         if nodes:
+            msg = "LLM index updated successfully."
             logger.info(
                 "Updating %d nodes in LLM index.",
                 len(nodes),
             )
             index.insert_nodes(nodes)
         else:
-            logger.info("No changes detected, skipping llm index rebuild.")
+            msg = "No changes detected in LLM index."
+            logger.info(msg)
 
     index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
+    return msg
 
 
 def llm_index_add_or_update_document(document: Document):