git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Use PaperlessTask for llmindex
author shamoon <4887959+shamoon@users.noreply.github.com>
Wed, 30 Apr 2025 02:40:05 +0000 (19:40 -0700)
committer shamoon <4887959+shamoon@users.noreply.github.com>
Wed, 2 Jul 2025 18:04:04 +0000 (11:04 -0700)
src-ui/src/app/data/paperless-task.ts
src/documents/management/commands/document_llmindex.py
src/documents/migrations/1066_alter_paperlesstask_task_name.py [new file with mode: 0644]
src/documents/models.py
src/documents/tasks.py
src/paperless/migrations/0004_applicationconfiguration_ai_enabled_and_more.py
src/paperless/settings.py
src/paperless/tests/test_settings.py
src/paperless_ai/indexing.py

index 1bec277eb09d372d2d14289fb9214d538820a286..b30af7cdd4b5d3e1bdbb5cb46b0d5548dd713a02 100644 (file)
@@ -11,6 +11,7 @@ export enum PaperlessTaskName {
   TrainClassifier = 'train_classifier',
   SanityCheck = 'check_sanity',
   IndexOptimize = 'index_optimize',
+  LLMIndexUpdate = 'llmindex_update',
 }
 
 export enum PaperlessTaskStatus {
index 74c5c4d69e885cce0acf858b8a68f7927facdf5f..d2df02ed9ea9c49b205b8e1fb108a692602c7938 100644 (file)
@@ -18,4 +18,5 @@ class Command(ProgressBarMixin, BaseCommand):
             llmindex_index(
                 progress_bar_disable=self.no_progress_bar,
                 rebuild=options["command"] == "rebuild",
+                scheduled=False,
             )
diff --git a/src/documents/migrations/1066_alter_paperlesstask_task_name.py b/src/documents/migrations/1066_alter_paperlesstask_task_name.py
new file mode 100644 (file)
index 0000000..38fa5d4
--- /dev/null
@@ -0,0 +1,30 @@
+# Generated by Django 5.1.8 on 2025-04-30 02:38
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("documents", "1065_workflowaction_assign_custom_fields_values"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="paperlesstask",
+            name="task_name",
+            field=models.CharField(
+                choices=[
+                    ("consume_file", "Consume File"),
+                    ("train_classifier", "Train Classifier"),
+                    ("check_sanity", "Check Sanity"),
+                    ("index_optimize", "Index Optimize"),
+                    ("llmindex_update", "LLM Index Update"),
+                ],
+                help_text="Name of the task that was run",
+                max_length=255,
+                null=True,
+                verbose_name="Task Name",
+            ),
+        ),
+    ]
index e93f140543403579c02f7962192ad374768a5484..43dd5bde478cb83b9dd87c675261746013659f5e 100644 (file)
@@ -543,6 +543,7 @@ class PaperlessTask(ModelWithOwner):
         TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
         CHECK_SANITY = ("check_sanity", _("Check Sanity"))
         INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
+        LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
 
     task_id = models.CharField(
         max_length=255,
index 155b4abc23fed014deeceb6458c9bab378eaca38..73dd11a79ae6ad994ac00c51a10be2c5606a2d0a 100644 (file)
@@ -535,13 +535,29 @@ def check_scheduled_workflows():
 
 
 @shared_task
-def llmindex_index(*, progress_bar_disable=False, rebuild=False):
+def llmindex_index(*, progress_bar_disable=True, rebuild=False, scheduled=True):
     ai_config = AIConfig()
     if ai_config.llm_index_enabled():
-        update_llm_index(
+        task = PaperlessTask.objects.create(
+            type=PaperlessTask.TaskType.SCHEDULED_TASK
+            if scheduled
+            else PaperlessTask.TaskType.MANUAL_TASK,
+            task_id=uuid.uuid4(),
+            task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
+            status=states.STARTED,
+            date_created=timezone.now(),
+            date_started=timezone.now(),
+        )
+        from paperless_ai.indexing import update_llm_index
+
+        result = update_llm_index(
             progress_bar_disable=progress_bar_disable,
             rebuild=rebuild,
         )
+        task.status = states.SUCCESS
+        task.result = result
+        task.date_done = timezone.now()
+        task.save(update_fields=["status", "result", "date_done"])
 
 
 @shared_task
@@ -552,11 +568,3 @@ def update_document_in_llm_index(document):
 @shared_task
 def remove_document_from_llm_index(document):
     llm_index_remove_document(document)
-
-
-# TODO: schedule to run periodically
-@shared_task
-def rebuild_llm_index_task():
-    from paperless_ai.indexing import update_llm_index
-
-    update_llm_index(rebuild=True)
index da5180bf2795653c27dc437e13ac934512a5f317..28350e3b1cf4ae36d6a6fbd161387576e4bf8f2c 100644 (file)
@@ -1,4 +1,4 @@
-# Generated by Django 5.1.7 on 2025-04-24 02:09
+# Generated by Django 5.1.8 on 2025-04-30 02:38
 
 from django.db import migrations
 from django.db import models
@@ -21,44 +21,44 @@ class Migration(migrations.Migration):
         ),
         migrations.AddField(
             model_name="applicationconfiguration",
-            name="llm_embedding_backend",
+            name="llm_api_key",
             field=models.CharField(
                 blank=True,
-                choices=[("openai", "OpenAI"), ("local", "Local")],
-                max_length=32,
+                max_length=128,
                 null=True,
-                verbose_name="Sets the LLM Embedding backend",
+                verbose_name="Sets the LLM API key",
             ),
         ),
         migrations.AddField(
             model_name="applicationconfiguration",
-            name="llm_embedding_model",
+            name="llm_backend",
             field=models.CharField(
                 blank=True,
+                choices=[("openai", "OpenAI"), ("ollama", "Ollama")],
                 max_length=32,
                 null=True,
-                verbose_name="Sets the LLM Embedding model",
+                verbose_name="Sets the LLM backend",
             ),
         ),
         migrations.AddField(
             model_name="applicationconfiguration",
-            name="llm_api_key",
+            name="llm_embedding_backend",
             field=models.CharField(
                 blank=True,
-                max_length=128,
+                choices=[("openai", "OpenAI"), ("huggingface", "Huggingface")],
+                max_length=32,
                 null=True,
-                verbose_name="Sets the LLM API key",
+                verbose_name="Sets the LLM embedding backend",
             ),
         ),
         migrations.AddField(
             model_name="applicationconfiguration",
-            name="llm_backend",
+            name="llm_embedding_model",
             field=models.CharField(
                 blank=True,
-                choices=[("openai", "OpenAI"), ("ollama", "Ollama")],
                 max_length=32,
                 null=True,
-                verbose_name="Sets the LLM backend",
+                verbose_name="Sets the LLM embedding model",
             ),
         ),
         migrations.AddField(
index f060fa89e6067563c333c496e0a34ebbe151ebde..159793cdb69b10ba8174c1139eb8a9386b23ecbd 100644 (file)
@@ -243,9 +243,6 @@ def _parse_beat_schedule() -> dict:
             "options": {
                 # 1 hour before default schedule sends again
                 "expires": 23.0 * 60.0 * 60.0,
-                "kwargs": {
-                    "progress_bar_disable": True,
-                },
             },
         },
     ]
index c0b75958269444efba45d7b4dc07b25868d51708..0727ceaeddbc5113e097c85852fbffa1ffa0ca42 100644 (file)
@@ -211,9 +211,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
@@ -273,9 +270,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
@@ -327,9 +321,6 @@ class TestCeleryScheduleParsing(TestCase):
                     "schedule": crontab(minute=10, hour=2),
                     "options": {
                         "expires": self.LLM_INDEX_EXPIRE_TIME,
-                        "kwargs": {
-                            "progress_bar_disable": True,
-                        },
                     },
                 },
             },
index afc0abb460b39d9056c6d65889d6f84511fb0c80..548b6ba51b78babea9f3cdbdb645373e7d22b777 100644 (file)
@@ -115,7 +115,7 @@ def remove_document_docstore_nodes(document: Document, index: VectorStoreIndex):
         index.docstore.delete_document(node_id)
 
 
-def update_llm_index(*, progress_bar_disable=False, rebuild=False):
+def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
     """
     Rebuild or update the LLM index.
     """
@@ -123,8 +123,9 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
 
     documents = Document.objects.all()
     if not documents.exists():
-        logger.warning("No documents found to index.")
-        return
+        msg = "No documents found to index."
+        logger.warning(msg)
+        return msg
 
     if (
         rebuild
@@ -145,6 +146,7 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
             embed_model=embed_model,
             show_progress=not progress_bar_disable,
         )
+        msg = "LLM index rebuilt successfully."
     else:
         # Update existing index
         index = load_or_build_index()
@@ -173,15 +175,18 @@ def update_llm_index(*, progress_bar_disable=False, rebuild=False):
                 nodes.extend(build_document_node(document))
 
         if nodes:
+            msg = "LLM index updated successfully."
             logger.info(
                 "Updating %d nodes in LLM index.",
                 len(nodes),
             )
             index.insert_nodes(nodes)
         else:
-            logger.info("No changes detected, skipping llm index rebuild.")
+            msg = "No changes detected in LLM index."
+            logger.info(msg)
 
     index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
+    return msg
 
 
 def llm_index_add_or_update_document(document: Document):