TrainClassifier = 'train_classifier',
SanityCheck = 'check_sanity',
IndexOptimize = 'index_optimize',
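+ // Mirrors the "llmindex_update" task name reported by the backend's PaperlessTask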
+ LLMIndexUpdate = 'llmindex_update',
}
export enum PaperlessTaskStatus {
llmindex_index(
progress_bar_disable=self.no_progress_bar,
rebuild=options["command"] == "rebuild",
+ scheduled=False,
)
--- /dev/null
+# Generated by Django 5.1.8 on 2025-04-30 02:38
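+# Adds the "llmindex_update" choice to PaperlessTask.task_name.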
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("documents", "1065_workflowaction_assign_custom_fields_values"),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name="paperlesstask",
+ name="task_name",
+ field=models.CharField(
+ choices=[
+ ("consume_file", "Consume File"),
+ ("train_classifier", "Train Classifier"),
+ ("check_sanity", "Check Sanity"),
+ ("index_optimize", "Index Optimize"),
+ ("llmindex_update", "LLM Index Update"),
+ ],
+ help_text="Name of the task that was run",
+ max_length=255,
+ null=True,
+ verbose_name="Task Name",
+ ),
+ ),
+ ]
TRAIN_CLASSIFIER = ("train_classifier", _("Train Classifier"))
CHECK_SANITY = ("check_sanity", _("Check Sanity"))
INDEX_OPTIMIZE = ("index_optimize", _("Index Optimize"))
+ LLMINDEX_UPDATE = ("llmindex_update", _("LLM Index Update"))
task_id = models.CharField(
max_length=255,
@shared_task
-def llmindex_index(*, progress_bar_disable=False, rebuild=False):
+def llmindex_index(*, progress_bar_disable=True, rebuild=False, scheduled=True):
ai_config = AIConfig()
if ai_config.llm_index_enabled():
- update_llm_index(
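+ # Record this run as a PaperlessTask so it is tracked like other background tasks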
+ task = PaperlessTask.objects.create(
+ type=PaperlessTask.TaskType.SCHEDULED_TASK
+ if scheduled
+ else PaperlessTask.TaskType.MANUAL_TASK,
+ task_id=uuid.uuid4(),
+ task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
+ status=states.STARTED,
+ date_created=timezone.now(),
+ date_started=timezone.now(),
+ )
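+ # Deferred import: the indexing machinery is only loaded when the LLM index is enabled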
+ from paperless_ai.indexing import update_llm_index
+
+ result = update_llm_index(
progress_bar_disable=progress_bar_disable,
rebuild=rebuild,
)
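+ # Mark the task as finished and keep the status message returned by update_llm_index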
+ task.status = states.SUCCESS
+ task.result = result
+ task.date_done = timezone.now()
+ task.save(update_fields=["status", "result", "date_done"])
@shared_task
def remove_document_from_llm_index(document):
llm_index_remove_document(document)
-
-
-# TODO: schedule to run periodically
-@shared_task
-def rebuild_llm_index_task():
- from paperless_ai.indexing import update_llm_index
-
- update_llm_index(rebuild=True)
-# Generated by Django 5.1.7 on 2025-04-24 02:09
+# Generated by Django 5.1.8 on 2025-04-30 02:38
from django.db import migrations
from django.db import models
),
migrations.AddField(
model_name="applicationconfiguration",
- name="llm_embedding_backend",
+ name="llm_api_key",
field=models.CharField(
blank=True,
- choices=[("openai", "OpenAI"), ("local", "Local")],
- max_length=32,
+ max_length=128,
null=True,
- verbose_name="Sets the LLM Embedding backend",
+ verbose_name="Sets the LLM API key",
),
),
migrations.AddField(
model_name="applicationconfiguration",
- name="llm_embedding_model",
+ name="llm_backend",
field=models.CharField(
blank=True,
+ choices=[("openai", "OpenAI"), ("ollama", "Ollama")],
max_length=32,
null=True,
- verbose_name="Sets the LLM Embedding model",
+ verbose_name="Sets the LLM backend",
),
),
migrations.AddField(
model_name="applicationconfiguration",
- name="llm_api_key",
+ name="llm_embedding_backend",
field=models.CharField(
blank=True,
- max_length=128,
+ choices=[("openai", "OpenAI"), ("huggingface", "Huggingface")],
+ max_length=32,
null=True,
- verbose_name="Sets the LLM API key",
+ verbose_name="Sets the LLM embedding backend",
),
),
migrations.AddField(
model_name="applicationconfiguration",
- name="llm_backend",
+ name="llm_embedding_model",
field=models.CharField(
blank=True,
- choices=[("openai", "OpenAI"), ("ollama", "Ollama")],
max_length=32,
null=True,
- verbose_name="Sets the LLM backend",
+ verbose_name="Sets the LLM embedding model",
),
),
migrations.AddField(
"options": {
# 1 hour before default schedule sends again
"expires": 23.0 * 60.0 * 60.0,
- "kwargs": {
- "progress_bar_disable": True,
- },
},
},
]
"schedule": crontab(minute=10, hour=2),
"options": {
"expires": self.LLM_INDEX_EXPIRE_TIME,
- "kwargs": {
- "progress_bar_disable": True,
- },
},
},
},
"schedule": crontab(minute=10, hour=2),
"options": {
"expires": self.LLM_INDEX_EXPIRE_TIME,
- "kwargs": {
- "progress_bar_disable": True,
- },
},
},
},
"schedule": crontab(minute=10, hour=2),
"options": {
"expires": self.LLM_INDEX_EXPIRE_TIME,
- "kwargs": {
- "progress_bar_disable": True,
- },
},
},
},
index.docstore.delete_document(node_id)
-def update_llm_index(*, progress_bar_disable=False, rebuild=False):
+def update_llm_index(*, progress_bar_disable=False, rebuild=False) -> str:
"""
Rebuild or update the LLM index.
"""
documents = Document.objects.all()
if not documents.exists():
- logger.warning("No documents found to index.")
- return
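+ # Return the message instead of None so callers (e.g. the Celery task) can store it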
+ msg = "No documents found to index."
+ logger.warning(msg)
+ return msg
if (
rebuild
embed_model=embed_model,
show_progress=not progress_bar_disable,
)
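+ # Keep a human-readable status message to hand back to the caller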
+ msg = "LLM index rebuilt successfully."
else:
# Update existing index
index = load_or_build_index()
nodes.extend(build_document_node(document))
if nodes:
+ msg = "LLM index updated successfully."
logger.info(
"Updating %d nodes in LLM index.",
len(nodes),
)
index.insert_nodes(nodes)
else:
- logger.info("No changes detected, skipping llm index rebuild.")
+ msg = "No changes detected in LLM index."
+ logger.info(msg)
index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
+ return msg
def llm_index_add_or_update_document(document: Document):