]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Allows scheduling tasks via cron
authorTrenton Holmes <797416+stumpylog@users.noreply.github.com>
Sat, 31 Dec 2022 21:16:07 +0000 (13:16 -0800)
committerTrenton H <797416+stumpylog@users.noreply.github.com>
Wed, 11 Jan 2023 15:21:26 +0000 (07:21 -0800)
src/paperless/settings.py

index cc1b9e09688c9c8c8b288ea1756bd8dbe85291db..55c95018d2dbdc516e331fcf6e9c2c256fb9bad7 100644 (file)
@@ -5,6 +5,7 @@ import multiprocessing
 import os
 import re
 import tempfile
+from typing import Dict
 from typing import Final
 from typing import Optional
 from typing import Set
@@ -107,6 +108,51 @@ def _parse_redis_url(env_redis: Optional[str]) -> Tuple[str]:
     return (env_redis, env_redis)
 
 
+def _parse_beat_schedule() -> Dict:
+    schedule = {}
+    tasks = [
+        {
+            "name": "Check all e-mail accounts",
+            "env_key": "PAPERLESS_EMAIL_TASK_CRON",
+            # Default every ten minutes
+            "env_default": "*/10 * * * *",
+            "task": "paperless_mail.tasks.process_mail_accounts",
+        },
+        {
+            "name": "Train the classifier",
+            "env_key": "PAPERLESS_TRAIN_TASK_CRON",
+            # Default hourly at 5 minutes past the hour
+            "env_default": "5 */1 * * *",
+            "task": "documents.tasks.train_classifier",
+        },
+        {
+            "name": "Optimize the index",
+            "env_key": "PAPERLESS_INDEX_TASK_CRON",
+            # Default daily at midnight
+            "env_default": "0 0 * * *",
+            "task": "documents.tasks.index_optimize",
+        },
+        {
+            "name": "Perform sanity check",
+            "env_key": "PAPERLESS_SANITY_TASK_CRON",
+            # Default Sunday at 00:30
+            "env_default": "30 0 * * sun",
+            "task": "documents.tasks.sanity_check",
+        },
+    ]
+    for task in tasks:
+        value = os.getenv(task["env_key"], task["env_default"])
+        if value == "disable":
+            continue
+        minute, hour, day_month, month, day_week = value.split(" ")
+        schedule[task["name"]] = {
+            "task": task["task"],
+            "schedule": crontab(minute, hour, day_week, day_month, month),
+        }
+
+    return schedule
+
+
 # NEVER RUN WITH DEBUG IN PRODUCTION.
 DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
 
@@ -530,29 +576,10 @@ CELERY_RESULT_EXTENDED = True
 CELERY_RESULT_BACKEND = "django-db"
 CELERY_CACHE_BACKEND = "default"
 
+# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
+CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
 
-CELERY_BEAT_SCHEDULE = {
-    # Every ten minutes
-    "Check all e-mail accounts": {
-        "task": "paperless_mail.tasks.process_mail_accounts",
-        "schedule": crontab(minute="*/10"),
-    },
-    # Hourly at 5 minutes past the hour
-    "Train the classifier": {
-        "task": "documents.tasks.train_classifier",
-        "schedule": crontab(minute="5", hour="*/1"),
-    },
-    # Daily at midnight
-    "Optimize the index": {
-        "task": "documents.tasks.index_optimize",
-        "schedule": crontab(minute=0, hour=0),
-    },
-    # Weekly, Sunday at 00:30
-    "Perform sanity check": {
-        "task": "documents.tasks.sanity_check",
-        "schedule": crontab(minute=30, hour=0, day_of_week="sun"),
-    },
-}
+# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
 CELERY_BEAT_SCHEDULE_FILENAME = os.path.join(DATA_DIR, "celerybeat-schedule.db")
 
 # django setting.