]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Adds configuration for database timeout, documentation and troubleshooting suggestion
authorTrenton Holmes <holmes.trenton@gmail.com>
Thu, 21 Jul 2022 15:02:11 +0000 (08:02 -0700)
committerTrenton Holmes <holmes.trenton@gmail.com>
Fri, 22 Jul 2022 15:40:08 +0000 (08:40 -0700)
docs/configuration.rst
docs/troubleshooting.rst
src/documents/signals/handlers.py
src/paperless/settings.py

index c4203472c77970f1251c17dcc8db9c96653f6009..fd68f61cbf91cdb8b70c4a909286d350ded3bfe2 100644 (file)
@@ -31,7 +31,7 @@ PAPERLESS_REDIS=<url>
 
 PAPERLESS_DBHOST=<hostname>
     By default, sqlite is used as the database backend. This can be changed here.
-    Set PAPERLESS_DBHOST and PostgreSQL will be used instead of mysql.
+    Set PAPERLESS_DBHOST and PostgreSQL will be used instead of sqlite.
 
 PAPERLESS_DBPORT=<port>
     Adjust port if necessary.
@@ -60,6 +60,13 @@ PAPERLESS_DBSSLMODE=<mode>
 
     Default is ``prefer``.
 
+PAPERLESS_DB_TIMEOUT=<float>
+    Amount of time, in seconds, for a database connection to wait for the database to unlock.
+    Mostly applicable to sqlite-based installations; consider changing to PostgreSQL
+    if you need to increase this.
+
+    Defaults to unset, keeping the Django defaults.
+
 Paths and folders
 #################
 
index 3db9a069e15032a3b64d2b69d2a868f0ac0e5328..1605fed115ddf395af9d8e46cdb5a4e46b628fee 100644 (file)
@@ -301,3 +301,19 @@ try adjusting the :ref:`polling configuration <configuration-polling>`.
 
     The user will need to manually move the file out of the consume folder and
     back in, for the initial failing file to be consumed.
+
+Log reports "Creating PaperlessTask failed".
+#########################################################
+
+You might find messages like these in your log files:
+
+.. code::
+
+    [ERROR] [paperless.management.consumer] Creating PaperlessTask failed: database is locked
+
+You are likely using an sqlite-based installation with an increased number of workers and are running into sqlite's concurrency limitations.
+Uploading or consuming multiple files at once results in many workers attempting to access the database simultaneously.
+
+Consider changing to the PostgreSQL database if you will often be processing many documents at once.  Otherwise,
+try tweaking the ``PAPERLESS_DB_TIMEOUT`` setting to allow more time for the database to unlock.  This may have
+minor performance implications.
index f7a04ad51d2deff10aa7fd11a54b656bd621df27..e2f4fb4f7044c2d8309a826a0afcb103ee073b4a 100644 (file)
@@ -11,6 +11,7 @@ from django.contrib.contenttypes.models import ContentType
 from django.db import DatabaseError
 from django.db import models
 from django.db.models import Q
+from django.db.utils import OperationalError
 from django.dispatch import receiver
 from django.utils import termcolors
 from django.utils import timezone
@@ -506,21 +507,28 @@ def add_to_index(sender, document, **kwargs):
 @receiver(django_q.signals.pre_enqueue)
 def init_paperless_task(sender, task, **kwargs):
     if task["func"] == "documents.tasks.consume_file":
-        paperless_task, created = PaperlessTask.objects.get_or_create(
-            task_id=task["id"],
-        )
-        paperless_task.name = task["name"]
-        paperless_task.created = task["started"]
-        paperless_task.save()
+        try:
+            paperless_task, created = PaperlessTask.objects.get_or_create(
+                task_id=task["id"],
+            )
+            paperless_task.name = task["name"]
+            paperless_task.created = task["started"]
+            paperless_task.save()
+        except OperationalError as e:
+            logger.error(f"Creating PaperlessTask failed: {e}")
 
 
 @receiver(django_q.signals.pre_execute)
 def paperless_task_started(sender, task, **kwargs):
     try:
         if task["func"] == "documents.tasks.consume_file":
-            paperless_task = PaperlessTask.objects.get(task_id=task["id"])
+            paperless_task, created = PaperlessTask.objects.get_or_create(
+                task_id=task["id"],
+            )
             paperless_task.started = timezone.now()
             paperless_task.save()
+    except OperationalError as e:
+        logger.error(f"Creating PaperlessTask failed: {e}")
     except PaperlessTask.DoesNotExist:
         pass
 
@@ -529,8 +537,12 @@ def paperless_task_started(sender, task, **kwargs):
 def update_paperless_task(sender, instance, **kwargs):
     try:
         if instance.func == "documents.tasks.consume_file":
-            paperless_task = PaperlessTask.objects.get(task_id=instance.id)
+            paperless_task, created = PaperlessTask.objects.get_or_create(
+                task_id=instance.id,
+            )
             paperless_task.attempted_task = instance
             paperless_task.save()
+    except OperationalError as e:
+        logger.error(f"Creating PaperlessTask failed: {e}")
     except PaperlessTask.DoesNotExist:
         pass
index bfb9507ba04683e6e05814aca32aefb37f99713e..e7fd0f3f0c17e00cfd61d415eddd6168c786a921 100644 (file)
@@ -327,6 +327,13 @@ if os.getenv("PAPERLESS_DBHOST"):
     if os.getenv("PAPERLESS_DBPORT"):
         DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
 
+if os.getenv("PAPERLESS_DB_TIMEOUT") is not None:
+    _new_opts = {"timeout": float(os.getenv("PAPERLESS_DB_TIMEOUT"))}
+    if "OPTIONS" in DATABASES["default"]:
+        DATABASES["default"]["OPTIONS"].update(_new_opts)
+    else:
+        DATABASES["default"]["OPTIONS"] = _new_opts
+
 DEFAULT_AUTO_FIELD = "django.db.models.AutoField"
 
 ###############################################################################