]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Fix: handle uuid fields created under mariadb and Django 4 (#8034)
authorshamoon <4887959+shamoon@users.noreply.github.com>
Mon, 28 Oct 2024 13:54:16 +0000 (06:54 -0700)
committerGitHub <noreply@github.com>
Mon, 28 Oct 2024 13:54:16 +0000 (13:54 +0000)
docker/install_management_commands.sh
docs/troubleshooting.md
src/documents/bulk_edit.py
src/documents/management/commands/convert_mariadb_uuid.py [new file with mode: 0644]
src/documents/tests/test_api_documents.py
src/documents/tests/test_bulk_edit.py
src/documents/tests/test_management.py
src/documents/views.py

index 38604af9d20f51d855e0044e5aeda95318b6266f..fa59d59a9d97c7d9f8d0a9a08d68bc077e2e2858 100755 (executable)
@@ -14,7 +14,8 @@ for command in decrypt_documents \
        document_thumbnails \
        document_sanity_checker \
        document_fuzzy_match \
-       manage_superuser;
+       manage_superuser \
+       convert_mariadb_uuid;
 do
        echo "installing $command..."
        sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
index 6418b82bacf221437a6c30eaa7acd6bf01cf5daa..b18ded26d5ba71da54f4434795b3a160382fdcdd 100644 (file)
@@ -353,6 +353,20 @@ ways from the original. As the logs indicate, if you encounter this error you ca
 `PAPERLESS_OCR_USER_ARGS: '{"continue_on_soft_render_error": true}'` to try to 'force'
 processing documents with this issue.
 
+## Logs show "possible incompatible database column" when deleting documents {#convert-uuid-field}
+
+You may see errors when deleting documents like:
+
+```
+Data too long for column 'transaction_id' at row 1
+```
+
+This error can occur in installations which have upgraded from a version of Paperless-ngx that used Django 4 (Paperless-ngx versions prior to v2.13.0) with a MariaDB/MySQL database. Due to the backwards-incompatible change in Django 5, the column "documents_document.transaction_id" will need to be re-created, which can be done with a one-time run of the following management command:
+
+```shell-session
+$ python3 manage.py convert_mariadb_uuid
+```
+
 ## Platform-Specific Deployment Troubleshooting
 
 A user-maintained wiki page is available to help troubleshoot issues that may arise when trying to deploy Paperless-ngx on specific platforms, for example SELinux. Please see [the wiki](https://github.com/paperless-ngx/paperless-ngx/wiki/Platform%E2%80%90Specific-Troubleshooting).
index 1aba8f9ec3b2efac1ee7660bc7fc37bb10e9d747..2e3e5f591e6b3ebde754f5b535edd10523ec7469 100644 (file)
@@ -159,13 +159,20 @@ def modify_custom_fields(doc_ids: list[int], add_custom_fields, remove_custom_fi
 
 @shared_task
 def delete(doc_ids: list[int]):
-    Document.objects.filter(id__in=doc_ids).delete()
+    try:
+        Document.objects.filter(id__in=doc_ids).delete()
 
-    from documents import index
+        from documents import index
 
-    with index.open_index_writer() as writer:
-        for id in doc_ids:
-            index.remove_document_by_id(writer, id)
+        with index.open_index_writer() as writer:
+            for id in doc_ids:
+                index.remove_document_by_id(writer, id)
+    except Exception as e:
+        if "Data too long for column" in str(e):
+            logger.warning(
+                "Detected a possible incompatible database column. See https://docs.paperless-ngx.com/troubleshooting/#convert-uuid-field",
+            )
+        logger.error(f"Error deleting documents: {e!s}")
 
     return "OK"
 
diff --git a/src/documents/management/commands/convert_mariadb_uuid.py b/src/documents/management/commands/convert_mariadb_uuid.py
new file mode 100644 (file)
index 0000000..4000e67
--- /dev/null
@@ -0,0 +1,36 @@
+from django.core.management.base import BaseCommand
+from django.db import connection
+from django.db import models
+
+from documents.models import Document
+
+
+class Command(BaseCommand):
+    # This code is taken almost entirely from https://github.com/wagtail/wagtail/pull/11912 with all credit to the original author.
+    help = "Converts UUID columns from char type to the native UUID type used in MariaDB 10.7+ and Django 5.0+."
+
+    def convert_field(self, model, field_name, null=False):
+        if model._meta.get_field(field_name).model != model:  # pragma: no cover
+            # Field is inherited from a parent model
+            return
+
+        if not model._meta.managed:  # pragma: no cover
+            # The migration framework skips unmanaged models, so we should too
+            return
+
+        old_field = models.CharField(null=null, max_length=36)
+        old_field.set_attributes_from_name(field_name)
+
+        new_field = models.UUIDField(null=null)
+        new_field.set_attributes_from_name(field_name)
+
+        with connection.schema_editor() as schema_editor:
+            schema_editor.alter_field(model, old_field, new_field)
+            self.stdout.write(
+                self.style.SUCCESS(
+                    f"Successfully converted {model._meta.label} {field_name} field to UUID type.",
+                ),
+            )
+
+    def handle(self, **options):
+        self.convert_field(Document, "transaction_id", null=True)
index b1cd43932ed15c813efd247d77c4805fcb1deb84..2e2b02f0d3ad1386cc65abd683c5319d5df50c69 100644 (file)
@@ -15,6 +15,7 @@ from django.conf import settings
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
 from django.core.cache import cache
+from django.db import DataError
 from django.test import override_settings
 from django.utils import timezone
 from guardian.shortcuts import assign_perm
@@ -2605,6 +2606,35 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         self.assertEqual(resp.status_code, status.HTTP_200_OK)
         self.assertEqual(doc1.tags.count(), 2)
 
+    @mock.patch("django_softdelete.models.SoftDeleteModel.delete")
+    def test_warn_on_delete_with_old_uuid_field(self, mocked_delete):
+        """
+        GIVEN:
+            - Existing document in a (mocked) MariaDB database with an old UUID field
+        WHEN:
+            - API request to delete document is made which raises "Data too long for column" error
+        THEN:
+            - Warning is logged alerting the user of the issue (and link to the fix)
+        """
+
+        doc = Document.objects.create(
+            title="test",
+            mime_type="application/pdf",
+            content="this is a document 1",
+            checksum="1",
+        )
+
+        mocked_delete.side_effect = DataError(
+            "Data too long for column 'transaction_id' at row 1",
+        )
+
+        with self.assertLogs(level="WARNING") as cm:
+            self.client.delete(f"/api/documents/{doc.pk}/")
+            self.assertIn(
+                "Detected a possible incompatible database column",
+                cm.output[0],
+            )
+
 
 class TestDocumentApiV2(DirectoriesMixin, APITestCase):
     def setUp(self):
index d80116a80e750af829b6a8062f8b2ed76bf63315..c6e846a772e60779da9430e76f9d2d95eebb70e0 100644 (file)
@@ -327,6 +327,15 @@ class TestBulkEdit(DirectoriesMixin, TestCase):
         )
         self.assertEqual(groups_with_perms.count(), 2)
 
+    @mock.patch("documents.models.Document.delete")
+    def test_delete_documents_old_uuid_field(self, m):
+        m.side_effect = Exception("Data too long for column 'transaction_id' at row 1")
+        doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
+        bulk_edit.delete(doc_ids)
+        with self.assertLogs(level="WARNING") as cm:
+            bulk_edit.delete(doc_ids)
+            self.assertIn("possible incompatible database column", cm.output[0])
+
 
 class TestPDFActions(DirectoriesMixin, TestCase):
     def setUp(self):
index d1efe27d476338a0c97fda843761921f8ad22247..76a0a2c743a7291beedadffa324ea474ba4f1135 100644 (file)
@@ -3,6 +3,7 @@ import hashlib
 import os
 import shutil
 import tempfile
+from io import StringIO
 from pathlib import Path
 from unittest import mock
 
@@ -238,3 +239,16 @@ class TestSanityChecker(DirectoriesMixin, TestCase):
 
         self.assertEqual(len(capture.output), 2)
         self.assertIn("Checksum mismatch. Stored: abc, actual:", capture.output[1])
+
+
+class TestConvertMariaDBUUID(TestCase):
+    @mock.patch("django.db.connection.schema_editor")
+    def test_convert(self, m):
+        m.alter_field.return_value = None
+
+        stdout = StringIO()
+        call_command("convert_mariadb_uuid", stdout=stdout)
+
+        m.assert_called_once()
+
+        self.assertIn("Successfully converted", stdout.getvalue())
index 919f9d2ddddce6adb2e452c45dc9b06b594795e5..10b2d0cbda45ac4085916d01a769babb323f8a3c 100644 (file)
@@ -406,7 +406,17 @@ class DocumentViewSet(
         from documents import index
 
         index.remove_document_from_index(self.get_object())
-        return super().destroy(request, *args, **kwargs)
+        try:
+            return super().destroy(request, *args, **kwargs)
+        except Exception as e:
+            if "Data too long for column" in str(e):
+                logger.warning(
+                    "Detected a possible incompatible database column. See https://docs.paperless-ngx.com/troubleshooting/#convert-uuid-field",
+                )
+            logger.error(f"Error deleting document: {e!s}")
+            return HttpResponseBadRequest(
+                "Error deleting document, check logs for more detail.",
+            )
 
     @staticmethod
     def original_requested(request):