]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Enhancement: prune audit logs and management command (#8416)
authorshamoon <4887959+shamoon@users.noreply.github.com>
Tue, 3 Dec 2024 19:28:27 +0000 (11:28 -0800)
committerGitHub <noreply@github.com>
Tue, 3 Dec 2024 19:28:27 +0000 (19:28 +0000)
docker/install_management_commands.sh
docs/administration.md
src/documents/management/commands/prune_audit_logs.py [new file with mode: 0644]
src/documents/tasks.py
src/documents/tests/test_management.py

index fa59d59a9d97c7d9f8d0a9a08d68bc077e2e2858..37c17058ae3749505027470896705f31095f9908 100755 (executable)
@@ -15,7 +15,8 @@ for command in decrypt_documents \
        document_sanity_checker \
        document_fuzzy_match \
        manage_superuser \
-       convert_mariadb_uuid;
+       convert_mariadb_uuid \
+       prune_audit_logs;
 do
        echo "installing $command..."
        sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
index 8204352d8c738737f419df043103661aa841107b..0c852e6672d709dc7ad63987192c6164f75ea6b0 100644 (file)
@@ -624,3 +624,12 @@ document_fuzzy_match [--ratio] [--processes N]
     If providing the `--delete` option, it is highly recommended to have a backup.
     While every effort has been taken to ensure proper operation, there is always the
     chance of deletion of a file you want to keep.
+
+### Prune history (audit log) entries {#prune-history}
+
+If the audit log is enabled, Paperless-ngx keeps an audit log of all changes made to documents. Functionality to automatically remove entries for deleted documents was added, but
+entries created prior to this are not removed. This command allows you to prune the audit log of entries that are no longer needed.
+
+```shell
+prune_audit_logs
+```
diff --git a/src/documents/management/commands/prune_audit_logs.py b/src/documents/management/commands/prune_audit_logs.py
new file mode 100644 (file)
index 0000000..b49f4af
--- /dev/null
@@ -0,0 +1,39 @@
+from auditlog.models import LogEntry
+from django.core.management.base import BaseCommand
+from django.db import transaction
+from tqdm import tqdm
+
+from documents.management.commands.mixins import ProgressBarMixin
+
+
+class Command(BaseCommand, ProgressBarMixin):
+    """
+    Prune the audit logs of objects that no longer exist.
+    """
+
+    help = "Prunes the audit logs of objects that no longer exist."
+
+    def add_arguments(self, parser):
+        self.add_argument_progress_bar_mixin(parser)
+
+    def handle(self, **options):
+        self.handle_progress_bar_mixin(**options)
+        with transaction.atomic():
+            for log_entry in tqdm(LogEntry.objects.all(), disable=self.no_progress_bar):
+                model_class = log_entry.content_type.model_class()
+                # use global_objects for SoftDeleteModel
+                objects = (
+                    model_class.global_objects
+                    if hasattr(model_class, "global_objects")
+                    else model_class.objects
+                )
+                if (
+                    log_entry.object_id
+                    and not objects.filter(pk=log_entry.object_id).exists()
+                ):
+                    log_entry.delete()
+                    tqdm.write(
+                        self.style.NOTICE(
+                            f"Deleted audit log entry for {model_class.__name__} #{log_entry.object_id}",
+                        ),
+                    )
index bd519935938d2701f7ba6d31772eaafcedb92583..d12e7313892e1af9f6d1241aedc32ccfbe4cf75b 100644 (file)
@@ -10,6 +10,7 @@ import tqdm
 from celery import Task
 from celery import shared_task
 from django.conf import settings
+from django.contrib.contenttypes.models import ContentType
 from django.db import models
 from django.db import transaction
 from django.db.models.signals import post_save
@@ -332,9 +333,17 @@ def empty_trash(doc_ids=None):
     )
 
     try:
+        deleted_document_ids = documents.values_list("id", flat=True)
         # Temporarily connect the cleanup handler
         models.signals.post_delete.connect(cleanup_document_deletion, sender=Document)
         documents.delete()  # this is effectively a hard delete
+
+        if settings.AUDIT_LOG_ENABLED:
+            # Delete the audit log entries for documents that dont exist anymore
+            LogEntry.objects.filter(
+                content_type=ContentType.objects.get_for_model(Document),
+                object_id__in=deleted_document_ids,
+            ).delete()
     except Exception as e:  # pragma: no cover
         logger.exception(f"Error while emptying trash: {e}")
     finally:
index 5340035e7274f7a1939a88192e67187ee1ed4f7e..2f21627a71f656f15f54dc6753bb906bf7e756b1 100644 (file)
@@ -7,6 +7,8 @@ from io import StringIO
 from pathlib import Path
 from unittest import mock
 
+from auditlog.models import LogEntry
+from django.contrib.contenttypes.models import ContentType
 from django.core.management import call_command
 from django.test import TestCase
 from django.test import override_settings
@@ -252,3 +254,15 @@ class TestConvertMariaDBUUID(TestCase):
         m.assert_called_once()
 
         self.assertIn("Successfully converted", stdout.getvalue())
+
+
+class TestPruneAuditLogs(TestCase):
+    def test_prune_audit_logs(self):
+        LogEntry.objects.create(
+            content_type=ContentType.objects.get_for_model(Document),
+            object_id=1,
+            action=LogEntry.Action.CREATE,
+        )
+        call_command("prune_audit_logs")
+
+        self.assertEqual(LogEntry.objects.count(), 0)