git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Feature: Audit Trail (#4425)
author nanokatz <22838757+nanokatz@users.noreply.github.com>
Mon, 30 Oct 2023 16:23:22 +0000 (17:23 +0100)
committer GitHub <noreply@github.com>
Mon, 30 Oct 2023 16:23:22 +0000 (09:23 -0700)
Adds a new feature for optionally enabling change tracking for audit purposes
---------

Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
Co-authored-by: Trenton Holmes <797416+stumpylog@users.noreply.github.com>
12 files changed:
Pipfile
Pipfile.lock
docs/configuration.md
src/documents/admin.py
src/documents/models.py
src/documents/tasks.py
src/documents/views.py
src/paperless/__init__.py
src/paperless/checks.py
src/paperless/settings.py
src/paperless/tests/test_checks.py
src/setup.cfg

diff --git a/Pipfile b/Pipfile
index d8b66d7190742f44199e4060987187383e59e8e9..1b8d3a94aadd9fdad62bede43d74fe165f58a550 100644 (file)
--- a/Pipfile
+++ b/Pipfile
@@ -52,6 +52,7 @@ bleach = "*"
 zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"}
 django-multiselectfield = "*"
 gotenberg-client = "*"
+django-auditlog = "*"
 
 [dev-packages]
 # Linting
index 67cdc29b1c4097804354087324871c8bca8cde72..17f35b94b1cef239ebbf42dd499e39b6309bd310 100644 (file)
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "505bd6b18d31ed64988ef307c12a5acb70f611cafd932a391e985a11bbbc8000"
+            "sha256": "7b4272de2042a346f3252ae20e7bbeee60c375381f59526caa35511a706d4977"
         },
         "pipfile-spec": 6,
         "requires": {},
         },
         "django": {
             "hashes": [
-                "sha256:5e5c1c9548ffb7796b4a8a4782e9a2e5a3df3615259fc1bfd3ebc73b646146c1",
-                "sha256:b6b2b5cae821077f137dc4dade696a1c2aa292f892eca28fa8d7bfdf2608ddd4"
+                "sha256:08f41f468b63335aea0d904c5729e0250300f6a1907bf293a65499496cdbc68f",
+                "sha256:a64d2487cdb00ad7461434320ccc38e60af9c404773a2f95ab0093b4453a3215"
             ],
             "index": "pypi",
             "markers": "python_version >= '3.8'",
-            "version": "==4.2.5"
+            "version": "==4.2.6"
+        },
+        "django-auditlog": {
+            "hashes": [
+                "sha256:7bc2c87e4aff62dec9785d1b2359a2b27148f8c286f8a52b9114fc7876c5a9f7",
+                "sha256:b9d3acebb64f3f2785157efe3f2f802e0929aafc579d85bbfb9827db4adab532"
+            ],
+            "index": "pypi",
+            "markers": "python_version >= '3.7'",
+            "version": "==2.3.0"
         },
         "django-celery-results": {
             "hashes": [
                 "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0",
                 "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"
             ],
-            "markers": "python_version < '3.10'",
+            "markers": "python_version < '3.11'",
             "version": "==4.8.0"
         },
         "tzdata": {
index ca04dbc7bf639a0a12e9ef1a83393d3c93e902b3..fa2b6c2fa9723926e431454321780d7f5cd9085d 100644 (file)
@@ -1136,6 +1136,15 @@ combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE bigger than 1.0.
 
     Defaults to "300"
 
+## Audit Trail
+
+#### [`PAPERLESS_AUDIT_LOG_ENABLED=<bool>`](#PAPERLESS_AUDIT_LOG_ENABLED){#PAPERLESS_AUDIT_LOG_ENABLED}
+
+: Enables an audit trail for documents, document types, correspondents, tags, and notes. Log entries can only be viewed in the Django admin.
+
+    !!! warning
+        Once enabled, this setting cannot be disabled.
+
 ## Collate Double-Sided Documents {#collate}
 
 #### [`PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED=<bool>`](#PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED) {#PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED}
index a190f8d1edf92b25a0d7df8c1cf0086be62fb10d..97871e4f8e345754a36a0e8e2e09fa6979584576 100644 (file)
@@ -1,3 +1,4 @@
+from django.conf import settings
 from django.contrib import admin
 from guardian.admin import GuardedModelAdmin
 
@@ -12,6 +13,10 @@ from documents.models import ShareLink
 from documents.models import StoragePath
 from documents.models import Tag
 
+if settings.AUDIT_LOG_ENABLED:
+    from auditlog.admin import LogEntryAdmin
+    from auditlog.models import LogEntry
+
 
 class CorrespondentAdmin(GuardedModelAdmin):
     list_display = ("name", "match", "matching_algorithm")
@@ -148,3 +153,12 @@ admin.site.register(StoragePath, StoragePathAdmin)
 admin.site.register(PaperlessTask, TaskAdmin)
 admin.site.register(Note, NotesAdmin)
 admin.site.register(ShareLink, ShareLinksAdmin)
+
+if settings.AUDIT_LOG_ENABLED:
+
+    class LogEntryAUDIT(LogEntryAdmin):
+        def has_delete_permission(self, request, obj=None):
+            return False
+
+    admin.site.unregister(LogEntry)
+    admin.site.register(LogEntry, LogEntryAUDIT)
index a1f7d7dd618f7253381afd308eed17db42047793..b0d347f200728895b7c70056b1ad1c18648c41ce 100644 (file)
@@ -20,6 +20,9 @@ from django.utils import timezone
 from django.utils.translation import gettext_lazy as _
 from multiselectfield import MultiSelectField
 
+if settings.AUDIT_LOG_ENABLED:
+    from auditlog.registry import auditlog
+
 from documents.data_models import DocumentSource
 from documents.parsers import get_default_file_extension
 
@@ -872,3 +875,11 @@ class ConsumptionTemplate(models.Model):
 
     def __str__(self):
         return f"{self.name}"
+
+
+if settings.AUDIT_LOG_ENABLED:
+    auditlog.register(Document, m2m_fields={"tags"})
+    auditlog.register(Correspondent)
+    auditlog.register(Tag)
+    auditlog.register(DocumentType)
+    auditlog.register(Note)
index 8aea56eaa032502241698cbf6bbf563925798804..e89b4fa47849f59b57f613273a633603cb24f616 100644 (file)
@@ -37,6 +37,10 @@ from documents.parsers import DocumentParser
 from documents.parsers import get_parser_class_for_mime_type
 from documents.sanity_checker import SanityCheckFailedException
 
+if settings.AUDIT_LOG_ENABLED:
+    import json
+
+    from auditlog.models import LogEntry
 logger = logging.getLogger("paperless.tasks")
 
 
@@ -258,11 +262,37 @@ def update_document_archive_file(document_id):
                     document,
                     archive_filename=True,
                 )
+                oldDocument = Document.objects.get(pk=document.pk)
                 Document.objects.filter(pk=document.pk).update(
                     archive_checksum=checksum,
                     content=parser.get_text(),
                     archive_filename=document.archive_filename,
                 )
+                newDocument = Document.objects.get(pk=document.pk)
+                if settings.AUDIT_LOG_ENABLED:
+                    LogEntry.objects.log_create(
+                        instance=oldDocument,
+                        changes=json.dumps(
+                            {
+                                "content": [oldDocument.content, newDocument.content],
+                                "archive_checksum": [
+                                    oldDocument.archive_checksum,
+                                    newDocument.archive_checksum,
+                                ],
+                                "archive_filename": [
+                                    oldDocument.archive_filename,
+                                    newDocument.archive_filename,
+                                ],
+                            },
+                        ),
+                        additional_data=json.dumps(
+                            {
+                                "reason": "Redo OCR called",
+                            },
+                        ),
+                        action=LogEntry.Action.UPDATE,
+                    )
+
                 with FileLock(settings.MEDIA_LOCK):
                     create_source_path_directory(document.archive_path)
                     shutil.move(parser.get_archive_path(), document.archive_path)
index 02049d18f9c6ccd2a0afa45288e0ef6ef6b5baf7..8f2be05496eae8c586130f173c67cdf60bcb6e4e 100644 (file)
@@ -115,6 +115,9 @@ from paperless import version
 from paperless.db import GnuPG
 from paperless.views import StandardPagination
 
+if settings.AUDIT_LOG_ENABLED:
+    from auditlog.models import LogEntry
+
 logger = logging.getLogger("paperless.api")
 
 
@@ -521,6 +524,18 @@ class DocumentViewSet(
                     user=currentUser,
                 )
                 c.save()
+                # If audit log is enabled make an entry in the log
+                # about this note change
+                if settings.AUDIT_LOG_ENABLED:
+                    LogEntry.objects.log_create(
+                        instance=doc,
+                        changes=json.dumps(
+                            {
+                                "Note Added": ["None", c.id],
+                            },
+                        ),
+                        action=LogEntry.Action.UPDATE,
+                    )
 
                 doc.modified = timezone.now()
                 doc.save()
@@ -546,6 +561,17 @@ class DocumentViewSet(
                 return HttpResponseForbidden("Insufficient permissions to delete")
 
             note = Note.objects.get(id=int(request.GET.get("id")))
+            if settings.AUDIT_LOG_ENABLED:
+                LogEntry.objects.log_create(
+                    instance=doc,
+                    changes=json.dumps(
+                        {
+                            "Note Deleted": [note.id, "None"],
+                        },
+                    ),
+                    action=LogEntry.Action.UPDATE,
+                )
+
             note.delete()
 
             doc.modified = timezone.now()
index 36e448bee88d45072e70d21ffc70b56856bf171f..54ff3cb791f56f63a6a3e10cbe72a51441385ed0 100644 (file)
@@ -1,4 +1,5 @@
 from paperless.celery import app as celery_app
+from paperless.checks import audit_log_check
 from paperless.checks import binaries_check
 from paperless.checks import paths_check
 from paperless.checks import settings_values_check
@@ -8,4 +9,5 @@ __all__ = [
     "binaries_check",
     "paths_check",
     "settings_values_check",
+    "audit_log_check",
 ]
index 2b78eb4fa620f417846fe55fa2bdc2a74e3582d9..6b0501821b6bafb98501bc8f24eebbe1986e1d08 100644 (file)
@@ -5,9 +5,11 @@ import shutil
 import stat
 
 from django.conf import settings
+from django.core.checks import Critical
 from django.core.checks import Error
 from django.core.checks import Warning
 from django.core.checks import register
+from django.db import connections
 
 exists_message = "{} is set but doesn't exist."
 exists_hint = "Create a directory at {}"
@@ -195,3 +197,19 @@ def settings_values_check(app_configs, **kwargs):
         + _barcode_scanner_validate()
         + _email_certificate_validate()
     )
+
+
+@register()
+def audit_log_check(app_configs, **kwargs):
+    db_conn = connections["default"]
+    all_tables = db_conn.introspection.table_names()
+
+    if ("auditlog_logentry" in all_tables) and not (settings.AUDIT_LOG_ENABLED):
+        return [
+            Critical(
+                (
+                    "auditlog table was found but PAPERLESS_AUDIT_LOG_ENABLED"
+                    " is not active.  This setting cannot be disabled after enabling"
+                ),
+            ),
+        ]
index 3cbfb4bff25be02d239b55b8edf9c2fb32ceb74d..5910fd56c8d13e0901e6919cc97489a41d6847f9 100644 (file)
@@ -933,6 +933,11 @@ TIKA_GOTENBERG_ENDPOINT = os.getenv(
 if TIKA_ENABLED:
     INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
 
+AUDIT_LOG_ENABLED = __get_boolean("PAPERLESS_AUDIT_LOG_ENABLED", "NO")
+if AUDIT_LOG_ENABLED:
+    INSTALLED_APPS.append("auditlog")
+    MIDDLEWARE.append("auditlog.middleware.AuditlogMiddleware")
+
 
 def _parse_ignore_dates(
     env_ignore: str,
index 6aac1a4c68b932aab9f3d2cc29fc16af2d47d356..a6879cdbf07f8c7f02e9cf0e17164b7ef3a3a194 100644 (file)
@@ -1,11 +1,13 @@
 import os
 from pathlib import Path
+from unittest import mock
 
 from django.test import TestCase
 from django.test import override_settings
 
 from documents.tests.utils import DirectoriesMixin
 from documents.tests.utils import FileSystemAssertsMixin
+from paperless.checks import audit_log_check
 from paperless.checks import binaries_check
 from paperless.checks import debug_mode_check
 from paperless.checks import paths_check
@@ -231,3 +233,35 @@ class TestEmailCertSettingsChecks(DirectoriesMixin, FileSystemAssertsMixin, Test
         msg = msgs[0]
 
         self.assertIn("Email cert /tmp/not_actually_here.pem is not a file", msg.msg)
+
+
+class TestAuditLogChecks(TestCase):
+    def test_was_enabled_once(self):
+        """
+        GIVEN:
+            - Audit log is not enabled
+        WHEN:
+            - Database tables contain audit log entry
+        THEN:
+            - system check error reported for disabling audit log
+        """
+        introspect_mock = mock.MagicMock()
+        introspect_mock.introspection.table_names.return_value = ["auditlog_logentry"]
+        with override_settings(AUDIT_LOG_ENABLED=False):
+            with mock.patch.dict(
+                "paperless.checks.connections",
+                {"default": introspect_mock},
+            ):
+                msgs = audit_log_check(None)
+
+                self.assertEqual(len(msgs), 1)
+
+                msg = msgs[0]
+
+                self.assertIn(
+                    (
+                        "auditlog table was found but PAPERLESS_AUDIT_LOG_ENABLED"
+                        " is not active."
+                    ),
+                    msg.msg,
+                )
index e2e5cf8ea0558ce31b29a871cdad11ab6760e2d5..c4b197e79ca6b3c77efa060216a5b68ac92841fe 100644 (file)
@@ -21,6 +21,11 @@ omit =
        paperless/wsgi.py
        paperless/auth.py
 
+[coverage:report]
+exclude_also =
+    if settings.AUDIT_LOG_ENABLED:
+               if AUDIT_LOG_ENABLED:
+
 [mypy]
 plugins = mypy_django_plugin.main, mypy_drf_plugin.main, numpy.typing.mypy_plugin
 check_untyped_defs = true