]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Adds additional warnings during an import if it might fail due to reasons (#4814)
authorTrenton H <797416+stumpylog@users.noreply.github.com>
Tue, 5 Dec 2023 03:39:59 +0000 (19:39 -0800)
committerGitHub <noreply@github.com>
Tue, 5 Dec 2023 03:39:59 +0000 (03:39 +0000)
src/documents/management/commands/document_importer.py
src/documents/tests/test_importer.py [deleted file]
src/documents/tests/test_management_importer.py [new file with mode: 0644]

index 9895104c3c4ace5525159e28d49f542b8a01a200..c166ec0cbff85ac85ab686800047241b6336cf82 100644 (file)
@@ -7,6 +7,7 @@ from pathlib import Path
 import tqdm
 from django.conf import settings
 from django.contrib.auth.models import Permission
+from django.contrib.auth.models import User
 from django.contrib.contenttypes.models import ContentType
 from django.core.exceptions import FieldDoesNotExist
 from django.core.management import call_command
@@ -60,10 +61,11 @@ class Command(BaseCommand):
         self.manifest = None
         self.version = None
 
-    def handle(self, *args, **options):
-        logging.getLogger().handlers[0].level = logging.ERROR
-
-        self.source = Path(options["source"]).resolve()
+    def pre_check(self) -> None:
+        """
+        Runs some initial checks against the source directory, including looking for
+        common mistakes like having files still and users other than expected
+        """
 
         if not self.source.exists():
             raise CommandError("That path doesn't exist")
@@ -71,6 +73,40 @@ class Command(BaseCommand):
         if not os.access(self.source, os.R_OK):
             raise CommandError("That path doesn't appear to be readable")
 
+        for document_dir in [settings.ORIGINALS_DIR, settings.ARCHIVE_DIR]:
+            if document_dir.exists() and document_dir.is_dir():
+                for entry in document_dir.glob("**/*"):
+                    if entry.is_dir():
+                        continue
+                    self.stdout.write(
+                        self.style.WARNING(
+                            f"Found file {entry.relative_to(document_dir)}, this might indicate a non-empty installation",
+                        ),
+                    )
+                    break
+        if (
+            User.objects.exclude(username__in=["consumer", "AnonymousUser"]).count()
+            != 0
+        ):
+            self.stdout.write(
+                self.style.WARNING(
+                    "Found existing user(s), this might indicate a non-empty installation",
+                ),
+            )
+        if Document.objects.count() != 0:
+            self.stdout.write(
+                self.style.WARNING(
+                    "Found existing documents(s), this might indicate a non-empty installation",
+                ),
+            )
+
+    def handle(self, *args, **options):
+        logging.getLogger().handlers[0].level = logging.ERROR
+
+        self.source = Path(options["source"]).resolve()
+
+        self.pre_check()
+
         manifest_paths = []
 
         main_manifest_path = self.source / "manifest.json"
diff --git a/src/documents/tests/test_importer.py b/src/documents/tests/test_importer.py
deleted file mode 100644 (file)
index d86101f..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-import tempfile
-from pathlib import Path
-
-from django.core.management import call_command
-from django.core.management.base import CommandError
-from django.test import TestCase
-
-from documents.management.commands.document_importer import Command
-from documents.settings import EXPORTER_ARCHIVE_NAME
-from documents.settings import EXPORTER_FILE_NAME
-
-
-class TestImporter(TestCase):
-    def __init__(self, *args, **kwargs):
-        TestCase.__init__(self, *args, **kwargs)
-
-    def test_check_manifest_exists(self):
-        cmd = Command()
-        self.assertRaises(
-            CommandError,
-            cmd._check_manifest_exists,
-            Path("/tmp/manifest.json"),
-        )
-
-    def test_check_manifest(self):
-        cmd = Command()
-        cmd.source = Path("/tmp")
-
-        cmd.manifest = [{"model": "documents.document"}]
-        with self.assertRaises(CommandError) as cm:
-            cmd._check_manifest_valid()
-        self.assertIn("The manifest file contains a record", str(cm.exception))
-
-        cmd.manifest = [
-            {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
-        ]
-        # self.assertRaises(CommandError, cmd._check_manifest)
-        with self.assertRaises(CommandError) as cm:
-            cmd._check_manifest_valid()
-        self.assertIn(
-            'The manifest file refers to "noexist.pdf"',
-            str(cm.exception),
-        )
-
-    def test_import_permission_error(self):
-        """
-        GIVEN:
-            - Original file which cannot be read from
-            - Archive file which cannot be read from
-        WHEN:
-            - Import is attempted
-        THEN:
-            - CommandError is raised indicating the issue
-        """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Create empty files
-            original_path = Path(temp_dir) / "original.pdf"
-            archive_path = Path(temp_dir) / "archive.pdf"
-            original_path.touch()
-            archive_path.touch()
-
-            # No read permissions
-            original_path.chmod(0o222)
-
-            cmd = Command()
-            cmd.source = Path(temp_dir)
-            cmd.manifest = [
-                {
-                    "model": "documents.document",
-                    EXPORTER_FILE_NAME: "original.pdf",
-                    EXPORTER_ARCHIVE_NAME: "archive.pdf",
-                },
-            ]
-            with self.assertRaises(CommandError) as cm:
-                cmd._check_manifest_valid()
-                self.assertInt("Failed to read from original file", str(cm.exception))
-
-            original_path.chmod(0o444)
-            archive_path.chmod(0o222)
-
-            with self.assertRaises(CommandError) as cm:
-                cmd._check_manifest_valid()
-                self.assertInt("Failed to read from archive file", str(cm.exception))
-
-    def test_import_source_not_existing(self):
-        """
-        GIVEN:
-            - Source given doesn't exist
-        WHEN:
-            - Import is attempted
-        THEN:
-            - CommandError is raised indicating the issue
-        """
-        with self.assertRaises(CommandError) as cm:
-            call_command("document_importer", Path("/tmp/notapath"))
-            self.assertInt("That path doesn't exist", str(cm.exception))
-
-    def test_import_source_not_readable(self):
-        """
-        GIVEN:
-            - Source given isn't readable
-        WHEN:
-            - Import is attempted
-        THEN:
-            - CommandError is raised indicating the issue
-        """
-        with tempfile.TemporaryDirectory() as temp_dir:
-            path = Path(temp_dir)
-            path.chmod(0o222)
-            with self.assertRaises(CommandError) as cm:
-                call_command("document_importer", path)
-                self.assertInt(
-                    "That path doesn't appear to be readable",
-                    str(cm.exception),
-                )
diff --git a/src/documents/tests/test_management_importer.py b/src/documents/tests/test_management_importer.py
new file mode 100644 (file)
index 0000000..c0d155d
--- /dev/null
@@ -0,0 +1,274 @@
+import json
+import tempfile
+from io import StringIO
+from pathlib import Path
+
+from django.contrib.auth.models import User
+from django.core.management import call_command
+from django.core.management.base import CommandError
+from django.test import TestCase
+
+from documents.management.commands.document_importer import Command
+from documents.models import Document
+from documents.settings import EXPORTER_ARCHIVE_NAME
+from documents.settings import EXPORTER_FILE_NAME
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import FileSystemAssertsMixin
+
+
+class TestCommandImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
+    def test_check_manifest_exists(self):
+        """
+        GIVEN:
+            - Source directory exists
+            - No manifest.json file exists in the directory
+        WHEN:
+            - Import is attempted
+        THEN:
+            - CommandError is raised indicating the issue
+        """
+        with self.assertRaises(CommandError) as e:
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                str(self.dirs.scratch_dir),
+            )
+            self.assertIn(
+                "That directory doesn't appear to contain a manifest.json file.",
+                str(e),
+            )
+
+    def test_check_manifest_malformed(self):
+        """
+        GIVEN:
+            - Source directory exists
+            - manifest.json file exists in the directory
+            - manifest.json is missing the documents exported name
+        WHEN:
+            - Import is attempted
+        THEN:
+            - CommandError is raised indicating the issue
+        """
+        manifest_file = self.dirs.scratch_dir / "manifest.json"
+        with manifest_file.open("w") as outfile:
+            json.dump([{"model": "documents.document"}], outfile)
+
+        with self.assertRaises(CommandError) as e:
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                str(self.dirs.scratch_dir),
+            )
+            self.assertIn(
+                "The manifest file contains a record which does not refer to an actual document file.",
+                str(e),
+            )
+
+    def test_check_manifest_file_not_found(self):
+        """
+        GIVEN:
+            - Source directory exists
+            - manifest.json file exists in the directory
+            - manifest.json refers to a file which doesn't exist
+        WHEN:
+            - Import is attempted
+        THEN:
+            - CommandError is raised indicating the issue
+        """
+        manifest_file = self.dirs.scratch_dir / "manifest.json"
+        with manifest_file.open("w") as outfile:
+            json.dump(
+                [{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}],
+                outfile,
+            )
+
+        with self.assertRaises(CommandError) as e:
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                str(self.dirs.scratch_dir),
+            )
+            self.assertIn('The manifest file refers to "noexist.pdf"', str(e))
+
+    def test_import_permission_error(self):
+        """
+        GIVEN:
+            - Original file which cannot be read from
+            - Archive file which cannot be read from
+        WHEN:
+            - Import is attempted
+        THEN:
+            - CommandError is raised indicating the issue
+        """
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Create empty files
+            original_path = Path(temp_dir) / "original.pdf"
+            archive_path = Path(temp_dir) / "archive.pdf"
+            original_path.touch()
+            archive_path.touch()
+
+            # No read permissions
+            original_path.chmod(0o222)
+
+            cmd = Command()
+            cmd.source = Path(temp_dir)
+            cmd.manifest = [
+                {
+                    "model": "documents.document",
+                    EXPORTER_FILE_NAME: "original.pdf",
+                    EXPORTER_ARCHIVE_NAME: "archive.pdf",
+                },
+            ]
+            with self.assertRaises(CommandError) as cm:
+                cmd._check_manifest_valid()
+                self.assertInt("Failed to read from original file", str(cm.exception))
+
+            original_path.chmod(0o444)
+            archive_path.chmod(0o222)
+
+            with self.assertRaises(CommandError) as cm:
+                cmd._check_manifest_valid()
+                self.assertInt("Failed to read from archive file", str(cm.exception))
+
+    def test_import_source_not_existing(self):
+        """
+        GIVEN:
+            - Source given doesn't exist
+        WHEN:
+            - Import is attempted
+        THEN:
+            - CommandError is raised indicating the issue
+        """
+        with self.assertRaises(CommandError) as cm:
+            call_command("document_importer", Path("/tmp/notapath"))
+            self.assertInt("That path doesn't exist", str(cm.exception))
+
+    def test_import_source_not_readable(self):
+        """
+        GIVEN:
+            - Source given isn't readable
+        WHEN:
+            - Import is attempted
+        THEN:
+            - CommandError is raised indicating the issue
+        """
+        with tempfile.TemporaryDirectory() as temp_dir:
+            path = Path(temp_dir)
+            path.chmod(0o222)
+            with self.assertRaises(CommandError) as cm:
+                call_command("document_importer", path)
+                self.assertInt(
+                    "That path doesn't appear to be readable",
+                    str(cm.exception),
+                )
+
+    def test_import_source_does_not_exist(self):
+        """
+        GIVEN:
+            - Source directory does not exist
+        WHEN:
+            - Request to import documents from a directory
+        THEN:
+            - CommandError is raised indicating the folder doesn't exist
+        """
+        path = Path("/tmp/should-not-exist")
+
+        self.assertIsNotFile(path)
+
+        with self.assertRaises(CommandError) as e:
+            call_command("document_importer", "--no-progress-bar", str(path))
+
+            self.assertIn("That path doesn't exist", str(e))
+
+    def test_import_files_exist(self):
+        """
+        GIVEN:
+            - Source directory does exist
+            - A file exists in the originals directory
+        WHEN:
+            - Request to import documents from a directory
+        THEN:
+            - CommandError is raised indicating the file exists
+        """
+        (self.dirs.originals_dir / "temp").mkdir()
+
+        (self.dirs.originals_dir / "temp" / "file.pdf").touch()
+
+        stdout = StringIO()
+
+        with self.assertRaises(CommandError):
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                str(self.dirs.scratch_dir),
+                stdout=stdout,
+            )
+        stdout.seek(0)
+        self.assertIn(
+            "Found file temp/file.pdf, this might indicate a non-empty installation",
+            str(stdout.read()),
+        )
+
+    def test_import_with_user_exists(self):
+        """
+        GIVEN:
+            - Source directory does exist
+            - At least 1 User exists in the database
+        WHEN:
+            - Request to import documents from a directory
+        THEN:
+            - A warning is output to stdout
+        """
+        stdout = StringIO()
+
+        User.objects.create()
+
+        # Not creating a manifest, etc, so it errors
+        with self.assertRaises(CommandError):
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                str(self.dirs.scratch_dir),
+                stdout=stdout,
+            )
+        stdout.seek(0)
+        self.assertIn(
+            "Found existing user(s), this might indicate a non-empty installation",
+            str(stdout.read()),
+        )
+
+    def test_import_with_documents_exists(self):
+        """
+        GIVEN:
+            - Source directory does exist
+            - At least 1 Document exists in the database
+        WHEN:
+            - Request to import documents from a directory
+        THEN:
+            - A warning is output to stdout
+        """
+        stdout = StringIO()
+
+        Document.objects.create(
+            content="Content",
+            checksum="42995833e01aea9b3edee44bbfdd7ce1",
+            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
+            title="wow1",
+            filename="0000001.pdf",
+            mime_type="application/pdf",
+            archive_filename="0000001.pdf",
+        )
+
+        # Not creating a manifest, etc, so it errors
+        with self.assertRaises(CommandError):
+            call_command(
+                "document_importer",
+                "--no-progress-bar",
+                str(self.dirs.scratch_dir),
+                stdout=stdout,
+            )
+        stdout.seek(0)
+        self.assertIn(
+            "Found existing documents(s), this might indicate a non-empty installation",
+            str(stdout.read()),
+        )