]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Make the importer a little more robust against some types of errors
authorTrenton H <797416+stumpylog@users.noreply.github.com>
Tue, 25 Apr 2023 19:17:37 +0000 (12:17 -0700)
committerTrenton H <797416+stumpylog@users.noreply.github.com>
Wed, 26 Apr 2023 14:08:50 +0000 (07:08 -0700)
src/documents/management/commands/document_importer.py
src/documents/tests/test_importer.py
src/documents/tests/test_management_exporter.py

index d68f04ac331c107f1c5756bfc604e31903a054b9..0676117e7e727e10cd3b31c57132a90d8994a27e 100644 (file)
@@ -64,9 +64,9 @@ class Command(BaseCommand):
 
         logging.getLogger().handlers[0].level = logging.ERROR
 
-        self.source = options["source"]
+        self.source = Path(options["source"]).resolve()
 
-        if not os.path.exists(self.source):
+        if not self.source.exists():
             raise CommandError("That path doesn't exist")
 
         if not os.access(self.source, os.R_OK):
@@ -74,39 +74,39 @@ class Command(BaseCommand):
 
         manifest_paths = []
 
-        main_manifest_path = os.path.normpath(
-            os.path.join(self.source, "manifest.json"),
-        )
+        main_manifest_path = self.source / "manifest.json"
+
         self._check_manifest_exists(main_manifest_path)
 
-        with open(main_manifest_path) as f:
-            self.manifest = json.load(f)
+        with main_manifest_path.open() as infile:
+            self.manifest = json.load(infile)
         manifest_paths.append(main_manifest_path)
 
         for file in Path(self.source).glob("**/*-manifest.json"):
-            with open(file) as f:
-                self.manifest += json.load(f)
+            with file.open() as infile:
+                self.manifest += json.load(infile)
             manifest_paths.append(file)
 
-        version_path = os.path.normpath(os.path.join(self.source, "version.json"))
-        if os.path.exists(version_path):
-            with open(version_path) as f:
-                self.version = json.load(f)["version"]
-                # Provide an initial warning if needed to the user
-                if self.version != version.__full_version_str__:
-                    self.stdout.write(
-                        self.style.WARNING(
-                            "Version mismatch: "
-                            f"Currently {version.__full_version_str__},"
-                            f" importing {self.version}."
-                            " Continuing, but import may fail.",
-                        ),
-                    )
+        version_path = self.source / "version.json"
+        if version_path.exists():
+            with version_path.open() as infile:
+                self.version = json.load(infile)["version"]
+            # Provide an initial warning if needed to the user
+            if self.version != version.__full_version_str__:
+                self.stdout.write(
+                    self.style.WARNING(
+                        "Version mismatch: "
+                        f"Currently {version.__full_version_str__},"
+                        f" importing {self.version}."
+                        " Continuing, but import may fail.",
+                    ),
+                )
 
         else:
             self.stdout.write(self.style.NOTICE("No version.json file located"))
 
-        self._check_manifest()
+        self._check_manifest_valid()
+
         with disable_signal(
             post_save,
             receiver=update_filename_and_move_files,
@@ -150,14 +150,18 @@ class Command(BaseCommand):
         )
 
     @staticmethod
-    def _check_manifest_exists(path):
-        if not os.path.exists(path):
+    def _check_manifest_exists(path: Path):
+        if not path.exists():
             raise CommandError(
                 "That directory doesn't appear to contain a manifest.json file.",
             )
 
-    def _check_manifest(self):
-
+    def _check_manifest_valid(self):
+        """
+        Attempts to verify the manifest is valid.  Namely checking the files
+        referred to exist and the files can be read from
+        """
+        self.stdout.write("Checking the manifest")
         for record in self.manifest:
 
             if record["model"] != "documents.document":
@@ -170,19 +174,35 @@ class Command(BaseCommand):
                 )
 
             doc_file = record[EXPORTER_FILE_NAME]
-            if not os.path.exists(os.path.join(self.source, doc_file)):
+            doc_path = self.source / doc_file
+            if not doc_path.exists():
                 raise CommandError(
                     'The manifest file refers to "{}" which does not '
                     "appear to be in the source directory.".format(doc_file),
                 )
+            try:
+                with doc_path.open(mode="rb") as infile:
+                    infile.read(1)
+            except Exception as e:
+                raise CommandError(
+                    f"Failed to read from original file {doc_path}",
+                ) from e
 
             if EXPORTER_ARCHIVE_NAME in record:
                 archive_file = record[EXPORTER_ARCHIVE_NAME]
-                if not os.path.exists(os.path.join(self.source, archive_file)):
+                doc_archive_path = self.source / archive_file
+                if not doc_archive_path.exists():
                     raise CommandError(
                         f"The manifest file refers to {archive_file} which "
                         f"does not appear to be in the source directory.",
                     )
+                try:
+                    with doc_archive_path.open(mode="rb") as infile:
+                        infile.read(1)
+                except Exception as e:
+                    raise CommandError(
+                        f"Failed to read from archive file {doc_archive_path}",
+                    ) from e
 
     def _import_files_from_manifest(self, progress_bar_disable):
 
index 8a659dbb6947a9d4a8a187abd50087ac7ed32c30..a98e7394b6c2177f6cb72fdab396ea1c689e87f8 100644 (file)
@@ -1,6 +1,10 @@
 from django.core.management.base import CommandError
 from django.test import TestCase
 from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_ARCHIVE_NAME
+from pathlib import Path
+import tempfile
+from django.core.management import call_command
 
 from documents.management.commands.document_importer import Command
 
@@ -14,17 +18,17 @@ class TestImporter(TestCase):
         self.assertRaises(
             CommandError,
             cmd._check_manifest_exists,
-            "/tmp/manifest.json",
+            Path("/tmp/manifest.json"),
         )
 
     def test_check_manifest(self):
 
         cmd = Command()
-        cmd.source = "/tmp"
+        cmd.source = Path("/tmp")
 
         cmd.manifest = [{"model": "documents.document"}]
         with self.assertRaises(CommandError) as cm:
-            cmd._check_manifest()
+            cmd._check_manifest_valid()
         self.assertIn("The manifest file contains a record", str(cm.exception))
 
         cmd.manifest = [
@@ -32,8 +36,81 @@ class TestImporter(TestCase):
         ]
         # self.assertRaises(CommandError, cmd._check_manifest)
         with self.assertRaises(CommandError) as cm:
-            cmd._check_manifest()
+            cmd._check_manifest_valid()
         self.assertIn(
             'The manifest file refers to "noexist.pdf"',
             str(cm.exception),
         )
+
+    def test_import_permission_error(self):
+        """
+        GIVEN:
+            - Original file which cannot be read from
+            - Archive file which cannot be read from
+        WHEN:
+            - Import is attempted
+        THEN:
+            - CommandError is raised indicating the issue
+        """
+        with tempfile.TemporaryDirectory() as temp_dir:
+
+            # Create empty files
+            original_path = Path(temp_dir) / "original.pdf"
+            archive_path = Path(temp_dir) / "archive.pdf"
+            original_path.touch()
+            archive_path.touch()
+
+            # No read permissions
+            original_path.chmod(0o222)
+
+            cmd = Command()
+            cmd.source = Path(temp_dir)
+            cmd.manifest = [
+                {
+                    "model": "documents.document",
+                    EXPORTER_FILE_NAME: "original.pdf",
+                    EXPORTER_ARCHIVE_NAME: "archive.pdf",
+                },
+            ]
+            with self.assertRaises(CommandError) as cm:
+                cmd._check_manifest_valid()
+                self.assertInt("Failed to read from original file", str(cm.exception))
+
+            original_path.chmod(0o444)
+            archive_path.chmod(0o222)
+
+            with self.assertRaises(CommandError) as cm:
+                cmd._check_manifest_valid()
+                self.assertInt("Failed to read from archive file", str(cm.exception))
+
+    def test_import_source_not_existing(self):
+        """
+        GIVEN:
+            - Source given doesn't exist
+        WHEN:
+            - Import is attempted
+        THEN:
+            - CommandError is raised indicating the issue
+        """
+        with self.assertRaises(CommandError) as cm:
+            call_command("document_importer", Path("/tmp/notapath"))
+            self.assertInt("That path doesn't exist", str(cm.exception))
+
+    def test_import_source_not_readable(self):
+        """
+        GIVEN:
+            - Source given isn't readable
+        WHEN:
+            - Import is attempted
+        THEN:
+            - CommandError is raised indicating the issue
+        """
+        with tempfile.TemporaryDirectory() as temp_dir:
+            path = Path(temp_dir)
+            path.chmod(0o222)
+            with self.assertRaises(CommandError) as cm:
+                call_command("document_importer", path)
+                self.assertInt(
+                    "That path doesn't appear to be readable",
+                    str(cm.exception),
+                )
index 05b6db5b3c844f1e1a9f75b78ce72554def4c77f..cc52dc9c16afd3442a859c812fba3bcb37e32822 100644 (file)
@@ -212,7 +212,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             Tag.objects.all().delete()
             self.assertEqual(Document.objects.count(), 0)
 
-            call_command("document_importer", self.target)
+            call_command("document_importer", "--no-progress-bar", self.target)
             self.assertEqual(Document.objects.count(), 4)
             self.assertEqual(Tag.objects.count(), 1)
             self.assertEqual(Correspondent.objects.count(), 1)
@@ -541,7 +541,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             self.assertEqual(Document.objects.count(), 4)
             Document.objects.all().delete()
             self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", self.target)
+            call_command("document_importer", "--no-progress-bar", self.target)
             self.assertEqual(Document.objects.count(), 4)
 
     def test_no_thumbnail(self):
@@ -584,7 +584,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             self.assertEqual(Document.objects.count(), 4)
             Document.objects.all().delete()
             self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", self.target)
+            call_command("document_importer", "--no-progress-bar", self.target)
             self.assertEqual(Document.objects.count(), 4)
 
     def test_split_manifest(self):
@@ -613,7 +613,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             self.assertEqual(Document.objects.count(), 4)
             Document.objects.all().delete()
             self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", self.target)
+            call_command("document_importer", "--no-progress-bar", self.target)
             self.assertEqual(Document.objects.count(), 4)
 
     def test_folder_prefix(self):
@@ -637,5 +637,5 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             self.assertEqual(Document.objects.count(), 4)
             Document.objects.all().delete()
             self.assertEqual(Document.objects.count(), 0)
-            call_command("document_importer", self.target)
+            call_command("document_importer", "--no-progress-bar", self.target)
             self.assertEqual(Document.objects.count(), 4)