git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Enhancement: Add --compare-json option to document_exporter to write json files only...
author: Kevin Doren <kevin@doren.org>
Tue, 19 Nov 2024 15:20:24 +0000 (07:20 -0800)
committer: GitHub <noreply@github.com>
Tue, 19 Nov 2024 15:20:24 +0000 (07:20 -0800)
docs/administration.md
src/documents/management/commands/document_exporter.py
src/documents/tests/test_management_exporter.py

index 7624de41b6c08b11268b4b988a8476cd862a3846..8204352d8c738737f419df043103661aa841107b 100644 (file)
@@ -241,6 +241,7 @@ document_exporter target [-c] [-d] [-f] [-na] [-nt] [-p] [-sm] [-z]
 
 optional arguments:
 -c,  --compare-checksums
+-cj, --compare-json
 -d,  --delete
 -f,  --use-filename-format
 -na, --no-archive
@@ -269,7 +270,8 @@ only export changed and added files. Paperless determines whether a file
 has changed by inspecting the file attributes "date/time modified" and
 "size". If that does not work out for you, specify `-c` or
 `--compare-checksums` and paperless will attempt to compare file
-checksums instead. This is slower.
+checksums instead. This is slower. The manifest and metadata json files
+are always updated, unless `-cj` or `--compare-json` is specified.
 
 Paperless will not remove any existing files in the export directory. If
 you want paperless to also remove files that do not belong to the
index f49008cc78eaf9f2b3b796995913c3b62e479e99..84275507d9d1be539cb4fc4934abc587481e2e7d 100644 (file)
@@ -82,6 +82,18 @@ class Command(CryptMixin, BaseCommand):
             ),
         )
 
+        parser.add_argument(
+            "-cj",
+            "--compare-json",
+            default=False,
+            action="store_true",
+            help=(
+                "Compare json file checksums when determining whether to "
+                "export a json file or not (manifest or metadata). "
+                "If not specified, the file is always exported."
+            ),
+        )
+
         parser.add_argument(
             "-d",
             "--delete",
@@ -178,6 +190,7 @@ class Command(CryptMixin, BaseCommand):
         self.target = Path(options["target"]).resolve()
         self.split_manifest: bool = options["split_manifest"]
         self.compare_checksums: bool = options["compare_checksums"]
+        self.compare_json: bool = options["compare_json"]
         self.use_filename_format: bool = options["use_filename_format"]
         self.use_folder_prefix: bool = options["use_folder_prefix"]
         self.delete: bool = options["delete"]
@@ -343,12 +356,11 @@ class Command(CryptMixin, BaseCommand):
                         manifest_dict["custom_field_instances"],
                     ),
                 )
-                manifest_name.write_text(
-                    json.dumps(content, indent=2, ensure_ascii=False),
-                    encoding="utf-8",
+
+                self.check_and_write_json(
+                    content,
+                    manifest_name,
                 )
-                if manifest_name in self.files_in_export_dir:
-                    self.files_in_export_dir.remove(manifest_name)
 
         # These were exported already
         if self.split_manifest:
@@ -361,12 +373,10 @@ class Command(CryptMixin, BaseCommand):
         for key in manifest_dict:
             manifest.extend(manifest_dict[key])
         manifest_path = (self.target / "manifest.json").resolve()
-        manifest_path.write_text(
-            json.dumps(manifest, indent=2, ensure_ascii=False),
-            encoding="utf-8",
+        self.check_and_write_json(
+            manifest,
+            manifest_path,
         )
-        if manifest_path in self.files_in_export_dir:
-            self.files_in_export_dir.remove(manifest_path)
 
         # 4.2 write version information to target folder
         extra_metadata_path = (self.target / "metadata.json").resolve()
@@ -378,16 +388,11 @@ class Command(CryptMixin, BaseCommand):
         # Django stores most of these in the field itself, we store them once here
         if self.passphrase:
             metadata.update(self.get_crypt_params())
-        extra_metadata_path.write_text(
-            json.dumps(
-                metadata,
-                indent=2,
-                ensure_ascii=False,
-            ),
-            encoding="utf-8",
+
+        self.check_and_write_json(
+            metadata,
+            extra_metadata_path,
         )
-        if extra_metadata_path in self.files_in_export_dir:
-            self.files_in_export_dir.remove(extra_metadata_path)
 
         if self.delete:
             # 5. Remove files which we did not explicitly export in this run
@@ -516,6 +521,35 @@ class Command(CryptMixin, BaseCommand):
                     archive_target,
                 )
 
+    def check_and_write_json(
+        self,
+        content: list[dict] | dict,
+        target: Path,
+    ):
+        """
+        Writes the source content to the target json file.
+        If --compare-json arg was used, don't write to target file if
+        the file exists and its bytes are identical to the new content.
+        This preserves the file timestamps when no changes are made.
+        """
+
+        target = target.resolve()
+        # Serialize once so the compared bytes are exactly the written bytes
+        src_bytes = json.dumps(content, indent=2, ensure_ascii=False).encode(
+            "utf-8",
+        )
+        perform_write = True
+        if target in self.files_in_export_dir:
+            # Mark as exported in this run (see the --delete cleanup step)
+            self.files_in_export_dir.remove(target)
+            if self.compare_json:
+                # Direct byte comparison; hashing both sides adds nothing
+                perform_write = target.read_bytes() != src_bytes
+
+        if perform_write:
+            # Bytes, not text: avoids newline translation on Windows
+            target.write_bytes(src_bytes)
+
     def check_and_copy(
         self,
         source: Path,
index 9697f0c033f4e1af2a7a092d66b702506a443088..0a79b6cd7c901a09b9512d5b6350aa871c5b1b5c 100644 (file)
@@ -153,6 +153,7 @@ class TestExportImport(
         *,
         use_filename_format=False,
         compare_checksums=False,
+        compare_json=False,
         delete=False,
         no_archive=False,
         no_thumbnail=False,
@@ -165,6 +166,8 @@ class TestExportImport(
             args += ["--use-filename-format"]
         if compare_checksums:
             args += ["--compare-checksums"]
+        if compare_json:
+            args += ["--compare-json"]
         if delete:
             args += ["--delete"]
         if no_archive:
@@ -340,6 +343,10 @@ class TestExportImport(
         self.assertNotEqual(st_mtime_1, st_mtime_2)
         self.assertNotEqual(st_mtime_2, st_mtime_3)
 
+        self._do_export(compare_json=True)
+        st_mtime_4 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
+        self.assertEqual(st_mtime_3, st_mtime_4)
+
     def test_update_export_changed_checksum(self):
         shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
         shutil.copytree(