git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Adds an optional API field to follow the filenaming format when creating a bulk downl...
author Trenton Holmes <797416+stumpylog@users.noreply.github.com>
Sun, 20 Nov 2022 01:37:32 +0000 (17:37 -0800)
committer Trenton H <797416+stumpylog@users.noreply.github.com>
Fri, 9 Dec 2022 02:32:14 +0000 (18:32 -0800)
src/documents/bulk_download.py
src/documents/models.py
src/documents/serialisers.py
src/documents/tests/test_api.py
src/documents/views.py

index 6beefa23b67772f100f0b940913f847aac0c8f7c..87d97afcce6541a871cae26ae0c9d66a25c41d6c 100644 (file)
@@ -1,18 +1,29 @@
+import os
 from zipfile import ZipFile
 
 from documents.models import Document
 
 
 class BulkArchiveStrategy:
-    def __init__(self, zipf: ZipFile):
+    def __init__(self, zipf: ZipFile, follow_formatting: bool = False):
         self.zipf = zipf
+        if follow_formatting:
+            self.make_unique_filename = self._formatted_filepath
+        else:
+            self.make_unique_filename = self._filename_only
 
-    def make_unique_filename(
+    def _filename_only(
         self,
         doc: Document,
         archive: bool = False,
         folder: str = "",
     ):
+        """
+        Constructs a unique name for the given document to be used inside the
+        zip file.
+
+        The filename might not be unique enough, so a counter is appended if needed
+        """
         counter = 0
         while True:
             filename = folder + doc.get_public_filename(archive, counter)
@@ -21,6 +32,25 @@ class BulkArchiveStrategy:
             else:
                 return filename
 
+    def _formatted_filepath(
+        self,
+        doc: Document,
+        archive: bool = False,
+        folder: str = "",
+    ):
+        """
+        Constructs a full file path for the given document to be used inside
+        the zipfile.
+
+        The path is already unique, as handled when a document is consumed or updated
+        """
+        if archive and doc.has_archive_version:
+            in_archive_path = os.path.join(folder, doc.archive_filename)
+        else:
+            in_archive_path = os.path.join(folder, doc.filename)
+
+        return in_archive_path
+
     def add_document(self, doc: Document):
         raise NotImplementedError()  # pragma: no cover
 
@@ -31,9 +61,6 @@ class OriginalsOnlyStrategy(BulkArchiveStrategy):
 
 
 class ArchiveOnlyStrategy(BulkArchiveStrategy):
-    def __init__(self, zipf):
-        super().__init__(zipf)
-
     def add_document(self, doc: Document):
         if doc.has_archive_version:
             self.zipf.write(
index c1b9c88bcaa653acf150e9b2e6552a457edbf600..1ee6dfedb833f3366a36e98a289f0c6e702ef678 100644 (file)
@@ -287,6 +287,9 @@ class Document(models.Model):
         return open(self.archive_path, "rb")
 
     def get_public_filename(self, archive=False, counter=0, suffix=None) -> str:
+        """
+        Returns a sanitized filename for the document, not including any paths.
+        """
         result = str(self)
 
         if counter:
index db282cacd1e4db2fa8e21fcd819a98fe92cbc237..3e6ec4390fae203a10ead7d4761156dd398baf1f 100644 (file)
@@ -551,6 +551,10 @@ class BulkDownloadSerializer(DocumentListSerializer):
         default="none",
     )
 
+    follow_formatting = serializers.BooleanField(
+        default=False,
+    )
+
     def validate_compression(self, compression):
         import zipfile
 
index d876984bd0e2d286ac4c3d4c079649843bd51c9a..a7e2be53f84f3651a660f4915d490fa8350fdb05 100644 (file)
@@ -2329,6 +2329,9 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
 
 
 class TestBulkDownload(DirectoriesMixin, APITestCase):
+
+    ENDPOINT = "/api/documents/bulk_download/"
+
     def setUp(self):
         super().setUp()
 
@@ -2379,7 +2382,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
 
     def test_download_originals(self):
         response = self.client.post(
-            "/api/documents/bulk_download/",
+            self.ENDPOINT,
             json.dumps(
                 {"documents": [self.doc2.id, self.doc3.id], "content": "originals"},
             ),
@@ -2402,7 +2405,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
 
     def test_download_default(self):
         response = self.client.post(
-            "/api/documents/bulk_download/",
+            self.ENDPOINT,
             json.dumps({"documents": [self.doc2.id, self.doc3.id]}),
             content_type="application/json",
         )
@@ -2423,7 +2426,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
 
     def test_download_both(self):
         response = self.client.post(
-            "/api/documents/bulk_download/",
+            self.ENDPOINT,
             json.dumps({"documents": [self.doc2.id, self.doc3.id], "content": "both"}),
             content_type="application/json",
         )
@@ -2457,7 +2460,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
 
     def test_filename_clashes(self):
         response = self.client.post(
-            "/api/documents/bulk_download/",
+            self.ENDPOINT,
             json.dumps({"documents": [self.doc2.id, self.doc2b.id]}),
             content_type="application/json",
         )
@@ -2479,13 +2482,146 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
 
     def test_compression(self):
         response = self.client.post(
-            "/api/documents/bulk_download/",
+            self.ENDPOINT,
             json.dumps(
                 {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"},
             ),
             content_type="application/json",
         )
 
+    @override_settings(FILENAME_FORMAT="{correspondent}/{title}")
+    def test_formatted_download_originals(self):
+        """Original files are zipped under their FILENAME_FORMAT paths."""
+
+        c = Correspondent.objects.create(name="test")
+        c2 = Correspondent.objects.create(name="a space name")
+
+        self.doc2.correspondent = c
+        self.doc2.title = "This is Doc 2"
+        self.doc2.save()
+
+        self.doc3.correspondent = c2
+        self.doc3.title = "Title 2 - Doc 3"
+        self.doc3.save()
+
+        response = self.client.post(
+            self.ENDPOINT,
+            json.dumps(
+                {
+                    "documents": [self.doc2.id, self.doc3.id],
+                    "content": "originals",
+                    "follow_formatting": True,
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response["Content-Type"], "application/zip")
+
+        with zipfile.ZipFile(io.BytesIO(response.content)) as zipf:
+            self.assertEqual(len(zipf.filelist), 2)
+            self.assertIn("a space name/Title 2 - Doc 3.jpg", zipf.namelist())
+            self.assertIn("test/This is Doc 2.pdf", zipf.namelist())
+
+            with self.doc2.source_file as f:
+                self.assertEqual(f.read(), zipf.read("test/This is Doc 2.pdf"))
+
+            with self.doc3.source_file as f:
+                self.assertEqual(
+                    f.read(),
+                    zipf.read("a space name/Title 2 - Doc 3.jpg"),
+                )
+
+    @override_settings(FILENAME_FORMAT="somewhere/{title}")
+    def test_formatted_download_archive(self):
+        """Default content is zipped under its FILENAME_FORMAT paths."""
+
+        self.doc2.title = "This is Doc 2"
+        self.doc2.save()
+
+        self.doc3.title = "Title 2 - Doc 3"
+        self.doc3.save()
+
+        response = self.client.post(
+            self.ENDPOINT,
+            json.dumps(
+                {
+                    "documents": [self.doc2.id, self.doc3.id],
+                    "follow_formatting": True,
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response["Content-Type"], "application/zip")
+
+        with zipfile.ZipFile(io.BytesIO(response.content)) as zipf:
+            self.assertEqual(len(zipf.filelist), 2)
+            self.assertIn("somewhere/This is Doc 2.pdf", zipf.namelist())
+            self.assertIn("somewhere/Title 2 - Doc 3.pdf", zipf.namelist())
+
+            with self.doc2.source_file as f:
+                self.assertEqual(f.read(), zipf.read("somewhere/This is Doc 2.pdf"))
+
+            with self.doc3.archive_file as f:
+                self.assertEqual(f.read(), zipf.read("somewhere/Title 2 - Doc 3.pdf"))
+
+    @override_settings(FILENAME_FORMAT="{document_type}/{title}")
+    def test_formatted_download_both(self):
+        """Both mode nests FILENAME_FORMAT paths under originals/ and archive/."""
+
+        dc1 = DocumentType.objects.create(name="bill")
+        dc2 = DocumentType.objects.create(name="statement")
+
+        self.doc2.document_type = dc1
+        self.doc2.title = "This is Doc 2"
+        self.doc2.save()
+
+        self.doc3.document_type = dc2
+        self.doc3.title = "Title 2 - Doc 3"
+        self.doc3.save()
+
+        response = self.client.post(
+            self.ENDPOINT,
+            json.dumps(
+                {
+                    "documents": [self.doc2.id, self.doc3.id],
+                    "content": "both",
+                    "follow_formatting": True,
+                },
+            ),
+            content_type="application/json",
+        )
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response["Content-Type"], "application/zip")
+
+        with zipfile.ZipFile(io.BytesIO(response.content)) as zipf:
+            self.assertEqual(len(zipf.filelist), 3)
+            self.assertIn("originals/bill/This is Doc 2.pdf", zipf.namelist())
+            self.assertIn("archive/statement/Title 2 - Doc 3.pdf", zipf.namelist())
+            self.assertIn("originals/statement/Title 2 - Doc 3.jpg", zipf.namelist())
+
+            with self.doc2.source_file as f:
+                self.assertEqual(
+                    f.read(),
+                    zipf.read("originals/bill/This is Doc 2.pdf"),
+                )
+
+            with self.doc3.archive_file as f:
+                self.assertEqual(
+                    f.read(),
+                    zipf.read("archive/statement/Title 2 - Doc 3.pdf"),
+                )
+
+            with self.doc3.source_file as f:
+                self.assertEqual(
+                    f.read(),
+                    zipf.read("originals/statement/Title 2 - Doc 3.jpg"),
+                )
+
 
 class TestApiAuth(DirectoriesMixin, APITestCase):
     def test_auth_required(self):
index 10225be6fe6b7b958c44b25f783421032851e997..ce82cbfaaf6d3d20d22cd4463d565e012e4baa4c 100644 (file)
@@ -745,6 +745,7 @@ class BulkDownloadView(GenericAPIView):
         ids = serializer.validated_data.get("documents")
         compression = serializer.validated_data.get("compression")
         content = serializer.validated_data.get("content")
+        follow_filename_format = serializer.validated_data.get("follow_formatting")
 
         os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
         temp = tempfile.NamedTemporaryFile(
@@ -761,7 +762,7 @@ class BulkDownloadView(GenericAPIView):
             strategy_class = ArchiveOnlyStrategy
 
         with zipfile.ZipFile(temp.name, "w", compression) as zipf:
-            strategy = strategy_class(zipf)
+            strategy = strategy_class(zipf, follow_filename_format)
             for id in ids:
                 doc = Document.objects.get(id=id)
                 strategy.add_document(doc)