]> git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Initial task for building
author shamoon <4887959+shamoon@users.noreply.github.com>
Wed, 5 Nov 2025 04:55:12 +0000 (20:55 -0800)
committer shamoon <4887959+shamoon@users.noreply.github.com>
Wed, 5 Nov 2025 04:55:12 +0000 (20:55 -0800)
src-ui/src/app/data/share-bundle.ts
src/documents/migrations/1075_sharebundle.py
src/documents/models.py
src/documents/serialisers.py
src/documents/tasks.py
src/documents/views.py
src/paperless/settings.py

index 4eb6f5744cd93103b963460390d0ba20e0e905d7..3c63077302a2c1be01e129483bffd92229eb7918 100644 (file)
@@ -16,6 +16,7 @@ export interface ShareBundleSummary {
   document_count: number
   file_version: FileVersion
   status: ShareBundleStatus
+  built_at?: string
   size_bytes?: number
   last_error?: string
 }
index 9a661120505ccf99185d568366c2d978a5f89135..a64ad29d328ccb01ada139f1dfff1ef371528f2c 100644 (file)
@@ -128,6 +128,22 @@ class Migration(migrations.Migration):
                         verbose_name="last error",
                     ),
                 ),
+                (
+                    "file_path",
+                    models.CharField(
+                        blank=True,
+                        max_length=512,
+                        verbose_name="file path",
+                    ),
+                ),
+                (
+                    "built_at",
+                    models.DateTimeField(
+                        blank=True,
+                        null=True,
+                        verbose_name="built at",
+                    ),
+                ),
                 (
                     "owner",
                     models.ForeignKey(
index ec5fdc7312856d783226078d165bbfde7b594390..0bd4aedd84a0ef1a794633f7781c2ee78dfdf5d6 100644 (file)
@@ -844,6 +844,18 @@ class ShareBundle(SoftDeleteModel):
         blank=True,
     )
 
+    file_path = models.CharField(
+        _("file path"),
+        max_length=512,
+        blank=True,
+    )
+
+    built_at = models.DateTimeField(
+        _("built at"),
+        null=True,
+        blank=True,
+    )
+
     documents = models.ManyToManyField(
         "documents.Document",
         related_name="share_bundles",
@@ -853,6 +865,31 @@ class ShareBundle(SoftDeleteModel):
     def __str__(self):
         return _("Share bundle %(slug)s") % {"slug": self.slug}
 
+    @property
+    def absolute_file_path(self) -> Path | None:
+        """
+        Location of the built bundle archive on disk.
+
+        Returns None when no file path is stored. A stored relative path is
+        anchored at settings.MEDIA_ROOT and resolved; a stored absolute path
+        is returned unchanged.
+        """
+        if not self.file_path:
+            return None
+        stored = Path(self.file_path)
+        if stored.is_absolute():
+            return stored
+        return (settings.MEDIA_ROOT / stored).resolve()
+
+    def remove_file(self):
+        """Delete the bundle's file from disk if it exists (best-effort)."""
+        target = self.absolute_file_path
+        if not target or not target.exists():
+            return
+        try:
+            target.unlink()
+        except OSError:
+            # Deliberately ignored: a file that cannot be removed must not
+            # abort the caller.
+            pass
+
+    def delete(self, using=None, *, keep_parents=False):
+        """Remove the bundle file from disk, then run the parent delete()."""
+        # The file is removed first; remove_file() ignores OSError, so a
+        # failed unlink does not stop the delete.
+        self.remove_file()
+        result = super().delete(using=using, keep_parents=keep_parents)
+        return result
+
+    def hard_delete(self, using=None, *, keep_parents=False):
+        """Remove the bundle file from disk, then run the parent hard_delete()."""
+        # The file is removed first; remove_file() ignores OSError, so a
+        # failed unlink does not stop the delete.
+        self.remove_file()
+        result = super().hard_delete(using=using, keep_parents=keep_parents)
+        return result
+
 
 class CustomField(models.Model):
     """
index 6fa2c6817c610c3f8746148af42dab306ea86914..5fd3dd0c5666171d4595979b5a9da0fb509c39d0 100644 (file)
@@ -2160,6 +2160,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
             "status",
             "size_bytes",
             "last_error",
+            "built_at",
             "documents",
             "document_ids",
             "document_count",
@@ -2172,6 +2173,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
             "status",
             "size_bytes",
             "last_error",
+            "built_at",
             "documents",
             "document_count",
         )
@@ -2223,10 +2225,14 @@ class ShareBundleSerializer(OwnedObjectSerializer):
 
         ordered_documents = [documents_by_id[doc_id] for doc_id in document_ids]
         share_bundle.documents.set(ordered_documents)
+        share_bundle.document_total = len(ordered_documents)
 
         return share_bundle
 
     def get_document_count(self, obj: ShareBundle) -> int:
+        count = getattr(obj, "document_total", None)
+        if count is not None:
+            return count
         return obj.documents.count()
 
 
index 17bfce3b066c845b3df81037380ea460bdc0d145..2a942f4b95c0c692a0c2db316b2e134aac648ea6 100644 (file)
@@ -3,7 +3,9 @@ import hashlib
 import logging
 import shutil
 import uuid
+import zipfile
 from pathlib import Path
+from tempfile import NamedTemporaryFile
 from tempfile import TemporaryDirectory
 
 import tqdm
@@ -22,6 +24,8 @@ from whoosh.writing import AsyncWriter
 from documents import index
 from documents import sanity_checker
 from documents.barcodes import BarcodePlugin
+from documents.bulk_download import ArchiveOnlyStrategy
+from documents.bulk_download import OriginalsOnlyStrategy
 from documents.caching import clear_document_caches
 from documents.classifier import DocumentClassifier
 from documents.classifier import load_classifier
@@ -39,6 +43,8 @@ from documents.models import CustomFieldInstance
 from documents.models import Document
 from documents.models import DocumentType
 from documents.models import PaperlessTask
+from documents.models import ShareBundle
+from documents.models import ShareLink
 from documents.models import StoragePath
 from documents.models import Tag
 from documents.models import Workflow
@@ -563,3 +569,92 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
 
     if affected:
         bulk_update_documents.delay(document_ids=list(affected))
+
+
+@shared_task
+def build_share_bundle(bundle_id: int):
+    """
+    Build the ZIP archive for a share bundle and record the outcome.
+
+    Any previously built file is removed and the bundle is marked PROCESSING.
+    The archive is written to a temporary file in settings.SCRATCH_DIR, moved
+    into settings.SHARE_BUNDLE_DIR as "<slug>.zip", and the bundle is then
+    marked READY with its file path, size and build time.
+
+    On any failure the bundle is marked FAILED with the error text and the
+    exception is re-raised. If the bundle no longer exists, a warning is
+    logged and nothing else happens.
+    """
+    try:
+        # The documents are fetched below with an explicit ordering, which
+        # always issues its own query, so prefetching them here would only
+        # add a redundant one.
+        bundle = ShareBundle.objects.get(pk=bundle_id)
+    except ShareBundle.DoesNotExist:
+        logger.warning("Share bundle %s no longer exists.", bundle_id)
+        return
+
+    bundle.remove_file()
+    bundle.status = ShareBundle.Status.PROCESSING
+    bundle.last_error = ""
+    bundle.size_bytes = None
+    bundle.built_at = None
+    bundle.file_path = ""
+    bundle.save(
+        update_fields=[
+            "status",
+            "last_error",
+            "size_bytes",
+            "built_at",
+            "file_path",
+        ],
+    )
+
+    documents = list(bundle.documents.all().order_by("pk"))
+
+    # Only reserve a unique scratch path here; the archive itself is written
+    # by ZipFile below, after this handle has been closed.
+    with NamedTemporaryFile(
+        dir=settings.SCRATCH_DIR,
+        suffix=".zip",
+        delete=False,
+    ) as temp_zip:
+        temp_zip_path = Path(temp_zip.name)
+
+    try:
+        strategy_class = (
+            ArchiveOnlyStrategy
+            if bundle.file_version == ShareLink.FileVersion.ARCHIVE
+            else OriginalsOnlyStrategy
+        )
+        with zipfile.ZipFile(temp_zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
+            strategy = strategy_class(zipf)
+            for document in documents:
+                strategy.add_document(document)
+
+        output_dir = settings.SHARE_BUNDLE_DIR
+        output_dir.mkdir(parents=True, exist_ok=True)
+        final_path = (output_dir / f"{bundle.slug}.zip").resolve()
+        if final_path.exists():
+            final_path.unlink()
+        shutil.move(str(temp_zip_path), final_path)
+
+        # Store the path relative to MEDIA_ROOT when possible; fall back to
+        # the absolute path if the output directory lies outside of it.
+        try:
+            bundle.file_path = str(final_path.relative_to(settings.MEDIA_ROOT))
+        except ValueError:
+            bundle.file_path = str(final_path)
+        bundle.size_bytes = final_path.stat().st_size
+        bundle.status = ShareBundle.Status.READY
+        bundle.built_at = timezone.now()
+        bundle.last_error = ""
+        bundle.save(
+            update_fields=[
+                "file_path",
+                "size_bytes",
+                "status",
+                "built_at",
+                "last_error",
+            ],
+        )
+        logger.info("Built share bundle %s", bundle.pk)
+    except Exception as exc:
+        logger.exception("Failed to build share bundle %s: %s", bundle_id, exc)
+        bundle.status = ShareBundle.Status.FAILED
+        bundle.last_error = str(exc)
+        bundle.save(update_fields=["status", "last_error"])
+        raise
+    finally:
+        # Single cleanup point for the scratch file. After a successful move
+        # it is already gone, which missing_ok turns into a no-op. Cleanup is
+        # best-effort so it never masks the real outcome of the build.
+        try:
+            temp_zip_path.unlink(missing_ok=True)
+        except OSError:
+            pass
index f305feef61e5fae95c851d604d2241b443857a2f..5137f8cc3fbd3b667bac82360d5eeea5d440f519 100644 (file)
@@ -182,6 +182,7 @@ from documents.serialisers import WorkflowActionSerializer
 from documents.serialisers import WorkflowSerializer
 from documents.serialisers import WorkflowTriggerSerializer
 from documents.signals import document_updated
+from documents.tasks import build_share_bundle
 from documents.tasks import consume_file
 from documents.tasks import empty_trash
 from documents.tasks import index_optimize
@@ -2618,7 +2619,12 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
     ordering_fields = ("created", "expiration", "status")
 
     def get_queryset(self):
-        return super().get_queryset().prefetch_related("documents")
+        return (
+            super()
+            .get_queryset()
+            .prefetch_related("documents")
+            .annotate(document_total=Count("documents", distinct=True))
+        )
 
     def create(self, request, *args, **kwargs):
         serializer = self.get_serializer(data=request.data)
@@ -2651,17 +2657,68 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
                     },
                 )
 
-        serializer.save(
+        document_map = {document.pk: document for document in documents}
+        ordered_documents = [document_map[doc_id] for doc_id in document_ids]
+
+        bundle = serializer.save(
             owner=request.user,
-            documents=documents,
+            documents=ordered_documents,
+        )
+        bundle.remove_file()
+        bundle.status = ShareBundle.Status.PENDING
+        bundle.last_error = ""
+        bundle.size_bytes = None
+        bundle.built_at = None
+        bundle.file_path = ""
+        bundle.save(
+            update_fields=[
+                "status",
+                "last_error",
+                "size_bytes",
+                "built_at",
+                "file_path",
+            ],
         )
-        headers = self.get_success_headers(serializer.data)
+        build_share_bundle.delay(bundle.pk)
+        bundle.document_total = len(ordered_documents)
+        response_serializer = self.get_serializer(bundle)
+        headers = self.get_success_headers(response_serializer.data)
         return Response(
-            serializer.data,
+            response_serializer.data,
             status=status.HTTP_201_CREATED,
             headers=headers,
         )
 
+    @action(detail=True, methods=["post"])
+    def rebuild(self, request, pk=None):
+        """
+        Reset a bundle to PENDING and queue a fresh build.
+
+        Responds with 400 if the bundle is currently PROCESSING. Otherwise
+        any existing file is removed, the build metadata is cleared,
+        build_share_bundle is scheduled and the serialized bundle is returned.
+        """
+        bundle = self.get_object()
+        if bundle.status == ShareBundle.Status.PROCESSING:
+            return Response(
+                {"detail": _("Bundle is already being processed.")},
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+
+        bundle.remove_file()
+        reset_values = {
+            "status": ShareBundle.Status.PENDING,
+            "last_error": "",
+            "size_bytes": None,
+            "built_at": None,
+            "file_path": "",
+        }
+        for field_name, value in reset_values.items():
+            setattr(bundle, field_name, value)
+        bundle.save(update_fields=list(reset_values))
+        build_share_bundle.delay(bundle.pk)
+
+        # Reuse an already-known count when there is one; a missing or zero
+        # value falls back to counting the related documents.
+        known_total = getattr(bundle, "document_total", None)
+        bundle.document_total = known_total or bundle.documents.count()
+        return Response(self.get_serializer(bundle).data)
+
 
 class SharedLinkView(View):
     authentication_classes = []
@@ -2669,15 +2726,103 @@ class SharedLinkView(View):
 
     def get(self, request, slug):
         share_link = ShareLink.objects.filter(slug=slug).first()
-        if share_link is None:
+        if share_link is not None:
+            if (
+                share_link.expiration is not None
+                and share_link.expiration < timezone.now()
+            ):
+                return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
+            return serve_file(
+                doc=share_link.document,
+                use_archive=share_link.file_version == "archive",
+                disposition="inline",
+            )
+
+        share_bundle = ShareBundle.objects.filter(slug=slug).first()
+        if share_bundle is None:
             return HttpResponseRedirect("/accounts/login/?sharelink_notfound=1")
-        if share_link.expiration is not None and share_link.expiration < timezone.now():
+
+        if (
+            share_bundle.expiration is not None
+            and share_bundle.expiration < timezone.now()
+        ):
             return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
-        return serve_file(
-            doc=share_link.document,
-            use_archive=share_link.file_version == "archive",
-            disposition="inline",
+
+        if share_bundle.status in {
+            ShareBundle.Status.PENDING,
+            ShareBundle.Status.PROCESSING,
+        }:
+            return HttpResponse(
+                _(
+                    "The shared bundle is still being prepared. Please try again later.",
+                ),
+                status=status.HTTP_202_ACCEPTED,
+            )
+
+        if share_bundle.status == ShareBundle.Status.FAILED:
+            share_bundle.remove_file()
+            share_bundle.status = ShareBundle.Status.PENDING
+            share_bundle.last_error = ""
+            share_bundle.size_bytes = None
+            share_bundle.built_at = None
+            share_bundle.file_path = ""
+            share_bundle.save(
+                update_fields=[
+                    "status",
+                    "last_error",
+                    "size_bytes",
+                    "built_at",
+                    "file_path",
+                ],
+            )
+            build_share_bundle.delay(share_bundle.pk)
+            return HttpResponse(
+                _(
+                    "The shared bundle is temporarily unavailable. A rebuild has been scheduled. Please try again later.",
+                ),
+                status=status.HTTP_503_SERVICE_UNAVAILABLE,
+            )
+
+        file_path = share_bundle.absolute_file_path
+        if file_path is None or not file_path.exists():
+            share_bundle.status = ShareBundle.Status.PENDING
+            share_bundle.last_error = ""
+            share_bundle.size_bytes = None
+            share_bundle.built_at = None
+            share_bundle.file_path = ""
+            share_bundle.save(
+                update_fields=[
+                    "status",
+                    "last_error",
+                    "size_bytes",
+                    "built_at",
+                    "file_path",
+                ],
+            )
+            build_share_bundle.delay(share_bundle.pk)
+            return HttpResponse(
+                _(
+                    "The shared bundle is being prepared. Please try again later.",
+                ),
+                status=status.HTTP_202_ACCEPTED,
+            )
+
+        response = FileResponse(file_path.open("rb"), content_type="application/zip")
+        download_name = f"paperless-share-{share_bundle.slug}.zip"
+        filename_normalized = (
+            normalize("NFKD", download_name)
+            .encode(
+                "ascii",
+                "ignore",
+            )
+            .decode("ascii")
         )
+        filename_encoded = quote(download_name)
+        response["Content-Disposition"] = (
+            f"attachment; filename='{filename_normalized}'; "
+            f"filename*=utf-8''{filename_encoded}"
+        )
+        return response
 
 
 def serve_file(*, doc: Document, use_archive: bool, disposition: str):
index 97d0ca06f30aaf51bd1429f6445da4d4c6665148..da75508d24631684a40be2ebb06d32c1cc35b8e5 100644 (file)
@@ -268,6 +268,7 @@ MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
 ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
 ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
 THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
+SHARE_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_bundles"
 
 DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")