git.ipfire.org Git - thirdparty/paperless-ngx.git/commitdiff
Performance fix: use subqueries to improve object retrieval in large installs (#11950)
author shamoon <4887959+shamoon@users.noreply.github.com>
Thu, 5 Feb 2026 16:46:32 +0000 (08:46 -0800)
committer GitHub <noreply@github.com>
Thu, 5 Feb 2026 16:46:32 +0000 (08:46 -0800)
src/documents/permissions.py
src/documents/serialisers.py
src/documents/views.py

index ac6d3f9cae3239d3c48772013eaaba36e5cfa534..813136a3dd78dbe47dec47145231179711643fe3 100644 (file)
@@ -2,10 +2,17 @@ from django.contrib.auth.models import Group
 from django.contrib.auth.models import Permission
 from django.contrib.auth.models import User
 from django.contrib.contenttypes.models import ContentType
+from django.db.models import Count
+from django.db.models import IntegerField
+from django.db.models import OuterRef
 from django.db.models import Q
 from django.db.models import QuerySet
+from django.db.models import Subquery
+from django.db.models.functions import Cast
+from django.db.models.functions import Coalesce
 from guardian.core import ObjectPermissionChecker
 from guardian.models import GroupObjectPermission
+from guardian.models import UserObjectPermission
 from guardian.shortcuts import assign_perm
 from guardian.shortcuts import get_objects_for_user
 from guardian.shortcuts import get_users_with_perms
@@ -129,23 +136,96 @@ def set_permissions_for_object(permissions: dict, object, *, merge: bool = False
                         )
 
 
+def _permitted_document_ids(user):
+    """
+    Return a queryset of document IDs the user may view, limited to non-deleted
+    documents. This intentionally avoids ``get_objects_for_user`` to keep the
+    subquery small and index-friendly.
+    """
+
+    base_docs = Document.objects.filter(deleted_at__isnull=True).only("id", "owner")
+
+    if user is None or not getattr(user, "is_authenticated", False):
+        # Just Anonymous user e.g. for drf-spectacular
+        return base_docs.filter(owner__isnull=True).values_list("id", flat=True)
+
+    if getattr(user, "is_superuser", False):
+        return base_docs.values_list("id", flat=True)
+
+    document_ct = ContentType.objects.get_for_model(Document)
+    perm_filter = {
+        "permission__codename": "view_document",
+        "permission__content_type": document_ct,
+    }
+
+    user_perm_docs = (
+        UserObjectPermission.objects.filter(user=user, **perm_filter)
+        .annotate(object_pk_int=Cast("object_pk", IntegerField()))
+        .values_list("object_pk_int", flat=True)
+    )
+
+    group_perm_docs = (
+        GroupObjectPermission.objects.filter(group__user=user, **perm_filter)
+        .annotate(object_pk_int=Cast("object_pk", IntegerField()))
+        .values_list("object_pk_int", flat=True)
+    )
+
+    permitted_documents = user_perm_docs.union(group_perm_docs)
+
+    return base_docs.filter(
+        Q(owner=user) | Q(owner__isnull=True) | Q(id__in=permitted_documents),
+    ).values_list("id", flat=True)
+
+
 def get_document_count_filter_for_user(user):
     """
     Return the Q object used to filter document counts for the given user.
+
+    The filter is expressed as an ``id__in`` against a small subquery of permitted
+    document IDs to keep the generated SQL simple and avoid large OR clauses.
     """
 
-    if user is None or not getattr(user, "is_authenticated", False):
-        return Q(documents__deleted_at__isnull=True, documents__owner__isnull=True)
     if getattr(user, "is_superuser", False):
+        # Superuser: no permission filtering needed
         return Q(documents__deleted_at__isnull=True)
-    return Q(
-        documents__deleted_at__isnull=True,
-        documents__id__in=get_objects_for_user_owner_aware(
-            user,
-            "documents.view_document",
-            Document,
-        ).values_list("id", flat=True),
+
+    permitted_ids = _permitted_document_ids(user)
+    return Q(documents__id__in=permitted_ids)
+
+
+def annotate_document_count_for_related_queryset(
+    queryset,
+    through_model,
+    related_object_field: str,
+    target_field: str = "document_id",
+    user=None,
+):
+    """
+    Annotate a queryset with permissions-aware document counts using a subquery
+    against a relation table.
+
+    Args:
+        queryset: base queryset to annotate (must contain pk)
+        through_model: model representing the relation (e.g., Document.tags.through
+                       or CustomFieldInstance)
+        related_object_field: field on the relation pointing back to queryset pk
+        target_field: field on the relation pointing to Document id
+        user: the user for whom to filter permitted document ids
+    """
+
+    permitted_ids = _permitted_document_ids(user)
+    counts = (
+        through_model.objects.filter(
+            **{
+                related_object_field: OuterRef("pk"),
+                f"{target_field}__in": permitted_ids,
+            },
+        )
+        .values(related_object_field)
+        .annotate(c=Count(target_field))
+        .values("c")
     )
+    return queryset.annotate(document_count=Coalesce(Subquery(counts[:1]), 0))
 
 
 def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
index 75e73d8787db6e28ec848c935129db3e66123f39..a7d852fb8b61b77b647866f9e47422385cb04a78 100644 (file)
@@ -713,6 +713,9 @@ class StoragePathField(serializers.PrimaryKeyRelatedField):
 
 class CustomFieldSerializer(serializers.ModelSerializer):
     def __init__(self, *args, **kwargs):
+        # Ignore args passed by permissions mixin
+        kwargs.pop("user", None)
+        kwargs.pop("full_perms", None)
         context = kwargs.get("context")
         self.api_version = int(
             context.get("request").version
index 5a0f83699b169040f18ab5323eb70b1bd5e33ddd..babc4e9aacbd144beef42cec7880d3c6a52f91fb 100644 (file)
@@ -32,7 +32,6 @@ from django.db.models import Count
 from django.db.models import IntegerField
 from django.db.models import Max
 from django.db.models import Model
-from django.db.models import Q
 from django.db.models import Sum
 from django.db.models import When
 from django.db.models.functions import Length
@@ -128,6 +127,7 @@ from documents.matching import match_storage_paths
 from documents.matching import match_tags
 from documents.models import Correspondent
 from documents.models import CustomField
+from documents.models import CustomFieldInstance
 from documents.models import Document
 from documents.models import DocumentType
 from documents.models import Note
@@ -147,6 +147,7 @@ from documents.permissions import PaperlessAdminPermissions
 from documents.permissions import PaperlessNotePermissions
 from documents.permissions import PaperlessObjectPermissions
 from documents.permissions import ViewDocumentsPermissions
+from documents.permissions import annotate_document_count_for_related_queryset
 from documents.permissions import get_document_count_filter_for_user
 from documents.permissions import get_objects_for_user_owner_aware
 from documents.permissions import has_perms_owner_aware
@@ -370,22 +371,37 @@ class PermissionsAwareDocumentCountMixin(BulkPermissionMixin, PassUserMixin):
     Mixin to add document count to queryset, permissions-aware if needed
     """
 
+    # Default is simple relation path, override for through-table/count specialization.
+    document_count_through = None
+    document_count_source_field = None
+
     def get_document_count_filter(self):
         request = getattr(self, "request", None)
         user = getattr(request, "user", None) if request else None
         return get_document_count_filter_for_user(user)
 
     def get_queryset(self):
+        base_qs = super().get_queryset()
+
+        # Use optimized through-table counting when configured.
+        if self.document_count_through:
+            user = getattr(getattr(self, "request", None), "user", None)
+            return annotate_document_count_for_related_queryset(
+                base_qs,
+                through_model=self.document_count_through,
+                related_object_field=self.document_count_source_field,
+                user=user,
+            )
+
+        # Fallback: simple Count on relation with permission filter.
         filter = self.get_document_count_filter()
-        return (
-            super()
-            .get_queryset()
-            .annotate(document_count=Count("documents", filter=filter))
+        return base_qs.annotate(
+            document_count=Count("documents", filter=filter),
         )
 
 
 @extend_schema_view(**generate_object_with_permissions_schema(CorrespondentSerializer))
-class CorrespondentViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
+class CorrespondentViewSet(PermissionsAwareDocumentCountMixin, ModelViewSet):
     model = Correspondent
 
     queryset = Correspondent.objects.select_related("owner").order_by(Lower("name"))
@@ -422,8 +438,10 @@ class CorrespondentViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
 
 
 @extend_schema_view(**generate_object_with_permissions_schema(TagSerializer))
-class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
+class TagViewSet(PermissionsAwareDocumentCountMixin, ModelViewSet):
     model = Tag
+    document_count_through = Document.tags.through
+    document_count_source_field = "tag_id"
 
     queryset = Tag.objects.select_related("owner").order_by(
         Lower("name"),
@@ -466,12 +484,16 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
         descendant_pks = {pk for tag in all_tags for pk in tag.get_descendants_pks()}
 
         if descendant_pks:
-            filter_q = self.get_document_count_filter()
+            user = getattr(getattr(self, "request", None), "user", None)
             children_source = list(
-                Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
-                .select_related("owner")
-                .annotate(document_count=Count("documents", filter=filter_q))
-                .order_by(*ordering),
+                annotate_document_count_for_related_queryset(
+                    Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
+                    .select_related("owner")
+                    .order_by(*ordering),
+                    through_model=self.document_count_through,
+                    related_object_field=self.document_count_source_field,
+                    user=user,
+                ),
             )
         else:
             children_source = all_tags
@@ -498,7 +520,7 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
 
 
 @extend_schema_view(**generate_object_with_permissions_schema(DocumentTypeSerializer))
-class DocumentTypeViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
+class DocumentTypeViewSet(PermissionsAwareDocumentCountMixin, ModelViewSet):
     model = DocumentType
 
     queryset = DocumentType.objects.select_related("owner").order_by(Lower("name"))
@@ -2344,7 +2366,7 @@ class BulkDownloadView(GenericAPIView):
 
 
 @extend_schema_view(**generate_object_with_permissions_schema(StoragePathSerializer))
-class StoragePathViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
+class StoragePathViewSet(PermissionsAwareDocumentCountMixin, ModelViewSet):
     model = StoragePath
 
     queryset = StoragePath.objects.select_related("owner").order_by(
@@ -2861,7 +2883,7 @@ class WorkflowViewSet(ModelViewSet):
     )
 
 
-class CustomFieldViewSet(ModelViewSet):
+class CustomFieldViewSet(PermissionsAwareDocumentCountMixin, ModelViewSet):
     permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
 
     serializer_class = CustomFieldSerializer
@@ -2873,35 +2895,11 @@ class CustomFieldViewSet(ModelViewSet):
     filterset_class = CustomFieldFilterSet
 
     model = CustomField
+    document_count_through = CustomFieldInstance
+    document_count_source_field = "field_id"
 
     queryset = CustomField.objects.all().order_by("-created")
 
-    def get_queryset(self):
-        filter = (
-            Q(fields__document__deleted_at__isnull=True)
-            if self.request.user is None or self.request.user.is_superuser
-            else (
-                Q(
-                    fields__document__deleted_at__isnull=True,
-                    fields__document__id__in=get_objects_for_user_owner_aware(
-                        self.request.user,
-                        "documents.view_document",
-                        Document,
-                    ).values_list("id", flat=True),
-                )
-            )
-        )
-        return (
-            super()
-            .get_queryset()
-            .annotate(
-                document_count=Count(
-                    "fields",
-                    filter=filter,
-                ),
-            )
-        )
-
 
 @extend_schema_view(
     get=extend_schema(