Perf/conditional unique selectinload

author Oliver Parker <oliver.parker@gridedge.co.uk>

Wed, 3 Jun 2026 06:51:13 +0000 (02:51 -0400)

committer Mike Bayer <mike_mp@zzzcomputing.com>

Wed, 3 Jun 2026 17:49:17 +0000 (13:49 -0400)
author Oliver Parker <oliver.parker@gridedge.co.uk>
Wed, 3 Jun 2026 06:51:13 +0000 (02:51 -0400)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Wed, 3 Jun 2026 17:49:17 +0000 (13:49 -0400)
diff --git a/doc/build/changelog/unreleased_21/13339.rst b/doc/build/changelog/unreleased_21/13339.rst

new file mode 100644 (file)

index 0000000..43dbbfe
--- /dev/null
+++ b/doc/build/changelog/unreleased_21/13339.rst
@@ -0,0 +1,14 @@
+.. change::
+    :tags: usecase, orm, performance
+    :tickets: 13339
+
+    Optimized :func:`_orm.selectinload` to skip the ``.unique()`` call on inner
+    result sets when no nested :func:`_orm.joinedload` on a collection is
+    present.  The uniquing pass is only required when a joined eager load
+    inflates rows due to a one-to-many or many-to-many JOIN; in the common case
+    of a leaf selectin load, rows are already unique by construction and the
+    per-row hashing overhead can be avoided. As a side effect, ``yield_per``
+    set in a ``do_orm_execute`` event for a :func:`_orm.selectinload`
+    relationship load no longer raises ``InvalidRequestError`` when no nested
+    collection joinedload is in effect, since ``.unique()`` is no longer called
+    in that path. Pull request courtesy Oliver Parker.
diff --git a/lib/sqlalchemy/engine/result.py b/lib/sqlalchemy/engine/result.py

index 280449f7e6f0d85fe0630d35faf8d100dced7083..d5f31f8e707198296171e71de4f9e40d97778354 100644 (file)
--- a/lib/sqlalchemy/engine/result.py
+++ b/lib/sqlalchemy/engine/result.py
@@ -1826,11 +1826,13 @@ class IteratorResult(Result[Unpack[_Ts]]):
          iterator: Iterator[_InterimSupportsScalarsRowType],
          raw: Optional[Result[Any]] = None,
          _source_supports_scalars: bool = False,
+        context: Optional[Any] = None,
      ):
          self._metadata = cursor_metadata
          self.iterator = iterator
          self.raw = raw
          self._source_supports_scalars = _source_supports_scalars
+        self.context = context
  
      @property
      def closed(self) -> bool:
@@ -1933,6 +1935,7 @@ class ChunkedIteratorResult(IteratorResult[Unpack[_Ts]]):
          source_supports_scalars: bool = False,
          raw: Optional[Result[Any]] = None,
          dynamic_yield_per: bool = False,
+        context: Optional[Any] = None,
      ):
          self._metadata = cursor_metadata
          self.chunks = chunks
@@ -1940,6 +1943,7 @@ class ChunkedIteratorResult(IteratorResult[Unpack[_Ts]]):
          self.raw = raw
          self.iterator = itertools.chain.from_iterable(self.chunks(None))
          self.dynamic_yield_per = dynamic_yield_per
+        self.context = context
  
      @_generative
      def yield_per(self, num: int) -> Self:
diff --git a/lib/sqlalchemy/orm/context.py b/lib/sqlalchemy/orm/context.py

index d09956537d72175c7376039a341e492d22c217e4..ab6b7b31176a667af05180d6c504f74c871ec66e 100644 (file)
--- a/lib/sqlalchemy/orm/context.py
+++ b/lib/sqlalchemy/orm/context.py
@@ -133,6 +133,10 @@ class QueryContext:
      post_load_paths: Dict[PathRegistry, _PostLoad]
      compile_state: _ORMCompileState
  
+    @property
+    def requires_uniquing(self) -> bool:
+        return bool(self.compile_state.multi_row_eager_loaders)
+
      class default_load_options(Options):
          _only_return_tuples = False
          _populate_existing = False
diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py

index 2b1ff19acb150ca4f61feb09b7bf818206953326..6bae934508096050a5176b6f278bc402c2a13934 100644 (file)
--- a/lib/sqlalchemy/orm/loading.py
+++ b/lib/sqlalchemy/orm/loading.py
@@ -281,6 +281,7 @@ def instances(
          source_supports_scalars=single_entity,
          raw=cursor,
          dynamic_yield_per=cursor.context._is_server_side,
+        context=context,
      )
  
      # filtered and single_entity are used to indicate to legacy Query that the
@@ -291,7 +292,7 @@ def instances(
      )
  
      # multi_row_eager_loaders OTOH is specific to joinedload.
-    if context.compile_state.multi_row_eager_loaders:
+    if context.requires_uniquing:
  
          def require_unique(obj):
              raise sa_exc.InvalidRequestError(
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py

index 3bfb511a11ba3e985ba5555c75e582bb95ce7127..dae48d2e6d2d8db1fe0980a12b14161ce5e81934 100644 (file)
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -3416,19 +3416,18 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
          while our_keys:
              chunk = our_keys[0:chunksize]
              our_keys = our_keys[chunksize:]
-            data = {
-                k: v
-                for k, v in context.session.execute(
-                    q,
-                    params={
-                        "primary_keys": [
-                            key[0] if query_info.zero_idx else key
-                            for key in chunk
-                        ]
-                    },
-                    execution_options=execution_options,
-                ).unique()
-            }
+            result = context.session.execute(
+                q,
+                params={
+                    "primary_keys": [
+                        key[0] if query_info.zero_idx else key for key in chunk
+                    ]
+                },
+                execution_options=execution_options,
+            )
+            if result.context is not None and result.context.requires_uniquing:
+                result = result.unique()
+            data = {k: v for k, v in result}
  
              for key in chunk:
                  # for a real foreign key and no concurrent changes to the
@@ -3470,15 +3469,15 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
                  for key, state, state_dict, overwrite in chunk
              ]
  
+            result = context.session.execute(
+                q,
+                params={"primary_keys": primary_keys},
+                execution_options=execution_options,
+            )
+            if result.context is not None and result.context.requires_uniquing:
+                result = result.unique()
              data = collections.defaultdict(list)
-            for k, v in itertools.groupby(
-                context.session.execute(
-                    q,
-                    params={"primary_keys": primary_keys},
-                    execution_options=execution_options,
-                ).unique(),
-                lambda x: x[0],
-            ):
+            for k, v in itertools.groupby(result, lambda x: x[0]):
                  data[k].extend(vv[1] for vv in v)
  
              for key, state, state_dict, overwrite in chunk:
diff --git a/test/orm/test_events.py b/test/orm/test_events.py

index 11756e365d44e0550249f383f73e5221957ee3dc..b39d3f533c0e9c5bb6733e87b962c998681e4c5e 100644 (file)
--- a/test/orm/test_events.py
+++ b/test/orm/test_events.py
@@ -528,8 +528,14 @@ class ORMExecuteTest(
              "autoflush": True,
              "is_post_load": True,
          }
+        # yield_per conflicts with unique() inside the loader.  immediateload
+        # calls unique() unconditionally; selectinload only calls unique() when
+        # a nested joinedload on a collection inflates rows, so without one it
+        # no longer conflicts.
          operation_should_fail = (
-            event_style.set_in_event_for_relationship and option_type.yield_per
+            event_style.set_in_event_for_relationship
+            and option_type.yield_per
+            and loader_opt is immediateload
          )
  
          if event_style.noop_event:
@@ -544,8 +550,7 @@ class ORMExecuteTest(
              # event where we actually set these options for relationship
              # loaders.  assert that what we do here does in fact get
              # to those loaders.   the yield_per setting will fail for
-            # immediateload and selectinload because they both use unique()
-            # on the result.
+            # immediateload because it calls unique() unconditionally.
              def go(context):
                  if context.is_relationship_load:
                      if option_type.yield_per:
diff --git a/test/orm/test_selectin_relations.py b/test/orm/test_selectin_relations.py

index ba1d7e4f5676964ce4b36a1c8e70149ad70b12b2..af6f8fe5cf35454ca1ce2b7760a28dc76642beaf 100644 (file)
--- a/test/orm/test_selectin_relations.py
+++ b/test/orm/test_selectin_relations.py
@@ -4227,6 +4227,42 @@ class M2MOmitJoinTest(
                  [3],
              )
  
+    def test_m2m_selectin_no_duplicate_children(self, simple_m2m, connection):
+        """selectinload over m2m (omit_join fast path) must load the correct
+        collection members and must not produce duplicate items.
+
+        The simple_m2m fixture has a1.bs=[b1,b2] and a2.bs=[b1].
+        """
+        A = simple_m2m
+
+        with Session(connection) as session:
+
+            def go():
+                statement = (
+                    select(A).options(selectinload(A.bs)).order_by(A.id)
+                )
+                return session.execute(statement).scalars().all()
+
+            results = self.assert_sql_execution(
+                connection,
+                go,
+                CompiledSQL("SELECT a.id FROM a ORDER BY a.id", {}),
+                CompiledSQL(
+                    "SELECT a_b.a_id, b.id "
+                    "FROM a_b JOIN b ON b.id = a_b.b_id "
+                    "WHERE a_b.a_id IN "
+                    "(__[POSTCOMPILE_primary_keys])",
+                    {"primary_keys": [1, 2]},
+                ),
+            )
+
+        eq_(sorted(b.id for b in results[0].bs), [1, 2])
+        eq_(sorted(b.id for b in results[1].bs), [1])
+
+        for a in results:
+            b_ids = [b.id for b in a.bs]
+            eq_(len(b_ids), len(set(b_ids)))
+
  
  class SameNamePolymorphicTest(fixtures.DeclarativeMappedTest):
      @classmethod
@@ -4412,6 +4448,150 @@ class TestBakedCancelsCorrectly(fixtures.DeclarativeMappedTest):
          self.assert_sql_count(testing.db, go, 2)
  
  
+class TestSelectinWithNestedJoinedCollectionDedup(
+    fixtures.DeclarativeMappedTest
+):
+    """Regression guard for selectinload(...).joinedload/selectinload(...) on a
+    collection.
+
+    When the inner selectin query uses joinedload on a collection, the result
+    rows are multiplied (one row per grandchild).  The .unique() call in
+    _load_via_parent must deduplicate those rows so that each parent row ends
+    up with exactly one copy of each child object.
+
+    Also verifies that the joinedload is folded into the selectin query rather
+    than issuing a separate third query — the total SQL count must be 2 for
+    joinedload and 3 for nested selectinload.
+    """
+
+    @classmethod
+    def setup_classes(cls):
+        Base = cls.DeclarativeBasic
+
+        class User(ComparableEntity, Base):
+            __tablename__ = "sel_dedup_user"
+            id = Column(Integer, primary_key=True)
+            name = Column(String(50))
+            addresses = relationship(
+                "Address", back_populates="user", order_by="Address.id"
+            )
+
+        class Address(ComparableEntity, Base):
+            __tablename__ = "sel_dedup_address"
+            id = Column(Integer, primary_key=True)
+            user_id = Column(ForeignKey("sel_dedup_user.id"))
+            email = Column(String(50))
+            user = relationship("User", back_populates="addresses")
+            dingalings = relationship(
+                "Dingaling", back_populates="address", order_by="Dingaling.id"
+            )
+
+        class Dingaling(ComparableEntity, Base):
+            __tablename__ = "sel_dedup_dingaling"
+            id = Column(Integer, primary_key=True)
+            address_id = Column(ForeignKey("sel_dedup_address.id"))
+            data = Column(String(50))
+            address = relationship("Address", back_populates="dingalings")
+
+    @classmethod
+    def insert_data(cls, connection):
+        User, Address, Dingaling = cls.classes("User", "Address", "Dingaling")
+        sess = Session(connection)
+        # user 1: one address with TWO dingalings — this is the key fixture.
+        # When selectinload(User.addresses).joinedload(Address.dingalings) runs
+        # the inner selectin query, address 1 will appear in 2 result rows
+        # (one per dingaling).  Without .unique(), loading.require_unique would
+        # raise InvalidRequestError: "The unique() method must be invoked on
+        # this Result".
+        sess.add(
+            User(
+                id=1,
+                name="u1",
+                addresses=[
+                    Address(
+                        id=1,
+                        email="a1@example.com",
+                        dingalings=[
+                            Dingaling(id=1, data="d1"),
+                            Dingaling(id=2, data="d2"),
+                        ],
+                    ),
+                    Address(id=2, email="a2@example.com"),
+                ],
+            )
+        )
+        # user 2: one address, one dingaling — control case
+        sess.add(
+            User(
+                id=2,
+                name="u2",
+                addresses=[
+                    Address(
+                        id=3,
+                        email="a3@example.com",
+                        dingalings=[Dingaling(id=3, data="d3")],
+                    )
+                ],
+            )
+        )
+        sess.commit()
+
+    @testing.combinations(
+        ("joinedload", 2),
+        ("selectinload", 3),
+        id_="sa",
+        argnames="inner_loader_name,expected_sql_count",
+    )
+    def test_selectin_with_nested_joined_collection_still_dedupes(
+        self, inner_loader_name, expected_sql_count
+    ):
+        """Regression: selectinload(...).joinedload/selectinload(...) on a
+        collection must continue to dedupe inner rows after the
+        conditional-unique optimization.
+        """
+        User, Address, Dingaling = self.classes("User", "Address", "Dingaling")
+        sess = fixture_session()
+
+        if inner_loader_name == "joinedload":
+            inner_opt = selectinload(User.addresses).joinedload(
+                Address.dingalings
+            )
+        else:
+            inner_opt = selectinload(User.addresses).selectinload(
+                Address.dingalings
+            )
+
+        def go():
+            # expunge so we get fresh queries on each call
+            sess.expunge_all()
+            return sess.query(User).options(inner_opt).order_by(User.id).all()
+
+        users = self.assert_sql_count(testing.db, go, expected_sql_count)
+
+        eq_(len(users), 2)
+
+        u1 = users[0]
+        # Address 1 has 2 dingalings; without .unique() the selectin result
+        # would produce 2 rows for address 1 and loading.require_unique would
+        # raise InvalidRequestError.  The assertions below are belt-and-
+        # suspenders for if the conditional logic changes.
+        address_ids = [a.id for a in u1.addresses]
+        eq_(len(address_ids), len(set(address_ids)))
+        eq_(len(u1.addresses), 2)
+
+        # Also verify the grandchildren loaded correctly
+        target_address = next(a for a in u1.addresses if a.id == 1)
+        eq_(len(target_address.dingalings), 2)
+
+        u2 = users[1]
+        address_ids2 = [a.id for a in u2.addresses]
+        assert len(address_ids2) == len(
+            set(address_ids2)
+        ), f"user {u2.id} has duplicate addresses: {address_ids2}"
+        eq_(len(u2.addresses), 1)
+        eq_(len(u2.addresses[0].dingalings), 1)
+
+
  class TestCompositePlusNonComposite(fixtures.DeclarativeMappedTest):
      __requires__ = ("tuple_in",)
author	Oliver Parker <oliver.parker@gridedge.co.uk>
	Wed, 3 Jun 2026 06:51:13 +0000 (02:51 -0400)
committer	Mike Bayer <mike_mp@zzzcomputing.com>
	Wed, 3 Jun 2026 17:49:17 +0000 (13:49 -0400)
doc/build/changelog/unreleased_21/13339.rst	[new file with mode: 0644]	patch \| blob
lib/sqlalchemy/engine/result.py		patch \| blob \| blame \| history
lib/sqlalchemy/orm/context.py		patch \| blob \| blame \| history
lib/sqlalchemy/orm/loading.py		patch \| blob \| blame \| history
lib/sqlalchemy/orm/strategies.py		patch \| blob \| blame \| history
test/orm/test_events.py		patch \| blob \| blame \| history
test/orm/test_selectin_relations.py		patch \| blob \| blame \| history