Improve performance of selectinload result handling by up to ~30%

author Oliver Parker <oliver.parker@gridedge.co.uk>

Wed, 17 Jun 2026 15:15:18 +0000 (11:15 -0400)

committer Michael Bayer <mike_mp@zzzcomputing.com>

Wed, 17 Jun 2026 21:50:36 +0000 (21:50 +0000)
author Oliver Parker <oliver.parker@gridedge.co.uk>
Wed, 17 Jun 2026 15:15:18 +0000 (11:15 -0400)
committer Michael Bayer <mike_mp@zzzcomputing.com>
Wed, 17 Jun 2026 21:50:36 +0000 (21:50 +0000)
diff --git a/doc/build/changelog/unreleased_21/13363.rst b/doc/build/changelog/unreleased_21/13363.rst

index 4daaac45bf950439b31170360d986c8adbc34e4a..1490c5511d27c2cb8a2634810d53fe9671356944 100644 (file)
--- a/doc/build/changelog/unreleased_21/13363.rst
+++ b/doc/build/changelog/unreleased_21/13363.rst
@@ -9,3 +9,27 @@
      that individual rows can be logged. Benchmarks show a 3-16% improvement in
      ORM entity load times depending on query shape.  Pull request courtesy
      Oliver Parker.
+
+.. change::
+    :tags: performance, orm
+    :tickets: 13363
+
+    Improved performance of :func:`_orm.selectinload` and
+    :func:`_orm.subqueryload` result handling:
+
+    * in selectinloader, the primary key columns used to correlate related
+      rows are now selected directly rather than being wrapped in a
+      :class:`.Bundle`, and are read from positional slices of each result
+      row.  This removes the per-row :class:`.Row` construction that the
+      :class:`.Bundle` introduced, including for the common single-column
+      primary key case.
+
+    * removed use of ``groupby()`` + ``lambda`` against :class:`.Row` objects
+      in subqueryloader; rows are converted to plain tuples and the result
+      lists are built via ``append()``.
+
+    * many-to-one selectinload reads foreign key values directly from the
+      parent instance dictionary when present, falling back to attribute-level
+      access only for expired or deferred attributes.
+
+    Pull request courtesy Oliver Parker.
diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py

index 8609fd35451cc5959e6ad3927ba47b940c6dec80..14dab6019133f5ee7cdbb11be2206d18448908ee 100644 (file)
--- a/lib/sqlalchemy/orm/mapper.py
+++ b/lib/sqlalchemy/orm/mapper.py
@@ -3561,6 +3561,24 @@ class Mapper(
          prop = self._columntoproperty[column]
          return state.manager[prop.key].impl.get(state, dict_, passive=passive)
  
+    def _state_ident_getter(self, columns, passive):
+        lookup_keys = [self._columntoproperty[col].key for col in columns]
+        missing = object()
+
+        def get_ident(state, state_dict):
+            return tuple(
+                (
+                    v
+                    if (v := state_dict.get(lk, missing)) is not missing
+                    else self._get_state_attr_by_column(
+                        state, state_dict, col, passive=passive
+                    )
+                )
+                for lk, col in zip(lookup_keys, columns)
+            )
+
+        return get_ident
+
      def _set_committed_state_attr_by_column(self, state, dict_, column, value):
          prop = self._columntoproperty[column]
          state.manager[prop.key].impl.set_committed_value(state, dict_, value)
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py

index 2bb9c14cb46e69387302b87fc9fb4f273200374d..c5e35a395cb52169e5ebd3661096f44b5fa0aca7 100644 (file)
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -13,7 +13,6 @@ implementations, and related MapperOptions."""
  from __future__ import annotations
  
  import collections
-import itertools
  from typing import Any
  from typing import Dict
  from typing import Literal
@@ -1830,9 +1829,12 @@ class _SubqueryLoader(_PostLoader):
              # to work with baked query, the parameters may have been
              # updated since this query was created, so take these into account
  
-            rows = list(q.params(self.params))
-            for k, v in itertools.groupby(rows, lambda x: x[1:]):
-                self._data[k].extend(vv[0] for vv in v)
+            data = self._data
+            for row in q.params(self.params):
+                # group plain tuples rather than Row slices, which would
+                # incur Row construction and Row.__eq__ per row
+                tup = row._to_tuple_instance()
+                data[tup[1:]].append(tup[0])
  
          def loader(self, state, dict_, row):
              if self._data is None:
@@ -2971,6 +2973,7 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
              "in_expr",
              "pk_cols",
              "zero_idx",
+            "n_pk",
              "child_lookup_cols",
          ],
      )
@@ -3046,7 +3049,9 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
              in_expr = fk_cols[0]
              zero_idx = True
  
-        return self.query_info(False, False, in_expr, pk_cols, zero_idx, None)
+        return self.query_info(
+            False, False, in_expr, pk_cols, zero_idx, len(pk_cols), None
+        )
  
      def _init_for_omit_join_m2o(self):
          pk_cols = self.mapper.primary_key
@@ -3061,7 +3066,7 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
          lookup_cols = [lazyloader._equated_columns[pk] for pk in pk_cols]
  
          return self.query_info(
-            True, False, in_expr, pk_cols, zero_idx, lookup_cols
+            True, False, in_expr, pk_cols, zero_idx, len(pk_cols), lookup_cols
          )
  
      def _init_for_join(self):
@@ -3076,7 +3081,9 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
          else:
              in_expr = pk_cols[0]
              zero_idx = True
-        return self.query_info(False, True, in_expr, pk_cols, zero_idx, None)
+        return self.query_info(
+            False, True, in_expr, pk_cols, zero_idx, len(pk_cols), None
+        )
  
      def init_class_attribute(self, mapper):
          self.parent_property._get_strategy(
@@ -3192,17 +3199,20 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
  
              mapper = self.parent
  
+            # attribute keys for the lookup columns; when these are
+            # present in a state's dict, reading them directly is
+            # equivalent to the PASSIVE_NO_FETCH attribute lookup below.
+            # whether or not a key is present can vary per state, e.g.
+            # individual instances may have the attribute expired or
+            # deferred, so this is determined state-by-state
+            get_related_ident = mapper._state_ident_getter(
+                query_info.child_lookup_cols,
+                passive=attributes.PASSIVE_NO_FETCH,
+            )
+
              for state, overwrite in states:
                  state_dict = state.dict
-                related_ident = tuple(
-                    mapper._get_state_attr_by_column(
-                        state,
-                        state_dict,
-                        lk,
-                        passive=attributes.PASSIVE_NO_FETCH,
-                    )
-                    for lk in query_info.child_lookup_cols
-                )
+                related_ident = get_related_ident(state, state_dict)
                  # if the loaded parent objects do not have the foreign key
                  # to the related item loaded, then degrade into the joined
                  # version of selectinload
@@ -3244,12 +3254,9 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
                  ]
                  in_expr = effective_entity._adapt_element(in_expr)
  
-        bundle_ent = orm_util.Bundle("pk", *pk_cols)
-        bundle_sql = bundle_ent.__clause_element__()
-
          entity_sql = effective_entity.__clause_element__()
          q = Select._create_raw_select(
-            _raw_columns=[bundle_sql, entity_sql],
+            _raw_columns=[*pk_cols, entity_sql],
              _compile_options=_ORMCompileState.default_compile_options,
              _propagate_attrs={
                  "compile_state_plugin": "orm",
@@ -3269,16 +3276,15 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
                  entity_sql, self.parent_property._join_condition.secondaryjoin
              )
          elif not query_info.load_with_join:
-            # the Bundle we have in the "omit_join" case is against raw, non
-            # annotated columns, so to ensure the Query knows its primary
-            # entity, we add it explicitly.  If we made the Bundle against
-            # annotated columns, we hit a performance issue in this specific
-            # case, which is detailed in issue #4347.
+            # the pk columns in the "omit_join" case are raw, non-annotated
+            # columns, so to ensure the Query knows its primary entity, we
+            # add it explicitly.  Using annotated columns here would hit a
+            # performance issue detailed in issue #4347.
              q = q.select_from(effective_entity)
          else:
-            # in the non-omit_join case, the Bundle is against the annotated/
-            # mapped column of the parent entity, but the #4347 issue does not
-            # occur in this case.
+            # in the non-omit_join case, the pk columns are against the
+            # annotated/mapped column of the parent entity, but the #4347
+            # issue does not occur in this case.
              q = q.select_from(self._parent_alias).join(
                  getattr(self._parent_alias, self.parent_property.key).of_type(
                      effective_entity
@@ -3410,28 +3416,26 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
          chunksize,
      ):
          uselist = self.uselist
+        n_pk = query_info.n_pk
  
          # this sort is really for the benefit of the unit tests
          our_keys = sorted(our_states)
          while our_keys:
              chunk = our_keys[0:chunksize]
              our_keys = our_keys[chunksize:]
+            primary_keys = [
+                key[0] if query_info.zero_idx else key for key in chunk
+            ]
              result = context.session.execute(
                  q,
-                params={
-                    "primary_keys": [
-                        key[0] if query_info.zero_idx else key for key in chunk
-                    ]
-                },
+                params={"primary_keys": primary_keys},
                  execution_options=execution_options,
              )
              if result.context is not None and result.context.requires_uniquing:
                  rows = result.unique()
              else:
-                # consume the result as plain tuples, skipping per-row
-                # Row construction
                  rows = result._raw_all_tuples()
-            data = {k: v for k, v in rows}
+            data = {row[:n_pk]: row[n_pk] for row in rows}
  
              for key in chunk:
                  # for a real foreign key and no concurrent changes to the
@@ -3462,6 +3466,7 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
          self, our_states, query_info, q, context, execution_options, chunksize
      ):
          uselist = self.uselist
+        n_pk = query_info.n_pk
          _empty_result = () if uselist else None
  
          while our_states:
@@ -3481,12 +3486,10 @@ class _SelectInLoader(_PostLoader, util.MemoizedSlots):
              if result.context is not None and result.context.requires_uniquing:
                  rows = result.unique()
              else:
-                # consume the result as plain tuples, skipping per-row
-                # Row construction
                  rows = result._raw_all_tuples()
              data = collections.defaultdict(list)
-            for k, v in itertools.groupby(rows, lambda x: x[0]):
-                data[k].extend(vv[1] for vv in v)
+            for row in rows:
+                data[row[:n_pk]].append(row[n_pk])
  
              for key, state, state_dict, overwrite in chunk:
                  if not overwrite and self.key in state_dict:
diff --git a/test/orm/test_selectin_relations.py b/test/orm/test_selectin_relations.py

index af6f8fe5cf35454ca1ce2b7760a28dc76642beaf..926002739358e20348cbcdf937231a5df453c459 100644 (file)
--- a/test/orm/test_selectin_relations.py
+++ b/test/orm/test_selectin_relations.py
@@ -4654,3 +4654,88 @@ class TestCompositePlusNonComposite(fixtures.DeclarativeMappedTest):
  
          eq_(a2.bs, [B2()])
          eq_(a1.bs, [B()])
+
+
+class UselistFalseMultipleRowsTest(fixtures.DeclarativeMappedTest):
+    """test the warning emitted by the selectin loader for a
+    uselist=False relationship that matches more than one row"""
+
+    @classmethod
+    def setup_classes(cls):
+        Base = cls.DeclarativeBasic
+
+        class A(ComparableEntity, Base):
+            __tablename__ = "a"
+            id = Column(Integer, primary_key=True)
+            b = relationship("B", uselist=False)
+
+        class B(ComparableEntity, Base):
+            __tablename__ = "b"
+            id = Column(Integer, primary_key=True)
+            a_id = Column(ForeignKey("a.id"))
+
+    @classmethod
+    def insert_data(cls, connection):
+        A, B = cls.classes("A", "B")
+        s = Session(connection)
+        s.add(A(id=1))
+        s.add_all([B(id=1, a_id=1), B(id=2, a_id=1)])
+        s.commit()
+
+    def test_multiple_rows_uselist_false_warns(self):
+        A = self.classes.A
+        s = fixture_session()
+        with testing.expect_warnings(
+            "Multiple rows returned with uselist=False for "
+            "eagerly-loaded attribute 'A.b'"
+        ):
+            a1 = s.query(A).options(selectinload(A.b)).one()
+        assert a1.b is not None
+        assert a1.b.id in (1, 2)
+
+
+class SelectinM2ONoRelatedRowTest(fixtures.DeclarativeMappedTest):
+    """test many-to-one selectinload where the candidate foreign key
+    value matches no related row, or is NULL"""
+
+    @classmethod
+    def setup_classes(cls):
+        Base = cls.DeclarativeBasic
+
+        class A(ComparableEntity, Base):
+            __tablename__ = "a"
+            id = Column(Integer, primary_key=True)
+
+        class B(ComparableEntity, Base):
+            __tablename__ = "b"
+            id = Column(Integer, primary_key=True)
+            a_id = Column(Integer)
+            a = relationship("A", primaryjoin="foreign(B.a_id) == A.id")
+
+    @classmethod
+    def insert_data(cls, connection):
+        A, B = cls.classes("A", "B")
+        s = Session(connection)
+        s.add(A(id=1))
+        s.add_all(
+            [
+                B(id=1, a_id=1),
+                B(id=2, a_id=42),
+                B(id=3, a_id=None),
+            ]
+        )
+        s.commit()
+
+    def test_missing_and_null_fk(self):
+        A, B = self.classes("A", "B")
+        s = fixture_session()
+        bs = s.query(B).order_by(B.id).options(selectinload(B.a)).all()
+
+        with self.assert_statement_count(testing.db, 0):
+            eq_(bs[0].a, A(id=1))
+
+            # a_id value with no matching A row
+            is_(bs[1].a, None)
+
+            # NULL a_id
+            is_(bs[2].a, None)
diff --git a/test/profiles.txt b/test/profiles.txt

index 9fcd432a06de988309def41a019084aa7d77ec5e..c58aba1883b4a9bec83f2d84ed0596c92b5f474c 100644 (file)
--- a/test/profiles.txt
+++ b/test/profiles.txt
@@ -408,10 +408,10 @@ test.aaa_profiling.test_orm.QueryTest.test_query_cols x86_64_linux_cpython_3.14_
  
  # TEST: test.aaa_profiling.test_orm.SelectInEagerLoadTest.test_round_trip_results
  
-test.aaa_profiling.test_orm.SelectInEagerLoadTest.test_round_trip_results x86_64_linux_cpython_3.13_sqlite_pysqlite_dbapiunicode_cextensions 271805
-test.aaa_profiling.test_orm.SelectInEagerLoadTest.test_round_trip_results x86_64_linux_cpython_3.13_sqlite_pysqlite_dbapiunicode_nocextensions 298205
-test.aaa_profiling.test_orm.SelectInEagerLoadTest.test_round_trip_results x86_64_linux_cpython_3.14_sqlite_pysqlite_dbapiunicode_cextensions 273005
-test.aaa_profiling.test_orm.SelectInEagerLoadTest.test_round_trip_results x86_64_linux_cpython_3.14_sqlite_pysqlite_dbapiunicode_nocextensions 297705
+test.aaa_profiling.test_orm.SelectInEagerLoadTest.test_round_trip_results x86_64_linux_cpython_3.13_sqlite_pysqlite_dbapiunicode_cextensions 248605
+test.aaa_profiling.test_orm.SelectInEagerLoadTest.test_round_trip_results x86_64_linux_cpython_3.13_sqlite_pysqlite_dbapiunicode_nocextensions 263605
+test.aaa_profiling.test_orm.SelectInEagerLoadTest.test_round_trip_results x86_64_linux_cpython_3.14_sqlite_pysqlite_dbapiunicode_cextensions 248905
+test.aaa_profiling.test_orm.SelectInEagerLoadTest.test_round_trip_results x86_64_linux_cpython_3.14_sqlite_pysqlite_dbapiunicode_nocextensions 263905
  
  # TEST: test.aaa_profiling.test_orm.SessionTest.test_expire_lots
author	Oliver Parker <oliver.parker@gridedge.co.uk>
	Wed, 17 Jun 2026 15:15:18 +0000 (11:15 -0400)
committer	Michael Bayer <mike_mp@zzzcomputing.com>
	Wed, 17 Jun 2026 21:50:36 +0000 (21:50 +0000)
doc/build/changelog/unreleased_21/13363.rst		patch \| blob \| blame \| history
lib/sqlalchemy/orm/mapper.py		patch \| blob \| blame \| history
lib/sqlalchemy/orm/strategies.py		patch \| blob \| blame \| history
test/orm/test_selectin_relations.py		patch \| blob \| blame \| history
test/profiles.txt		patch \| blob \| blame \| history