]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
refine transfer of cached ORM options for selectin, lazy
authorMike Bayer <mike_mp@zzzcomputing.com>
Tue, 16 Aug 2022 18:25:12 +0000 (14:25 -0400)
committerMike Bayer <mike_mp@zzzcomputing.com>
Wed, 17 Aug 2022 17:28:25 +0000 (13:28 -0400)
Fixed issue involving :func:`_orm.with_loader_criteria` where a closure
variable used as bound parameter value within the lambda would not carry
forward correctly into additional relationship loaders such as
:func:`_orm.selectinload` and :func:`_orm.lazyload` after the statement
were cached, using the stale originally-cached value instead.

This change brings forth a good refinement where we finally realize
we shouldn't be testing every ORM option with lots of switches, we
just let the option itself be given "here is your uncached version,
you are cached, tell us what to do!".   the current decision is
that strategy loader options used the cached in all cases as they
always have, with_loader_criteria uses the uncached, because the
uncached will have been invoked with new closure state that we
definitely need.   The only
edge that might not work is if with_loader_criteria referenced
an entity that is local to the query, namely a specific AliasedInsp,
however that's not a documented case for this.  if we had to do that,
then we perhaps would introduce a more complex reconcilation
logic, and this would also give us the hook to do that.

Fixes: #8399
Change-Id: Ided8e2123915131e3f11cf6b06d773039e73797a

doc/build/changelog/unreleased_14/8399.rst [new file with mode: 0644]
lib/sqlalchemy/orm/context.py
lib/sqlalchemy/orm/interfaces.py
lib/sqlalchemy/orm/strategies.py
lib/sqlalchemy/orm/strategy_options.py
test/orm/test_relationship_criteria.py

diff --git a/doc/build/changelog/unreleased_14/8399.rst b/doc/build/changelog/unreleased_14/8399.rst
new file mode 100644 (file)
index 0000000..aea9e52
--- /dev/null
@@ -0,0 +1,10 @@
+.. change::
+    :tags: bug, orm
+    :tickets: 8399
+
+    Fixed issue involving :func:`_orm.with_loader_criteria` where a closure
+    variable used as bound parameter value within the lambda would not carry
+    forward correctly into additional relationship loaders such as
+    :func:`_orm.selectinload` and :func:`_orm.lazyload` after the statement
+    were cached, using the stale originally-cached value instead.
+
index f8ea231395d7a83ec51e993b27170f7252740068..b762908c55619c464b54007f08654e2bf2ca85ab 100644 (file)
@@ -163,28 +163,24 @@ class QueryContext:
         self.params = params
         self.top_level_context = load_options._sa_top_level_orm_context
 
+        cached_options = compile_state.select_statement._with_options
+        uncached_options = statement._with_options
+
+        # see issue #7447 , #8399 for some background
+        # propagated loader options will be present on loaded InstanceState
+        # objects under state.load_options and are typically used by
+        # LazyLoader to apply options to the SELECT statement it emits.
+        # For compile state options (i.e. loader strategy options), these
+        # need to line up with the ".load_path" attribute which in
+        # loader.py is pulled from context.compile_state.current_path.
+        # so, this means these options have to be the ones from the
+        # *cached* statement that's travelling with compile_state, not the
+        # *current* statement which won't match up for an ad-hoc
+        # AliasedClass
         self.propagated_loader_options = tuple(
-            # issue 7447.
-            # propagated loader options will be present on loaded InstanceState
-            # objects under state.load_options and are typically used by
-            # LazyLoader to apply options to the SELECT statement it emits.
-            # For compile state options (i.e. loader strategy options), these
-            # need to line up with the ".load_path" attribute which in
-            # loader.py is pulled from context.compile_state.current_path.
-            # so, this means these options have to be the ones from the
-            # *cached* statement that's travelling with compile_state, not the
-            # *current* statement which won't match up for an ad-hoc
-            # AliasedClass
-            cached_o
-            for cached_o in compile_state.select_statement._with_options
-            if cached_o.propagate_to_loaders and cached_o._is_compile_state
-        ) + tuple(
-            # for user defined loader options that are not "compile state",
-            # those just need to be present as they are
-            uncached_o
-            for uncached_o in statement._with_options
-            if uncached_o.propagate_to_loaders
-            and not uncached_o._is_compile_state
+            opt._adapt_cached_option_to_uncached_option(self, uncached_opt)
+            for opt, uncached_opt in zip(cached_options, uncached_options)
+            if opt.propagate_to_loaders
         )
 
         self.attributes = dict(compile_state.attributes)
index 16062fffa3a255c5a67dbbbcdf57593576d7a9d7..545ea1e8cf62c71cb0018a92a76c0851d098779c 100644 (file)
@@ -77,6 +77,7 @@ if typing.TYPE_CHECKING:
     from .attributes import InstrumentedAttribute
     from .context import _MapperEntity
     from .context import ORMCompileState
+    from .context import QueryContext
     from .decl_api import RegistryType
     from .loading import _PopulatorDict
     from .mapper import Mapper
@@ -1092,6 +1093,50 @@ class ORMOption(ExecutableOption):
 
     _is_strategy_option = False
 
+    def _adapt_cached_option_to_uncached_option(
+        self, context: QueryContext, uncached_opt: ORMOption
+    ) -> ORMOption:
+        """adapt this option to the "uncached" version of itself in a
+        loader strategy context.
+
+        given "self" which is an option from a cached query, as well as the
+        corresponding option from the uncached version of the same query,
+        return the option we should use in a new query, in the context of a
+        loader strategy being asked to load related rows on behalf of that
+        cached query, which is assumed to be building a new query based on
+        entities passed to us from the cached query.
+
+        Currently this routine chooses between "self" and "uncached" without
+        manufacturing anything new. If the option is itself a loader strategy
+        option which has a path, that path needs to match to the entities being
+        passed to us by the cached query, so the :class:`_orm.Load` subclass
+        overrides this to return "self". For all other options, we return the
+        uncached form which may have changing state, such as a
+        with_loader_criteria() option which will very often have new state.
+
+        This routine could in the future involve
+        generating a new option based on both inputs if use cases arise,
+        such as if with_loader_criteria() needed to match up to
+        ``AliasedClass`` instances given in the parent query.
+
+        However, longer term it might be better to restructure things such that
+        ``AliasedClass`` entities are always matched up on their cache key,
+        instead of identity, in things like paths and such, so that this whole
+        issue of "the uncached option does not match the entities" goes away.
+        However this would make ``PathRegistry`` more complicated and difficult
+        to debug as well as potentially less performant in that it would be
+        hashing enormous cache keys rather than a simple AliasedInsp. UNLESS,
+        we could get cache keys overall to be reliably hashed into something
+        like an md5 key.
+
+        .. versionadded:: 1.4.41
+
+        """
+        if uncached_opt is not None:
+            return uncached_opt
+        else:
+            return self
+
 
 class CompileStateOption(HasCacheKey, ORMOption):
     """base for :class:`.ORMOption` classes that affect the compilation of
index 2c707439a1fc0d74e58e8d1a8c82c676be07ddad..599a12761bde9b2e1745c4142a3db8bde3ea99a8 100644 (file)
@@ -1026,7 +1026,6 @@ class LazyLoader(
 
         if extra_options:
             stmt._with_options += extra_options
-
         stmt._compile_options += {"_current_path": effective_path}
 
         if use_get:
@@ -3092,29 +3091,25 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
         # cached query, meaning it won't match on paths and loader lookups
         # and loaders like this one will be skipped if it is used in options.
         #
-        # Now we want to transfer loader options from the parent query to the
-        # "selectinload" query we're about to run.   Which query do we transfer
-        # the options from?  We use the cached query, because the options in
-        # that query will be in terms of the effective entity we were just
-        # handed.
+        # as it turns out, standard loader options like selectinload(),
+        # lazyload() that have a path need
+        # to come from the cached query so that the AliasedInsp etc. objects
+        # that are in the query line up with the object that's in the path
+        # of the strategy object. however other options like
+        # with_loader_criteria() that doesn't have a path (has a fixed entity)
+        # and needs to have access to the latest closure state in order to
+        # be correct, we need to use the uncached one.
         #
-        # But now the selectinload query we are running is *also*
-        # cached.  What if it's cached and running from some previous iteration
-        # of that AliasedInsp?  Well in that case it will also use the previous
-        # iteration of the loader options.   If the query expires and
-        # gets generated again, it will be handed the current effective_entity
-        # and the current _with_options, again in terms of whatever
-        # compile_state.select_statement happens to be right now, so the
-        # query will still be internally consistent and loader callables
-        # will be correctly invoked.
+        # as of #8399 we let the loader option itself figure out what it
+        # wants to do given cached and uncached version of itself.
 
         effective_path = path[self.parent_property]
 
         if orig_query is context.query:
-            options = new_options = orig_query._with_options
-            user_defined_options = []
+            new_options = orig_query._with_options
         else:
-            options = orig_query._with_options
+            cached_options = orig_query._with_options
+            uncached_options = context.query._with_options
 
             # propagate compile state options from the original query,
             # updating their "extra_criteria" as necessary.
@@ -3122,20 +3117,13 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
             # "orig" options if extra_criteria is present, because the copy
             # of extra_criteria will have different boundparam than that of
             # the QueryableAttribute in the path
-
             new_options = [
-                orig_opt._adjust_for_extra_criteria(context)
-                if orig_opt._is_strategy_option
-                else orig_opt
-                for orig_opt in options
-                if orig_opt._is_compile_state or orig_opt._is_legacy_option
-            ]
-
-            # propagate user defined options from the current query
-            user_defined_options = [
-                opt
-                for opt in context.query._with_options
-                if not opt._is_compile_state and not opt._is_legacy_option
+                orig_opt._adapt_cached_option_to_uncached_option(
+                    context, uncached_opt
+                )
+                for orig_opt, uncached_opt in zip(
+                    cached_options, uncached_options
+                )
             ]
 
         if loadopt and loadopt._extra_criteria:
@@ -3149,13 +3137,9 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
         if recursion_depth is not None:
             effective_path = effective_path._truncate_recursive()
 
-        q = q.options(*new_options)._update_compile_options(
-            {"_current_path": effective_path}
-        )
-
-        if user_defined_options:
-            q = q.options(*user_defined_options)
+        q = q.options(*new_options)
 
+        q = q._update_compile_options({"_current_path": effective_path})
         if context.populate_existing:
             q = q.execution_options(populate_existing=True)
 
index aa51eca16ac2cbf0867daf3e4f324aeac880112c..2aed60b61cb650253b3681fc465a30be38da9d5a 100644 (file)
@@ -67,6 +67,7 @@ if typing.TYPE_CHECKING:
     from .context import QueryContext
     from .interfaces import _StrategyKey
     from .interfaces import MapperProperty
+    from .interfaces import ORMOption
     from .mapper import Mapper
     from .path_registry import _PathRepresentation
     from ..sql._typing import _ColumnExpressionArgument
@@ -1072,6 +1073,11 @@ class Load(_AbstractLoad):
         load.propagate_to_loaders = False
         return load
 
+    def _adapt_cached_option_to_uncached_option(
+        self, context: QueryContext, uncached_opt: ORMOption
+    ) -> ORMOption:
+        return self._adjust_for_extra_criteria(context)
+
     def _adjust_for_extra_criteria(self, context: QueryContext) -> Load:
         """Apply the current bound parameters in a QueryContext to all
         occurrences "extra_criteria" stored within this ``Load`` object,
@@ -1082,12 +1088,15 @@ class Load(_AbstractLoad):
 
         orig_cache_key: Optional[CacheKey] = None
         replacement_cache_key: Optional[CacheKey] = None
+        found_crit = False
 
         def process(opt: _LoadElement) -> _LoadElement:
             if not opt._extra_criteria:
                 return opt
 
-            nonlocal orig_cache_key, replacement_cache_key
+            nonlocal orig_cache_key, replacement_cache_key, found_crit
+
+            found_crit = True
 
             # avoid generating cache keys for the queries if we don't
             # actually have any extra_criteria options, which is the
@@ -1107,14 +1116,17 @@ class Load(_AbstractLoad):
             )
             return opt
 
-        cloned = self._generate()
-
-        if self.context:
-            cloned.context = tuple(
-                process(value._clone()) for value in self.context
-            )
+        new_context = tuple(
+            process(value._clone()) if value._extra_criteria else value
+            for value in self.context
+        )
 
-        return cloned
+        if found_crit:
+            cloned = self._clone()
+            cloned.context = new_context
+            return cloned
+        else:
+            return self
 
     def _reconcile_query_entities_with_us(self, mapper_entities, raiseerr):
         """called at process time to allow adjustment of the root
index 4c89a69ca476629c86ed75c9f50f4ec3cd0f7951..16d2088ebffc75d2fb98d347ca6c31b52992a907 100644 (file)
@@ -481,6 +481,63 @@ class LoaderCriteriaTest(_Fixtures, testing.AssertsCompiledSQL):
             ),
         )
 
+    def test_select_selectinload_mapper_mapper_closure_criteria(
+        self, user_address_fixture
+    ):
+        User, Address = user_address_fixture
+
+        def get_statement(closure="name"):
+
+            stmt = select(User).options(
+                selectinload(User.addresses),
+                with_loader_criteria(
+                    Address, lambda cls: cls.email_address != closure
+                ),
+            )
+            return stmt
+
+        s = Session(testing.db, future=True)
+
+        stmt = get_statement(closure="name")
+        with self.sql_execution_asserter() as asserter:
+            s.execute(stmt).all()
+
+        asserter.assert_(
+            CompiledSQL(
+                "SELECT users.id, users.name FROM users",
+                [],
+            ),
+            CompiledSQL(
+                "SELECT addresses.user_id AS addresses_user_id, addresses.id "
+                "AS addresses_id, addresses.email_address "
+                "AS addresses_email_address FROM addresses "
+                "WHERE addresses.user_id IN (__[POSTCOMPILE_primary_keys]) "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"primary_keys": [7, 8, 9, 10], "closure_1": "name"}],
+            ),
+        )
+
+        stmt = get_statement(closure="new name")
+        with self.sql_execution_asserter() as asserter:
+            s.execute(stmt).all()
+
+        asserter.assert_(
+            CompiledSQL(
+                "SELECT users.id, users.name FROM users",
+                [],
+            ),
+            CompiledSQL(
+                "SELECT addresses.user_id AS addresses_user_id, addresses.id "
+                "AS addresses_id, addresses.email_address "
+                "AS addresses_email_address FROM addresses "
+                "WHERE addresses.user_id IN (__[POSTCOMPILE_primary_keys]) "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"primary_keys": [7, 8, 9, 10], "closure_1": "new name"}],
+            ),
+        )
+
     def test_select_lazyload_mapper_mapper_criteria(
         self, user_address_fixture
     ):
@@ -543,6 +600,125 @@ class LoaderCriteriaTest(_Fixtures, testing.AssertsCompiledSQL):
             ),
         )
 
+    def test_select_lazyload_mapper_mapper_closure_criteria(
+        self, user_address_fixture
+    ):
+        User, Address = user_address_fixture
+
+        def get_statement(closure="name"):
+
+            stmt = (
+                select(User)
+                .options(
+                    lazyload(User.addresses),
+                    with_loader_criteria(
+                        Address, lambda cls: cls.email_address != closure
+                    ),
+                )
+                .order_by(User.id)
+            )
+            return stmt
+
+        s = Session(testing.db, future=True)
+
+        stmt = get_statement(closure="name")
+        with self.sql_execution_asserter() as asserter:
+            for obj in s.scalars(stmt).all():
+                obj.addresses
+
+        asserter.assert_(
+            CompiledSQL(
+                "SELECT users.id, users.name FROM users ORDER BY users.id",
+                [],
+            ),
+            CompiledSQL(
+                "SELECT addresses.id AS addresses_id, "
+                "addresses.user_id AS addresses_user_id, "
+                "addresses.email_address AS addresses_email_address "
+                "FROM addresses WHERE :param_1 = addresses.user_id "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"param_1": 7, "closure_1": "name"}],
+            ),
+            CompiledSQL(
+                "SELECT addresses.id AS addresses_id, "
+                "addresses.user_id AS addresses_user_id, "
+                "addresses.email_address AS addresses_email_address "
+                "FROM addresses WHERE :param_1 = addresses.user_id "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"param_1": 8, "closure_1": "name"}],
+            ),
+            CompiledSQL(
+                "SELECT addresses.id AS addresses_id, "
+                "addresses.user_id AS addresses_user_id, "
+                "addresses.email_address AS addresses_email_address "
+                "FROM addresses WHERE :param_1 = addresses.user_id "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"param_1": 9, "closure_1": "name"}],
+            ),
+            CompiledSQL(
+                "SELECT addresses.id AS addresses_id, "
+                "addresses.user_id AS addresses_user_id, "
+                "addresses.email_address AS addresses_email_address "
+                "FROM addresses WHERE :param_1 = addresses.user_id "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"param_1": 10, "closure_1": "name"}],
+            ),
+        )
+
+        stmt = get_statement(closure="new name")
+        with self.sql_execution_asserter() as asserter:
+            for obj in s.scalars(
+                stmt, execution_options={"populate_existing": True}
+            ).all():
+                obj.addresses
+
+        asserter.assert_(
+            CompiledSQL(
+                "SELECT users.id, users.name FROM users ORDER BY users.id",
+                [],
+            ),
+            CompiledSQL(
+                "SELECT addresses.id AS addresses_id, "
+                "addresses.user_id AS addresses_user_id, "
+                "addresses.email_address AS addresses_email_address "
+                "FROM addresses WHERE :param_1 = addresses.user_id "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"param_1": 7, "closure_1": "new name"}],
+            ),
+            CompiledSQL(
+                "SELECT addresses.id AS addresses_id, "
+                "addresses.user_id AS addresses_user_id, "
+                "addresses.email_address AS addresses_email_address "
+                "FROM addresses WHERE :param_1 = addresses.user_id "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"param_1": 8, "closure_1": "new name"}],
+            ),
+            CompiledSQL(
+                "SELECT addresses.id AS addresses_id, "
+                "addresses.user_id AS addresses_user_id, "
+                "addresses.email_address AS addresses_email_address "
+                "FROM addresses WHERE :param_1 = addresses.user_id "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"param_1": 9, "closure_1": "new name"}],
+            ),
+            CompiledSQL(
+                "SELECT addresses.id AS addresses_id, "
+                "addresses.user_id AS addresses_user_id, "
+                "addresses.email_address AS addresses_email_address "
+                "FROM addresses WHERE :param_1 = addresses.user_id "
+                "AND addresses.email_address != :closure_1 "
+                "ORDER BY addresses.id",
+                [{"param_1": 10, "closure_1": "new name"}],
+            ),
+        )
+
     def test_select_aliased_inclaliased_criteria(self, user_address_fixture):
         User, Address = user_address_fixture