From: Mike Bayer Date: Tue, 16 Aug 2022 18:25:12 +0000 (-0400) Subject: refine transfer of cached ORM options for selectin, lazy X-Git-Tag: rel_1_4_41~24^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0d66f491d0f062bda95e2997d4f70841ac2228d8;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git refine transfer of cached ORM options for selectin, lazy Fixed issue involving :func:`_orm.with_loader_criteria` where a closure variable used as bound parameter value within the lambda would not carry forward correctly into additional relationship loaders such as :func:`_orm.selectinload` and :func:`_orm.lazyload` after the statement were cached, using the stale originally-cached value instead. This change brings forth a good refinement where we finally realize we shouldn't be testing every ORM option with lots of switches, we just let the option itself be given "here is your uncached version, you are cached, tell us what to do!". the current decision is that strategy loader options used the cached in all cases as they always have, with_loader_criteria uses the uncached, because the uncached will have been invoked with new closure state that we definitely need. The only edge that might not work is if with_loader_criteria referenced an entity that is local to the query, namely a specific AliasedInsp, however that's not a documented case for this. if we had to do that, then we perhaps would introduce a more complex reconcilation logic, and this would also give us the hook to do that. For this approach to work in 1.4, state.load_options has to be ordered, so, this does the switch of load_options from set->tuple, which has been in 2.0 for a long time. if this change is not feasbile, due to affecting other areas, we may have to scale back this fix a bit, but for here, it's just two collections without any deep impacts. Fixes: #8399 Change-Id: Ided8e2123915131e3f11cf6b06d773039e73797a (cherry picked from commit 860d582028f6bbbb39cbf17698f7d6b7a8e458ea) --- diff --git a/doc/build/changelog/unreleased_14/8399.rst b/doc/build/changelog/unreleased_14/8399.rst new file mode 100644 index 0000000000..aea9e52381 --- /dev/null +++ b/doc/build/changelog/unreleased_14/8399.rst @@ -0,0 +1,10 @@ +.. change:: + :tags: bug, orm + :tickets: 8399 + + Fixed issue involving :func:`_orm.with_loader_criteria` where a closure + variable used as bound parameter value within the lambda would not carry + forward correctly into additional relationship loaders such as + :func:`_orm.selectinload` and :func:`_orm.lazyload` after the statement + were cached, using the stale originally-cached value instead. + diff --git a/lib/sqlalchemy/orm/context.py b/lib/sqlalchemy/orm/context.py index 9d4f652ea4..592a2c1e4d 100644 --- a/lib/sqlalchemy/orm/context.py +++ b/lib/sqlalchemy/orm/context.py @@ -106,29 +106,25 @@ class QueryContext(object): self.loaders_require_uniquing = False self.params = params - self.propagated_loader_options = { - # issue 7447. - # propagated loader options will be present on loaded InstanceState - # objects under state.load_options and are typically used by - # LazyLoader to apply options to the SELECT statement it emits. - # For compile state options (i.e. loader strategy options), these - # need to line up with the ".load_path" attribute which in - # loader.py is pulled from context.compile_state.current_path. - # so, this means these options have to be the ones from the - # *cached* statement that's travelling with compile_state, not the - # *current* statement which won't match up for an ad-hoc - # AliasedClass - cached_o - for cached_o in compile_state.select_statement._with_options - if cached_o.propagate_to_loaders and cached_o._is_compile_state - } | { - # for user defined loader options that are not "compile state", - # those just need to be present as they are - uncached_o - for uncached_o in statement._with_options - if uncached_o.propagate_to_loaders - and not uncached_o._is_compile_state - } + cached_options = compile_state.select_statement._with_options + uncached_options = statement._with_options + + # see issue #7447 , #8399 for some background + # propagated loader options will be present on loaded InstanceState + # objects under state.load_options and are typically used by + # LazyLoader to apply options to the SELECT statement it emits. + # For compile state options (i.e. loader strategy options), these + # need to line up with the ".load_path" attribute which in + # loader.py is pulled from context.compile_state.current_path. + # so, this means these options have to be the ones from the + # *cached* statement that's travelling with compile_state, not the + # *current* statement which won't match up for an ad-hoc + # AliasedClass + self.propagated_loader_options = tuple( + opt._adapt_cached_option_to_uncached_option(self, uncached_opt) + for opt, uncached_opt in zip(cached_options, uncached_options) + if opt.propagate_to_loaders + ) self.attributes = dict(compile_state.attributes) diff --git a/lib/sqlalchemy/orm/interfaces.py b/lib/sqlalchemy/orm/interfaces.py index 63295d0b9e..7e86326cc4 100644 --- a/lib/sqlalchemy/orm/interfaces.py +++ b/lib/sqlalchemy/orm/interfaces.py @@ -754,6 +754,46 @@ class ORMOption(ExecutableOption): _is_strategy_option = False + def _adapt_cached_option_to_uncached_option(self, context, uncached_opt): + """given "self" which is an option from a cached query, as well as the + corresponding option from the uncached version of the same query, + return the option we should use in a new query, in the context of a + loader strategy being asked to load related rows on behalf of that + cached query, which is assumed to be building a new query based on + entities passed to us from the cached query. + + Currently this routine chooses between "self" and "uncached" without + manufacturing anything new. If the option is itself a loader strategy + option which has a path, that path needs to match to the entities being + passed to us by the cached query, so the :class:`_orm.Load` subclass + overrides this to return "self". For all other options, we return the + uncached form which may have changing state, such as a + with_loader_criteria() option which will very often have new state. + + This routine could in the future involve + generating a new option based on both inputs if use cases arise, + such as if with_loader_criteria() needed to match up to + ``AliasedClass`` instances given in the parent query. + + However, longer term it might be better to restructure things such that + ``AliasedClass`` entities are always matched up on their cache key, + instead of identity, in things like paths and such, so that this whole + issue of "the uncached option does not match the entities" goes away. + However this would make ``PathRegistry`` more complicated and difficult + to debug as well as potentially less performant in that it would be + hashing enormous cache keys rather than a simple AliasedInsp. UNLESS, + we could get cache keys overall to be reliably hashed into something + like an md5 key. + + .. versionadded:: 1.4.41 + + + """ + if uncached_opt is not None: + return uncached_opt + else: + return self + class CompileStateOption(HasCacheKey, ORMOption): """base for :class:`.ORMOption` classes that affect the compilation of diff --git a/lib/sqlalchemy/orm/state.py b/lib/sqlalchemy/orm/state.py index 9718024292..b4e7076a4a 100644 --- a/lib/sqlalchemy/orm/state.py +++ b/lib/sqlalchemy/orm/state.py @@ -70,7 +70,7 @@ class InstanceState(interfaces.InspectionAttrInfo): session_id = None key = None runid = None - load_options = util.EMPTY_SET + load_options = () load_path = PathRegistry.root insert_order = None _strong_obj = None diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index 71aae00807..288e6e06bf 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -981,13 +981,11 @@ class LazyLoader(AbstractRelationshipLoader, util.MemoizedSlots): opts += ( orm_util.LoaderCriteriaOption(self.entity, extra_criteria), ) - stmt._with_options = opts else: # this path is used if there are not already any options # in the query, but an event may want to add them effective_path = state.mapper._path_registry[self.parent_property] - stmt._compile_options += {"_current_path": effective_path} if use_get: @@ -2932,29 +2930,25 @@ class SelectInLoader(PostLoader, util.MemoizedSlots): # cached query, meaning it won't match on paths and loader lookups # and loaders like this one will be skipped if it is used in options. # - # Now we want to transfer loader options from the parent query to the - # "selectinload" query we're about to run. Which query do we transfer - # the options from? We use the cached query, because the options in - # that query will be in terms of the effective entity we were just - # handed. + # as it turns out, standard loader options like selectinload(), + # lazyload() that have a path need + # to come from the cached query so that the AliasedInsp etc. objects + # that are in the query line up with the object that's in the path + # of the strategy object. however other options like + # with_loader_criteria() that doesn't have a path (has a fixed entity) + # and needs to have access to the latest closure state in order to + # be correct, we need to use the uncached one. # - # But now the selectinload query we are running is *also* - # cached. What if it's cached and running from some previous iteration - # of that AliasedInsp? Well in that case it will also use the previous - # iteration of the loader options. If the query expires and - # gets generated again, it will be handed the current effective_entity - # and the current _with_options, again in terms of whatever - # compile_state.select_statement happens to be right now, so the - # query will still be internally consistent and loader callables - # will be correctly invoked. + # as of #8399 we let the loader option itself figure out what it + # wants to do given cached and uncached version of itself. effective_path = path[self.parent_property] if orig_query is context.query: - options = new_options = orig_query._with_options - user_defined_options = [] + new_options = orig_query._with_options else: - options = orig_query._with_options + cached_options = orig_query._with_options + uncached_options = context.query._with_options # propagate compile state options from the original query, # updating their "extra_criteria" as necessary. @@ -2962,20 +2956,13 @@ class SelectInLoader(PostLoader, util.MemoizedSlots): # "orig" options if extra_criteria is present, because the copy # of extra_criteria will have different boundparam than that of # the QueryableAttribute in the path - new_options = [ - orig_opt._adjust_for_extra_criteria(context) - if orig_opt._is_strategy_option - else orig_opt - for orig_opt in options - if orig_opt._is_compile_state or orig_opt._is_legacy_option - ] - - # propagate user defined options from the current query - user_defined_options = [ - opt - for opt in context.query._with_options - if not opt._is_compile_state and not opt._is_legacy_option + orig_opt._adapt_cached_option_to_uncached_option( + context, uncached_opt + ) + for orig_opt, uncached_opt in zip( + cached_options, uncached_options + ) ] if loadopt and loadopt._extra_criteria: @@ -2986,12 +2973,9 @@ class SelectInLoader(PostLoader, util.MemoizedSlots): ), ) - q = q.options(*new_options)._update_compile_options( - {"_current_path": effective_path} - ) - if user_defined_options: - q = q.options(*user_defined_options) + q = q.options(*new_options) + q = q._update_compile_options({"_current_path": effective_path}) if context.populate_existing: q = q.execution_options(populate_existing=True) diff --git a/lib/sqlalchemy/orm/strategy_options.py b/lib/sqlalchemy/orm/strategy_options.py index c3dd5df3b5..1b5e762eb2 100644 --- a/lib/sqlalchemy/orm/strategy_options.py +++ b/lib/sqlalchemy/orm/strategy_options.py @@ -115,6 +115,9 @@ class Load(Generative, LoaderOption): load._extra_criteria = () return load + def _adapt_cached_option_to_uncached_option(self, context, uncached_opt): + return self._adjust_for_extra_criteria(context) + def _generate_extra_criteria(self, context): """Apply the current bound parameters in a QueryContext to the immediate "extra_criteria" stored with this Load object. diff --git a/test/orm/inheritance/test_poly_loading.py b/test/orm/inheritance/test_poly_loading.py index 1e3b15575d..31e5e4ca90 100644 --- a/test/orm/inheritance/test_poly_loading.py +++ b/test/orm/inheritance/test_poly_loading.py @@ -24,6 +24,7 @@ from sqlalchemy.sql.selectable import LABEL_STYLE_TABLENAME_PLUS_COL from sqlalchemy.testing import assertsql from sqlalchemy.testing import eq_ from sqlalchemy.testing import fixtures +from sqlalchemy.testing import mock from sqlalchemy.testing.assertions import expect_raises_message from sqlalchemy.testing.assertsql import AllOf from sqlalchemy.testing.assertsql import CompiledSQL @@ -963,15 +964,23 @@ class LazyLoaderTransfersOptsTest(fixtures.DeclarativeMappedTest): _cache_key_traversal = () propagate_to_loaders = True - any_opt = AnyOpt() - if strat is None: - opts = (any_opt,) - else: - opts = (strat(User.address), any_opt) + def _adjust_for_extra_criteria(self, context): + return self + + from sqlalchemy.orm.strategy_options import Load + + with mock.patch.object( + Load, "_adjust_for_extra_criteria", lambda self, ctx: self + ): + any_opt = AnyOpt() + if strat is None: + opts = (any_opt,) + else: + opts = (strat(User.address), any_opt) - u = sess.execute(select(User).options(*opts)).scalars().one() - address = u.address - eq_(inspect(address).load_options, set(opts)) + u = sess.execute(select(User).options(*opts)).scalars().one() + address = u.address + eq_(inspect(address).load_options, opts) class NoBaseWPPlusAliasedTest( diff --git a/test/orm/test_events.py b/test/orm/test_events.py index c0fbaba7d6..4009dc3aec 100644 --- a/test/orm/test_events.py +++ b/test/orm/test_events.py @@ -201,7 +201,7 @@ class ORMExecuteTest(_RemoveListeners, _fixtures.FixtureTest): def go(context): for elem in context.user_defined_options: if isinstance(elem, SetShardOption): - m1.update_execution_options(_sa_shard_id=elem.payload) + m1.do_some_mock_thing(_sa_shard_id=elem.payload) stmt = select(User).options( loader_opt(User.addresses).options(loader_opt(Address.dingaling)), @@ -217,21 +217,15 @@ class ORMExecuteTest(_RemoveListeners, _fixtures.FixtureTest): loader_opt(User.addresses).options(loader_opt(Address.dingaling)), SetShardOption("some_other_shard"), ) + for u in s.execute(stmt).unique().scalars(): for a in u.addresses: a.dingaling eq_( m1.mock_calls, - ( - [call.update_execution_options(_sa_shard_id="some_shard")] - * num_opts - ) + ([call.do_some_mock_thing(_sa_shard_id="some_shard")] * num_opts) + ( - [ - call.update_execution_options( - _sa_shard_id="some_other_shard" - ) - ] + [call.do_some_mock_thing(_sa_shard_id="some_other_shard")] * num_opts ), ) diff --git a/test/orm/test_merge.py b/test/orm/test_merge.py index d2eade0ea1..3e29d5cd79 100644 --- a/test/orm/test_merge.py +++ b/test/orm/test_merge.py @@ -1567,7 +1567,7 @@ class MergeTest(_fixtures.FixtureTest): for u in s1_users: ustate = attributes.instance_state(u) eq_(ustate.load_path.path, (umapper,)) - eq_(ustate.load_options, set()) + eq_(ustate.load_options, ()) for u in s2_users: sess.merge(u) @@ -1575,7 +1575,7 @@ class MergeTest(_fixtures.FixtureTest): for u in s1_users: ustate = attributes.instance_state(u) eq_(ustate.load_path.path, (umapper,)) - eq_(ustate.load_options, set([opt2])) + eq_(ustate.load_options, (opt2,)) # test 2. present options are replaced by merge options sess = fixture_session() @@ -1583,7 +1583,7 @@ class MergeTest(_fixtures.FixtureTest): for u in s1_users: ustate = attributes.instance_state(u) eq_(ustate.load_path.path, (umapper,)) - eq_(ustate.load_options, set([opt1])) + eq_(ustate.load_options, (opt1,)) for u in s2_users: sess.merge(u) @@ -1591,7 +1591,7 @@ class MergeTest(_fixtures.FixtureTest): for u in s1_users: ustate = attributes.instance_state(u) eq_(ustate.load_path.path, (umapper,)) - eq_(ustate.load_options, set([opt2])) + eq_(ustate.load_options, (opt2,)) def test_resolve_conflicts_pending_doesnt_interfere_no_ident(self): User, Address, Order = ( diff --git a/test/orm/test_options.py b/test/orm/test_options.py index 1a2a5ba70f..840b3dc214 100644 --- a/test/orm/test_options.py +++ b/test/orm/test_options.py @@ -23,6 +23,7 @@ from sqlalchemy.orm import synonym from sqlalchemy.orm import util as orm_util from sqlalchemy.orm import with_polymorphic from sqlalchemy.testing import fixtures +from sqlalchemy.testing import mock from sqlalchemy.testing.assertions import assert_raises_message from sqlalchemy.testing.assertions import AssertsCompiledSQL from sqlalchemy.testing.assertions import emits_warning @@ -2050,12 +2051,16 @@ class MapperOptionsTest(_fixtures.FixtureTest): oalias = aliased(Order) opt1 = sa.orm.joinedload(User.orders, Order.items) opt2 = sa.orm.contains_eager(User.orders, Order.items, alias=oalias) - u1 = ( - sess.query(User) - .join(oalias, User.orders) - .options(opt1, opt2) - .first() - ) - ustate = attributes.instance_state(u1) - assert opt1 in ustate.load_options - assert opt2 not in ustate.load_options + + with mock.patch.object( + Load, "_adjust_for_extra_criteria", lambda self, ctx: self + ): + u1 = ( + sess.query(User) + .join(oalias, User.orders) + .options(opt1, opt2) + .first() + ) + ustate = attributes.instance_state(u1) + assert opt1 in ustate.load_options + assert opt2 not in ustate.load_options diff --git a/test/orm/test_relationship_criteria.py b/test/orm/test_relationship_criteria.py index 5f47b49ac7..7a347cd55b 100644 --- a/test/orm/test_relationship_criteria.py +++ b/test/orm/test_relationship_criteria.py @@ -481,6 +481,63 @@ class LoaderCriteriaTest(_Fixtures, testing.AssertsCompiledSQL): ), ) + def test_select_selectinload_mapper_mapper_closure_criteria( + self, user_address_fixture + ): + User, Address = user_address_fixture + + def get_statement(closure="name"): + + stmt = select(User).options( + selectinload(User.addresses), + with_loader_criteria( + Address, lambda cls: cls.email_address != closure + ), + ) + return stmt + + s = Session(testing.db, future=True) + + stmt = get_statement(closure="name") + with self.sql_execution_asserter() as asserter: + s.execute(stmt).all() + + asserter.assert_( + CompiledSQL( + "SELECT users.id, users.name FROM users", + [], + ), + CompiledSQL( + "SELECT addresses.user_id AS addresses_user_id, addresses.id " + "AS addresses_id, addresses.email_address " + "AS addresses_email_address FROM addresses " + "WHERE addresses.user_id IN (__[POSTCOMPILE_primary_keys]) " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"primary_keys": [7, 8, 9, 10], "closure_1": "name"}], + ), + ) + + stmt = get_statement(closure="new name") + with self.sql_execution_asserter() as asserter: + s.execute(stmt).all() + + asserter.assert_( + CompiledSQL( + "SELECT users.id, users.name FROM users", + [], + ), + CompiledSQL( + "SELECT addresses.user_id AS addresses_user_id, addresses.id " + "AS addresses_id, addresses.email_address " + "AS addresses_email_address FROM addresses " + "WHERE addresses.user_id IN (__[POSTCOMPILE_primary_keys]) " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"primary_keys": [7, 8, 9, 10], "closure_1": "new name"}], + ), + ) + def test_select_lazyload_mapper_mapper_criteria( self, user_address_fixture ): @@ -543,6 +600,125 @@ class LoaderCriteriaTest(_Fixtures, testing.AssertsCompiledSQL): ), ) + def test_select_lazyload_mapper_mapper_closure_criteria( + self, user_address_fixture + ): + User, Address = user_address_fixture + + def get_statement(closure="name"): + + stmt = ( + select(User) + .options( + lazyload(User.addresses), + with_loader_criteria( + Address, lambda cls: cls.email_address != closure + ), + ) + .order_by(User.id) + ) + return stmt + + s = Session(testing.db, future=True) + + stmt = get_statement(closure="name") + with self.sql_execution_asserter() as asserter: + for obj in s.scalars(stmt).all(): + obj.addresses + + asserter.assert_( + CompiledSQL( + "SELECT users.id, users.name FROM users ORDER BY users.id", + [], + ), + CompiledSQL( + "SELECT addresses.id AS addresses_id, " + "addresses.user_id AS addresses_user_id, " + "addresses.email_address AS addresses_email_address " + "FROM addresses WHERE :param_1 = addresses.user_id " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"param_1": 7, "closure_1": "name"}], + ), + CompiledSQL( + "SELECT addresses.id AS addresses_id, " + "addresses.user_id AS addresses_user_id, " + "addresses.email_address AS addresses_email_address " + "FROM addresses WHERE :param_1 = addresses.user_id " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"param_1": 8, "closure_1": "name"}], + ), + CompiledSQL( + "SELECT addresses.id AS addresses_id, " + "addresses.user_id AS addresses_user_id, " + "addresses.email_address AS addresses_email_address " + "FROM addresses WHERE :param_1 = addresses.user_id " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"param_1": 9, "closure_1": "name"}], + ), + CompiledSQL( + "SELECT addresses.id AS addresses_id, " + "addresses.user_id AS addresses_user_id, " + "addresses.email_address AS addresses_email_address " + "FROM addresses WHERE :param_1 = addresses.user_id " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"param_1": 10, "closure_1": "name"}], + ), + ) + + stmt = get_statement(closure="new name") + with self.sql_execution_asserter() as asserter: + for obj in s.scalars( + stmt, execution_options={"populate_existing": True} + ).all(): + obj.addresses + + asserter.assert_( + CompiledSQL( + "SELECT users.id, users.name FROM users ORDER BY users.id", + [], + ), + CompiledSQL( + "SELECT addresses.id AS addresses_id, " + "addresses.user_id AS addresses_user_id, " + "addresses.email_address AS addresses_email_address " + "FROM addresses WHERE :param_1 = addresses.user_id " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"param_1": 7, "closure_1": "new name"}], + ), + CompiledSQL( + "SELECT addresses.id AS addresses_id, " + "addresses.user_id AS addresses_user_id, " + "addresses.email_address AS addresses_email_address " + "FROM addresses WHERE :param_1 = addresses.user_id " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"param_1": 8, "closure_1": "new name"}], + ), + CompiledSQL( + "SELECT addresses.id AS addresses_id, " + "addresses.user_id AS addresses_user_id, " + "addresses.email_address AS addresses_email_address " + "FROM addresses WHERE :param_1 = addresses.user_id " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"param_1": 9, "closure_1": "new name"}], + ), + CompiledSQL( + "SELECT addresses.id AS addresses_id, " + "addresses.user_id AS addresses_user_id, " + "addresses.email_address AS addresses_email_address " + "FROM addresses WHERE :param_1 = addresses.user_id " + "AND addresses.email_address != :closure_1 " + "ORDER BY addresses.id", + [{"param_1": 10, "closure_1": "new name"}], + ), + ) + def test_select_aliased_inclaliased_criteria(self, user_address_fixture): User, Address = user_address_fixture