From f62e0c0c7fc4be842a0f4dc4f59019b3c6285fee Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Fri, 5 Mar 2021 21:52:03 -0500 Subject: [PATCH] improve targeting and labeling for unary() in columns clause Fixed regression where usage of the standalone :func:`_sql.distinct()` used in the form of being directly SELECTed would fail to be locatable in the result set by column identity, which is how the ORM locates columns. While standalone :func:`_sql.distinct()` is not oriented towards being directly SELECTed (use :meth:`_sql.select.distinct` for a regular ``SELECT DISTINCT..``) , it was usable to a limited extent in this way previously (but wouldn't work in subqueries, for example). The column targeting for unary expressions such as "DISTINCT " has been improved so that this case works again, and an additional improvement has been made so that usage of this form in a subquery at least generates valid SQL which was not the case previously. The change additionally enhances the ability to target elements in ``row._mapping`` based on SQL expression objects in ORM-enabled SELECT statements, including whether the statement was invoked by ``connection.execute()`` or ``session.execute()``. Fixes: #6008 Change-Id: I5cfa39435f5418861d70a7db8f52ab4ced6a792e --- doc/build/changelog/unreleased_14/6008.rst | 20 +++++ lib/sqlalchemy/engine/cursor.py | 4 +- lib/sqlalchemy/orm/context.py | 88 ++++++++++++++++++---- lib/sqlalchemy/sql/compiler.py | 40 +++------- test/orm/test_cache_key.py | 63 ++++++++++++++++ test/orm/test_eager_relations.py | 4 +- test/orm/test_query.py | 20 +++++ test/sql/test_compiler.py | 17 +++++ 8 files changed, 209 insertions(+), 47 deletions(-) create mode 100644 doc/build/changelog/unreleased_14/6008.rst diff --git a/doc/build/changelog/unreleased_14/6008.rst b/doc/build/changelog/unreleased_14/6008.rst new file mode 100644 index 0000000000..87b4c64061 --- /dev/null +++ b/doc/build/changelog/unreleased_14/6008.rst @@ -0,0 +1,20 @@ +.. change:: + :tags: bug, regression, sql + :tickets: 6008 + + Fixed regression where usage of the standalone :func:`_sql.distinct()` used + in the form of being directly SELECTed would fail to be locatable in the + result set by column identity, which is how the ORM locates columns. While + standalone :func:`_sql.distinct()` is not oriented towards being directly + SELECTed (use :meth:`_sql.select.distinct` for a regular + ``SELECT DISTINCT..``) , it was usable to a limited extent in this way + previously (but wouldn't work in subqueries, for example). The column + targeting for unary expressions such as "DISTINCT " has been improved + so that this case works again, and an additional improvement has been made + so that usage of this form in a subquery at least generates valid SQL which + was not the case previously. + + The change additionally enhances the ability to target elements in + ``row._mapping`` based on SQL expression objects in ORM-enabled + SELECT statements, including whether the statement was invoked by + ``connection.execute()`` or ``session.execute()``. diff --git a/lib/sqlalchemy/engine/cursor.py b/lib/sqlalchemy/engine/cursor.py index a9933c7ae7..b5b92b1714 100644 --- a/lib/sqlalchemy/engine/cursor.py +++ b/lib/sqlalchemy/engine/cursor.py @@ -1290,7 +1290,9 @@ class BaseCursorResult(object): compiled and compiled._result_columns and context.cache_hit is context.dialect.CACHE_HIT - and not compiled._rewrites_selected_columns + and not context.execution_options.get( + "_result_disable_adapt_to_context", False + ) and compiled.statement is not context.invoked_statement ): metadata = metadata._adapt_to_context(context) diff --git a/lib/sqlalchemy/orm/context.py b/lib/sqlalchemy/orm/context.py index 23bae5cc08..9cfa0fdc6d 100644 --- a/lib/sqlalchemy/orm/context.py +++ b/lib/sqlalchemy/orm/context.py @@ -127,6 +127,11 @@ class QueryContext(object): ) +_result_disable_adapt_to_context = util.immutabledict( + {"_result_disable_adapt_to_context": True} +) + + class ORMCompileState(CompileState): # note this is a dictionary, but the # default_compile_options._with_polymorphic_adapt_map is a tuple @@ -234,6 +239,17 @@ class ORMCompileState(CompileState): statement._execution_options, ) + # add _result_disable_adapt_to_context=True to execution options. + # this will disable the ResultSetMetadata._adapt_to_context() + # step which we don't need, as we have result processors cached + # against the original SELECT statement before caching. + if not execution_options: + execution_options = _result_disable_adapt_to_context + else: + execution_options = execution_options.union( + _result_disable_adapt_to_context + ) + if "yield_per" in execution_options or load_options._yield_per: execution_options = execution_options.union( { @@ -343,7 +359,6 @@ class ORMFromStatementCompileState(ORMCompileState): def create_for_statement(cls, statement_container, compiler, **kw): if compiler is not None: - compiler._rewrites_selected_columns = True toplevel = not compiler.stack else: toplevel = True @@ -475,7 +490,6 @@ class ORMSelectCompileState(ORMCompileState, SelectState): if compiler is not None: toplevel = not compiler.stack - compiler._rewrites_selected_columns = True self.global_attributes = compiler._global_attributes else: toplevel = True @@ -2160,7 +2174,7 @@ class _QueryEntity(object): @classmethod def to_compile_state(cls, compile_state, entities): - for entity in entities: + for idx, entity in enumerate(entities): if entity._is_lambda_element: if entity._is_sequence: cls.to_compile_state(compile_state, entity._resolved) @@ -2174,7 +2188,7 @@ class _QueryEntity(object): _MapperEntity(compile_state, entity) else: _ColumnEntity._for_columns( - compile_state, entity._select_iterable + compile_state, entity._select_iterable, idx ) else: if entity._annotations.get("bundle", False): @@ -2183,10 +2197,12 @@ class _QueryEntity(object): # this is legacy only - test_composites.py # test_query_cols_legacy _ColumnEntity._for_columns( - compile_state, entity._select_iterable + compile_state, entity._select_iterable, idx ) else: - _ColumnEntity._for_columns(compile_state, [entity]) + _ColumnEntity._for_columns( + compile_state, [entity], idx + ) elif entity.is_bundle: _BundleEntity(compile_state, entity) @@ -2411,7 +2427,7 @@ class _BundleEntity(_QueryEntity): _BundleEntity(compile_state, expr, parent_bundle=self) else: _ORMColumnEntity._for_columns( - compile_state, [expr], parent_bundle=self + compile_state, [expr], None, parent_bundle=self ) self.supports_single_entity = self.bundle.single_entity @@ -2470,10 +2486,17 @@ class _BundleEntity(_QueryEntity): class _ColumnEntity(_QueryEntity): - __slots__ = ("_fetch_column", "_row_processor") + __slots__ = ( + "_fetch_column", + "_row_processor", + "raw_column_index", + "translate_raw_column", + ) @classmethod - def _for_columns(cls, compile_state, columns, parent_bundle=None): + def _for_columns( + cls, compile_state, columns, raw_column_index, parent_bundle=None + ): for column in columns: annotations = column._annotations if "parententity" in annotations: @@ -2489,6 +2512,7 @@ class _ColumnEntity(_QueryEntity): compile_state, column, _entity, + raw_column_index, parent_bundle=parent_bundle, ) else: @@ -2496,11 +2520,15 @@ class _ColumnEntity(_QueryEntity): compile_state, column, _entity, + raw_column_index, parent_bundle=parent_bundle, ) else: _RawColumnEntity( - compile_state, column, parent_bundle=parent_bundle + compile_state, + column, + raw_column_index, + parent_bundle=parent_bundle, ) @property @@ -2517,7 +2545,15 @@ class _ColumnEntity(_QueryEntity): # the resulting callable is entirely cacheable so just return # it if we already made one if self._row_processor is not None: - return self._row_processor + getter, label_name, extra_entities = self._row_processor + if self.translate_raw_column: + extra_entities += ( + result.context.invoked_statement._raw_columns[ + self.raw_column_index + ], + ) + + return getter, label_name, extra_entities # retrieve the column that would have been set up in # setup_compile_state, to avoid doing redundant work @@ -2547,7 +2583,16 @@ class _ColumnEntity(_QueryEntity): ret = getter, self._label_name, self._extra_entities self._row_processor = ret - return ret + + if self.translate_raw_column: + extra_entities = self._extra_entities + ( + result.context.invoked_statement._raw_columns[ + self.raw_column_index + ], + ) + return getter, self._label_name, extra_entities + else: + return ret class _RawColumnEntity(_ColumnEntity): @@ -2563,9 +2608,12 @@ class _RawColumnEntity(_ColumnEntity): "_extra_entities", ) - def __init__(self, compile_state, column, parent_bundle=None): + def __init__( + self, compile_state, column, raw_column_index, parent_bundle=None + ): self.expr = column - + self.raw_column_index = raw_column_index + self.translate_raw_column = raw_column_index is not None self._label_name = compile_state._label_convention(column) if parent_bundle: @@ -2619,9 +2667,9 @@ class _ORMColumnEntity(_ColumnEntity): compile_state, column, parententity, + raw_column_index, parent_bundle=None, ): - annotations = column._annotations _entity = parententity @@ -2634,9 +2682,17 @@ class _ORMColumnEntity(_ColumnEntity): orm_key = annotations.get("proxy_key", None) if orm_key: self.expr = getattr(_entity.entity, orm_key) + self.translate_raw_column = False else: + # if orm_key is not present, that means this is an ad-hoc + # SQL ColumnElement, like a CASE() or other expression. + # include this column position from the invoked statement + # in the ORM-level ResultSetMetaData on each execute, so that + # it can be targeted by identity after caching self.expr = column + self.translate_raw_column = raw_column_index is not None + self.raw_column_index = raw_column_index self._label_name = compile_state._label_convention( column, col_name=orm_key ) @@ -2715,6 +2771,8 @@ class _ORMColumnEntity(_ColumnEntity): class _IdentityTokenEntity(_ORMColumnEntity): + translate_raw_column = False + def setup_compile_state(self, compile_state): pass diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index c6fa6072ea..f635c1ee41 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -404,35 +404,6 @@ class Compiled(object): .. versionadded:: 1.4 - """ - - _rewrites_selected_columns = False - """if True, indicates the compile_state object rewrites an incoming - ReturnsRows (like a Select) so that the columns we compile against in the - result set are not what were expressed on the outside. this is a hint to - the execution context to not link the statement.selected_columns to the - columns mapped in the result object. - - That is, when this flag is False:: - - stmt = some_statement() - - result = conn.execute(stmt) - row = result.first() - - # selected_columns are in a 1-1 relationship with the - # columns in the result, and are targetable in mapping - for col in stmt.selected_columns: - assert col in row._mapping - - When True:: - - # selected columns are not what are in the rows. the context - # rewrote the statement for some other set of selected_columns. - for col in stmt.selected_columns: - assert col not in row._mapping - - """ cache_key = None @@ -1858,7 +1829,15 @@ class SQLCompiler(Compiled): ) return getattr(self, attrname, None) - def visit_unary(self, unary, **kw): + def visit_unary( + self, unary, add_to_result_map=None, result_map_targets=(), **kw + ): + + if add_to_result_map is not None: + result_map_targets += (unary,) + kw["add_to_result_map"] = add_to_result_map + kw["result_map_targets"] = result_map_targets + if unary.operator: if unary.modifier: raise exc.CompileError( @@ -2870,6 +2849,7 @@ class SQLCompiler(Compiled): and ( not isinstance(column, elements.UnaryExpression) or column.wraps_column_expression + or asfrom ) and ( not hasattr(column, "name") diff --git a/test/orm/test_cache_key.py b/test/orm/test_cache_key.py index 8b1d185382..cd06ce56a1 100644 --- a/test/orm/test_cache_key.py +++ b/test/orm/test_cache_key.py @@ -19,6 +19,7 @@ from sqlalchemy.orm import subqueryload from sqlalchemy.orm import with_loader_criteria from sqlalchemy.orm import with_polymorphic from sqlalchemy.sql.base import CacheableOptions +from sqlalchemy.sql.expression import case from sqlalchemy.sql.visitors import InternalTraversal from sqlalchemy.testing import AssertsCompiledSQL from sqlalchemy.testing import eq_ @@ -599,3 +600,65 @@ class RoundTripTest(QueryTest, AssertsCompiledSQL): for i in range(5): fn = random.choice([go1, go2]) self.assert_sql_count(testing.db, fn, 2) + + @testing.combinations((True,), (False,), argnames="use_core") + @testing.combinations((True,), (False,), argnames="arbitrary_element") + @testing.combinations((True,), (False,), argnames="exercise_caching") + def test_column_targeting_core_execute( + self, + plain_fixture, + connection, + use_core, + arbitrary_element, + exercise_caching, + ): + """test that CursorResultSet will do a column rewrite for any core + execute even if the ORM compiled the statement. + + This translates the current stmt.selected_columns to the cached + ResultSetMetaData._keymap. The ORM skips this because loading.py + has also cached the selected_columns that are used. But for + an outside-facing Core execute, this has to remain turned on. + + Additionally, we want targeting of SQL expressions to work with both + Core and ORM statement executions. So the ORM still has to do some + translation here for these elements to be supported. + + """ + User, Address = plain_fixture + user_table = inspect(User).persist_selectable + + def go(): + + my_thing = case((User.id > 9, 1), else_=2) + + # include entities in the statement so that we test that + # the column indexing from + # ORM select()._raw_columns -> Core select()._raw_columns is + # translated appropriately + stmt = ( + select(User, Address.email_address, my_thing, User.name) + .join(Address) + .where(User.name == "ed") + ) + + if arbitrary_element: + target, exp = (my_thing, 2) + elif use_core: + target, exp = (user_table.c.name, "ed") + else: + target, exp = (User.name, "ed") + + if use_core: + row = connection.execute(stmt).first() + + else: + row = Session(connection).execute(stmt).first() + + eq_(row._mapping[target], exp) + + if exercise_caching: + for i in range(3): + go() + else: + go() diff --git a/test/orm/test_eager_relations.py b/test/orm/test_eager_relations.py index d575b85f1b..ca0508db4d 100644 --- a/test/orm/test_eager_relations.py +++ b/test/orm/test_eager_relations.py @@ -3537,6 +3537,8 @@ class SubqueryAliasingTest(fixtures.MappedTest, testing.AssertsCompiledSQL): # test a different unary operator # TODO: there is no test in Core that asserts what is happening # here as far as the label generation for the ORDER BY + # NOTE: this very old test was in fact producing invalid SQL + # until #6008 was fixed self.assert_compile( fixture_session() .query(A) @@ -3547,7 +3549,7 @@ class SubqueryAliasingTest(fixtures.MappedTest, testing.AssertsCompiledSQL): "AS anon_1_anon_2, b_1.id AS b_1_id, b_1.a_id AS " "b_1_a_id, b_1.value AS b_1_value FROM (SELECT a.id " "AS a_id, NOT (SELECT sum(b.value) AS sum_1 FROM b " - "WHERE b.a_id = a.id) FROM a ORDER BY NOT (SELECT " + "WHERE b.a_id = a.id) AS anon_2 FROM a ORDER BY NOT (SELECT " "sum(b.value) AS sum_1 FROM b WHERE b.a_id = a.id) " "LIMIT :param_1) AS anon_1 LEFT OUTER JOIN b AS b_1 " "ON anon_1.a_id = b_1.a_id ORDER BY anon_1.anon_2", diff --git a/test/orm/test_query.py b/test/orm/test_query.py index f58a1161e4..c36946b6ed 100644 --- a/test/orm/test_query.py +++ b/test/orm/test_query.py @@ -4141,6 +4141,26 @@ class DistinctTest(QueryTest, AssertsCompiledSQL): .all(), ) + def test_basic_standalone(self): + User = self.classes.User + + # issue 6008. the UnaryExpression now places itself into the + # result map so that it can be matched positionally without the need + # for any label. + q = fixture_session().query(distinct(User.id)).order_by(User.id) + self.assert_compile( + q, "SELECT DISTINCT users.id FROM users ORDER BY users.id" + ) + eq_([(7,), (8,), (9,), (10,)], q.all()) + + def test_standalone_w_subquery(self): + User = self.classes.User + q = fixture_session().query(distinct(User.id)) + + subq = q.subquery() + q = fixture_session().query(subq).order_by(subq.c[0]) + eq_([(7,), (8,), (9,), (10,)], q.all()) + def test_no_automatic_distinct_thing_w_future(self): User = self.classes.User diff --git a/test/sql/test_compiler.py b/test/sql/test_compiler.py index 7d1817b295..b2d4434384 100644 --- a/test/sql/test_compiler.py +++ b/test/sql/test_compiler.py @@ -1719,6 +1719,23 @@ class SelectTest(fixtures.TestBase, AssertsCompiledSQL): "SELECT DISTINCT mytable.myid FROM mytable", ) + self.assert_compile( + select(distinct(table1.c.myid)).set_label_style( + LABEL_STYLE_TABLENAME_PLUS_COL + ), + "SELECT DISTINCT mytable.myid FROM mytable", + ) + + # the bug fixed here as part of #6008 is the same bug that's + # in 1.3 as well, producing + # "SELECT anon_2.anon_1 FROM (SELECT distinct mytable.myid + # FROM mytable) AS anon_2" + self.assert_compile( + select(select(distinct(table1.c.myid)).subquery()), + "SELECT anon_2.anon_1 FROM (SELECT " + "DISTINCT mytable.myid AS anon_1 FROM mytable) AS anon_2", + ) + self.assert_compile( select(table1.c.myid).distinct(), "SELECT DISTINCT mytable.myid FROM mytable", -- 2.47.2