From cdbe3f84ba4656fd54205212b5adcd5ad9c8e8d2 Mon Sep 17 00:00:00 2001 From: Federico Caselli Date: Sat, 15 Feb 2020 12:34:37 +0100 Subject: [PATCH] Deprecate add of columns in order by with distinct Deprecate automatic addition of order by column in a query with a distinct Fixes: #5134 Change-Id: I467a39379c496be7e84a05f11ba9f8ca2bcc6e32 --- doc/build/changelog/migration_20.rst | 35 ++++++++++ doc/build/changelog/unreleased_14/5134.rst | 11 ++++ lib/sqlalchemy/orm/query.py | 36 ++++++---- lib/sqlalchemy/orm/strategies.py | 9 +++ lib/sqlalchemy/testing/assertsql.py | 9 ++- test/orm/test_deprecations.py | 77 ++++++++++++++++++++++ test/orm/test_query.py | 29 ++++++-- 7 files changed, 185 insertions(+), 21 deletions(-) create mode 100644 doc/build/changelog/unreleased_14/5134.rst diff --git a/doc/build/changelog/migration_20.rst b/doc/build/changelog/migration_20.rst index f21a5e3a9f..a039d97388 100644 --- a/doc/build/changelog/migration_20.rst +++ b/doc/build/changelog/migration_20.rst @@ -533,6 +533,8 @@ equally:: result.scalar() # first col of first row (warns if additional rows remain?) result.scalars() # iterator of first col of each row + result.scalars(1) # iterator of second col of each row + result.scalars('a') # iterator of the "a" col of each row result.scalars().all() # same, as a list result.columns('a', 'b'). # limit column tuples @@ -543,6 +545,8 @@ equally:: # if the result is an ORM result, you could do: result.columns(User, Address) # assuming these are available entities + # or to get just User as a list + result.scalars(User).all() # index access and slices ? result[0].all() # same as result.scalars().all() @@ -868,6 +872,37 @@ of the form ``(target, onclause)`` as well:: By using attributes instead of strings above, the :meth:`.Query.join` method no longer needs the almost never-used option of ``from_joinpoint``. +Other ORM Query patterns changed +================================= + +This section will collect various :class:`.Query` patterns and how they work +in terms of :func:`.future.select`. + +.. _migration_20_query_distinct: + +Using DISTINCT with additional columns, but only select the entity +------------------------------------------------------------------- + +:class:`.Query` will automatically add columns in the ORDER BY when +distinct is used. The following query will select from all User columns +as well as "address.email_address" but only return User objects:: + + result = session.query(User).join(User.addresses).\ + distinct().order_by(Address.email_address).all() + +Relational databases won't allow you to ORDER BY "address.email_address" if +it isn't also in the columns clause. But the above query only wants "User" +objects back. In 2.0, this very unusual use case is performed explicitly, +and the limiting of the entities/columns to ``User`` is done on the result:: + + + from sqlalchemy.future import select + + stmt = select(User, Address.email_address).join(User.addresses).\ + distinct().order_by(Address.email_address) + + result = session.execute(stmt).scalars(User).all() + Transparent Statement Compilation Caching replaces "Baked" queries, works in Core ================================================================================== diff --git a/doc/build/changelog/unreleased_14/5134.rst b/doc/build/changelog/unreleased_14/5134.rst new file mode 100644 index 0000000000..229fd5e2db --- /dev/null +++ b/doc/build/changelog/unreleased_14/5134.rst @@ -0,0 +1,11 @@ +.. change:: + :tags: orm, bug + :tickets: 5134 + + Deprecated logic in :meth:`.Query.distinct` that automatically adds + columns in the ORDER BY clause to the columns clause; this will be removed + in 2.0. + + .. seealso:: + + :ref:`migration_20_query_distinct` diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index 617bba3150..24d8975e41 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ -3071,15 +3071,18 @@ class Query(Generative): .. note:: - The :meth:`.distinct` call includes logic that will automatically - add columns from the ORDER BY of the query to the columns - clause of the SELECT statement, to satisfy the common need - of the database backend that ORDER BY columns be part of the - SELECT list when DISTINCT is used. These columns *are not* - added to the list of columns actually fetched by the - :class:`.Query`, however, so would not affect results. - The columns are passed through when using the - :attr:`.Query.statement` accessor, however. + The ORM-level :meth:`.distinct` call includes logic that will + automatically add columns from the ORDER BY of the query to the + columns clause of the SELECT statement, to satisfy the common need + of the database backend that ORDER BY columns be part of the SELECT + list when DISTINCT is used. These columns *are not* added to the + list of columns actually fetched by the :class:`.Query`, however, + so would not affect results. The columns are passed through when + using the :attr:`.Query.statement` accessor, however. + + .. deprecated:: 2.0 This logic is deprecated and will be removed + in SQLAlchemy 2.0. See :ref:`migration_20_query_distinct` + for a description of this use case in 2.0. :param \*expr: optional column expressions. When present, the PostgreSQL dialect will render a ``DISTINCT ON ()`` @@ -3994,9 +3997,18 @@ class Query(Generative): context.order_by = None if self._distinct is True and context.order_by: - context.primary_columns += ( - sql_util.expand_column_list_from_order_by - )(context.primary_columns, context.order_by) + to_add = sql_util.expand_column_list_from_order_by( + context.primary_columns, context.order_by + ) + if to_add: + util.warn_deprecated_20( + "ORDER BY columns added implicitly due to " + "DISTINCT is deprecated and will be removed in " + "SQLAlchemy 2.0. SELECT statements with DISTINCT " + "should be written to explicitly include the appropriate " + "columns in the columns clause" + ) + context.primary_columns += to_add context.froms += tuple(context.eager_joins.values()) statement = sql.select( diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index 0946370821..4da6015305 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -1235,6 +1235,15 @@ class SubqueryLoader(PostLoader): if q._limit is None and q._offset is None: q._order_by = None + if q._distinct is True and q._order_by: + # the logic to automatically add the order by columns to the query + # when distinct is True is deprecated in the query + to_add = sql_util.expand_column_list_from_order_by( + target_cols, q._order_by + ) + if to_add: + q._set_entities(target_cols + to_add) + # the original query now becomes a subquery # which we'll join onto. diff --git a/lib/sqlalchemy/testing/assertsql.py b/lib/sqlalchemy/testing/assertsql.py index f0da694007..8876c23041 100644 --- a/lib/sqlalchemy/testing/assertsql.py +++ b/lib/sqlalchemy/testing/assertsql.py @@ -184,8 +184,8 @@ class CompiledSQL(SQLMatchRule): def _failure_message(self, expected_params): return ( - "Testing for compiled statement %r partial params %s, " - "received %%(received_statement)r with params " + "Testing for compiled statement\n%r partial params %s, " + "received\n%%(received_statement)r with params " "%%(received_parameters)r" % ( self.statement.replace("%", "%%"), @@ -343,6 +343,9 @@ class SQLExecuteObserved(object): self.parameters = _distill_params(multiparams, params) self.statements = [] + def __repr__(self): + return str(self.statements) + class SQLCursorExecuteObserved( collections.namedtuple( @@ -373,7 +376,7 @@ class SQLAsserter(object): elif rule.errormessage: assert False, rule.errormessage if observed: - assert False, "Additional SQL statements remain" + assert False, "Additional SQL statements remain:\n%s" % observed elif not rule.is_consumed: rule.no_more_statements() diff --git a/test/orm/test_deprecations.py b/test/orm/test_deprecations.py index 4efff1f769..475e1c4ec9 100644 --- a/test/orm/test_deprecations.py +++ b/test/orm/test_deprecations.py @@ -1,5 +1,6 @@ import sqlalchemy as sa from sqlalchemy import and_ +from sqlalchemy import desc from sqlalchemy import event from sqlalchemy import func from sqlalchemy import Integer @@ -7,6 +8,7 @@ from sqlalchemy import select from sqlalchemy import String from sqlalchemy import testing from sqlalchemy import text +from sqlalchemy import true from sqlalchemy.ext.declarative import comparable_using from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import aliased @@ -2287,3 +2289,78 @@ class TestDeprecation20(fixtures.TestBase): def test_eagerloading(self): with testing.expect_deprecated_20(".*joinedload"): eagerload("foo") + + +class DistinctOrderByImplicitTest(QueryTest, AssertsCompiledSQL): + __dialect__ = "default" + + def test_columns_augmented_roundtrip_one(self): + User, Address = self.classes.User, self.classes.Address + + sess = create_session() + q = ( + sess.query(User) + .join("addresses") + .distinct() + .order_by(desc(Address.email_address)) + ) + with testing.expect_deprecated( + "ORDER BY columns added implicitly due to " + ): + eq_([User(id=7), User(id=9), User(id=8)], q.all()) + + def test_columns_augmented_roundtrip_three(self): + User, Address = self.classes.User, self.classes.Address + + sess = create_session() + + q = ( + sess.query(User.id, User.name.label("foo"), Address.id) + .join(Address, true()) + .filter(User.name == "jack") + .filter(User.id + Address.user_id > 0) + .distinct() + .order_by(User.id, User.name, Address.email_address) + ) + + # even though columns are added, they aren't in the result + with testing.expect_deprecated( + "ORDER BY columns added implicitly due to " + ): + eq_( + q.all(), + [ + (7, "jack", 3), + (7, "jack", 4), + (7, "jack", 2), + (7, "jack", 5), + (7, "jack", 1), + ], + ) + for row in q: + eq_(row._mapping.keys(), ["id", "foo", "id"]) + + def test_columns_augmented_sql_one(self): + User, Address = self.classes.User, self.classes.Address + + sess = create_session() + + q = ( + sess.query(User.id, User.name.label("foo"), Address.id) + .distinct() + .order_by(User.id, User.name, Address.email_address) + ) + + # Address.email_address is added because of DISTINCT, + # however User.id, User.name are not b.c. they're already there, + # even though User.name is labeled + with testing.expect_deprecated( + "ORDER BY columns added implicitly due to " + ): + self.assert_compile( + q, + "SELECT DISTINCT users.id AS users_id, users.name AS foo, " + "addresses.id AS addresses_id, addresses.email_address AS " + "addresses_email_address FROM users, addresses " + "ORDER BY users.id, users.name, addresses.email_address", + ) diff --git a/test/orm/test_query.py b/test/orm/test_query.py index 2fb79604a7..7d49aceba7 100644 --- a/test/orm/test_query.py +++ b/test/orm/test_query.py @@ -3894,10 +3894,11 @@ class DistinctTest(QueryTest, AssertsCompiledSQL): sess = create_session() q = ( - sess.query(User) + sess.query(User, Address.email_address) .join("addresses") .distinct() .order_by(desc(Address.email_address)) + .from_self(User) ) eq_([User(id=7), User(id=9), User(id=8)], q.all()) @@ -3931,12 +3932,18 @@ class DistinctTest(QueryTest, AssertsCompiledSQL): sess = create_session() q = ( - sess.query(User.id, User.name.label("foo"), Address.id) + sess.query( + User.id, + User.name.label("foo"), + Address.id, + Address.email_address, + ) .join(Address, true()) .filter(User.name == "jack") .filter(User.id + Address.user_id > 0) .distinct() .order_by(User.id, User.name, Address.email_address) + .from_self(User.id, User.name.label("foo"), Address.id) ) # even though columns are added, they aren't in the result @@ -3959,9 +3966,15 @@ class DistinctTest(QueryTest, AssertsCompiledSQL): sess = create_session() q = ( - sess.query(User.id, User.name.label("foo"), Address.id) + sess.query( + User.id, + User.name.label("foo"), + Address.id, + Address.email_address, + ) .distinct() .order_by(User.id, User.name, Address.email_address) + .from_self(User.id, User.name.label("foo"), Address.id) ) # Address.email_address is added because of DISTINCT, @@ -3969,10 +3982,14 @@ class DistinctTest(QueryTest, AssertsCompiledSQL): # even though User.name is labeled self.assert_compile( q, + "SELECT anon_1.users_id AS anon_1_users_id, anon_1.foo AS foo, " + "anon_1.addresses_id AS anon_1_addresses_id " + "FROM (" "SELECT DISTINCT users.id AS users_id, users.name AS foo, " - "addresses.id AS addresses_id, " - "addresses.email_address AS addresses_email_address FROM users, " - "addresses ORDER BY users.id, users.name, addresses.email_address", + "addresses.id AS addresses_id, addresses.email_address AS " + "addresses_email_address FROM users, addresses ORDER BY " + "users.id, users.name, addresses.email_address" + ") AS anon_1", ) def test_columns_augmented_sql_two(self): -- 2.47.3