From cdbe3f84ba4656fd54205212b5adcd5ad9c8e8d2 Mon Sep 17 00:00:00 2001
From: Federico Caselli <cfederico87@gmail.com>
Date: Sat, 15 Feb 2020 12:34:37 +0100
Subject: [PATCH] Deprecate add of columns in order by with distinct

Deprecate automatic addition of order by column in a query with a distinct

Fixes: #5134
Change-Id: I467a39379c496be7e84a05f11ba9f8ca2bcc6e32
---
 doc/build/changelog/migration_20.rst       | 35 ++++++++++
 doc/build/changelog/unreleased_14/5134.rst | 11 ++++
 lib/sqlalchemy/orm/query.py                | 36 ++++++----
 lib/sqlalchemy/orm/strategies.py           |  9 +++
 lib/sqlalchemy/testing/assertsql.py        |  9 ++-
 test/orm/test_deprecations.py              | 77 ++++++++++++++++++++++
 test/orm/test_query.py                     | 29 ++++++--
 7 files changed, 185 insertions(+), 21 deletions(-)
 create mode 100644 doc/build/changelog/unreleased_14/5134.rst
diff --git a/doc/build/changelog/migration_20.rst b/doc/build/changelog/migration_20.rst
index f21a5e3a9f..a039d97388 100644
--- a/doc/build/changelog/migration_20.rst
+++ b/doc/build/changelog/migration_20.rst
@@ -533,6 +533,8 @@ equally::
 
         result.scalar()  # first col of first row  (warns if additional rows remain?)
         result.scalars()  # iterator of first col of each row
+        result.scalars(1)  # iterator of second col of each row
+        result.scalars('a')  # iterator of the "a" col of each row
         result.scalars().all()  # same, as a list
 
         result.columns('a', 'b').<anything>  # limit column tuples
@@ -543,6 +545,8 @@ equally::
         # if the result is an ORM result, you could do:
         result.columns(User, Address)   # assuming these are available entities
 
+        # or to get just User as a list
+        result.scalars(User).all()
 
         # index access and slices ?
         result[0].all()  # same as result.scalars().all()
@@ -868,6 +872,37 @@ of the form ``(target, onclause)`` as well::
 By using attributes instead of strings above, the :meth:`.Query.join` method
 no longer needs the almost never-used option of ``from_joinpoint``.
 
+Other ORM Query patterns changed
+=================================
+
+This section will collect various :class:`.Query` patterns and how they work
+in terms of :func:`.future.select`.
+
+.. _migration_20_query_distinct:
+
+Using DISTINCT with additional columns, but only select the entity
+-------------------------------------------------------------------
+
+:class:`.Query` will automatically add columns in the ORDER BY when
+distinct is used.  The following query will select from all User columns
+as well as "address.email_address" but only return User objects::
+
+    result = session.query(User).join(User.addresses).\
+        distinct().order_by(Address.email_address).all()
+
+Relational databases won't allow you to ORDER BY "address.email_address" if
+it isn't also in the columns clause.   But the above query only wants "User"
+objects back.  In 2.0, this very unusual use case is performed explicitly,
+and the limiting of the entities/columns to ``User`` is done on the result::
+
+
+    from sqlalchemy.future import select
+
+    stmt = select(User, Address.email_address).join(User.addresses).\
+        distinct().order_by(Address.email_address)
+
+    result = session.execute(stmt).scalars(User).all()
+
 Transparent Statement Compilation Caching replaces "Baked" queries, works in Core
 ==================================================================================
 
diff --git a/doc/build/changelog/unreleased_14/5134.rst b/doc/build/changelog/unreleased_14/5134.rst
new file mode 100644
index 0000000000..229fd5e2db
--- /dev/null
+++ b/doc/build/changelog/unreleased_14/5134.rst
@@ -0,0 +1,11 @@
+.. change::
+    :tags: orm, bug
+    :tickets: 5134
+
+    Deprecated logic in :meth:`.Query.distinct` that automatically adds
+    columns in the ORDER BY clause to the columns clause; this will be removed
+    in 2.0.
+
+    .. seealso::
+
+        :ref:`migration_20_query_distinct`
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index 617bba3150..24d8975e41 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -3071,15 +3071,18 @@ class Query(Generative):
 
         .. note::
 
-            The :meth:`.distinct` call includes logic that will automatically
-            add columns from the ORDER BY of the query to the columns
-            clause of the SELECT statement, to satisfy the common need
-            of the database backend that ORDER BY columns be part of the
-            SELECT list when DISTINCT is used.   These columns *are not*
-            added to the list of columns actually fetched by the
-            :class:`.Query`, however, so would not affect results.
-            The columns are passed through when using the
-            :attr:`.Query.statement` accessor, however.
+            The ORM-level :meth:`.distinct` call includes logic that will
+            automatically add columns from the ORDER BY of the query to the
+            columns clause of the SELECT statement, to satisfy the common need
+            of the database backend that ORDER BY columns be part of the SELECT
+            list when DISTINCT is used.   These columns *are not* added to the
+            list of columns actually fetched by the :class:`.Query`, however,
+            so would not affect results. The columns are passed through when
+            using the :attr:`.Query.statement` accessor, however.
+
+            .. deprecated:: 2.0  This logic is deprecated and will be removed
+               in SQLAlchemy 2.0.     See :ref:`migration_20_query_distinct`
+               for a description of this use case in 2.0.
 
         :param \*expr: optional column expressions.  When present,
          the PostgreSQL dialect will render a ``DISTINCT ON (<expressions>)``
@@ -3994,9 +3997,18 @@ class Query(Generative):
             context.order_by = None
 
         if self._distinct is True and context.order_by:
-            context.primary_columns += (
-                sql_util.expand_column_list_from_order_by
-            )(context.primary_columns, context.order_by)
+            to_add = sql_util.expand_column_list_from_order_by(
+                context.primary_columns, context.order_by
+            )
+            if to_add:
+                util.warn_deprecated_20(
+                    "ORDER BY columns added implicitly due to "
+                    "DISTINCT is deprecated and will be removed in "
+                    "SQLAlchemy 2.0.  SELECT statements with DISTINCT "
+                    "should be written to explicitly include the appropriate "
+                    "columns in the columns clause"
+                )
+            context.primary_columns += to_add
         context.froms += tuple(context.eager_joins.values())
 
         statement = sql.select(
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index 0946370821..4da6015305 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -1235,6 +1235,15 @@ class SubqueryLoader(PostLoader):
         if q._limit is None and q._offset is None:
             q._order_by = None
 
+        if q._distinct is True and q._order_by:
+            # the logic to automatically add the order by columns to the query
+            # when distinct is True is deprecated in the query
+            to_add = sql_util.expand_column_list_from_order_by(
+                target_cols, q._order_by
+            )
+            if to_add:
+                q._set_entities(target_cols + to_add)
+
         # the original query now becomes a subquery
         # which we'll join onto.
 
diff --git a/lib/sqlalchemy/testing/assertsql.py b/lib/sqlalchemy/testing/assertsql.py
index f0da694007..8876c23041 100644
--- a/lib/sqlalchemy/testing/assertsql.py
+++ b/lib/sqlalchemy/testing/assertsql.py
@@ -184,8 +184,8 @@ class CompiledSQL(SQLMatchRule):
 
     def _failure_message(self, expected_params):
         return (
-            "Testing for compiled statement %r partial params %s, "
-            "received %%(received_statement)r with params "
+            "Testing for compiled statement\n%r partial params %s, "
+            "received\n%%(received_statement)r with params "
             "%%(received_parameters)r"
             % (
                 self.statement.replace("%", "%%"),
@@ -343,6 +343,9 @@ class SQLExecuteObserved(object):
         self.parameters = _distill_params(multiparams, params)
         self.statements = []
 
+    def __repr__(self):
+        return str(self.statements)
+
 
 class SQLCursorExecuteObserved(
     collections.namedtuple(
@@ -373,7 +376,7 @@ class SQLAsserter(object):
             elif rule.errormessage:
                 assert False, rule.errormessage
         if observed:
-            assert False, "Additional SQL statements remain"
+            assert False, "Additional SQL statements remain:\n%s" % observed
         elif not rule.is_consumed:
             rule.no_more_statements()
 
diff --git a/test/orm/test_deprecations.py b/test/orm/test_deprecations.py
index 4efff1f769..475e1c4ec9 100644
--- a/test/orm/test_deprecations.py
+++ b/test/orm/test_deprecations.py
@@ -1,5 +1,6 @@
 import sqlalchemy as sa
 from sqlalchemy import and_
+from sqlalchemy import desc
 from sqlalchemy import event
 from sqlalchemy import func
 from sqlalchemy import Integer
@@ -7,6 +8,7 @@ from sqlalchemy import select
 from sqlalchemy import String
 from sqlalchemy import testing
 from sqlalchemy import text
+from sqlalchemy import true
 from sqlalchemy.ext.declarative import comparable_using
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import aliased
@@ -2287,3 +2289,78 @@ class TestDeprecation20(fixtures.TestBase):
     def test_eagerloading(self):
         with testing.expect_deprecated_20(".*joinedload"):
             eagerload("foo")
+
+
+class DistinctOrderByImplicitTest(QueryTest, AssertsCompiledSQL):
+    __dialect__ = "default"
+
+    def test_columns_augmented_roundtrip_one(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+        q = (
+            sess.query(User)
+            .join("addresses")
+            .distinct()
+            .order_by(desc(Address.email_address))
+        )
+        with testing.expect_deprecated(
+            "ORDER BY columns added implicitly due to "
+        ):
+            eq_([User(id=7), User(id=9), User(id=8)], q.all())
+
+    def test_columns_augmented_roundtrip_three(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        q = (
+            sess.query(User.id, User.name.label("foo"), Address.id)
+            .join(Address, true())
+            .filter(User.name == "jack")
+            .filter(User.id + Address.user_id > 0)
+            .distinct()
+            .order_by(User.id, User.name, Address.email_address)
+        )
+
+        # even though columns are added, they aren't in the result
+        with testing.expect_deprecated(
+            "ORDER BY columns added implicitly due to "
+        ):
+            eq_(
+                q.all(),
+                [
+                    (7, "jack", 3),
+                    (7, "jack", 4),
+                    (7, "jack", 2),
+                    (7, "jack", 5),
+                    (7, "jack", 1),
+                ],
+            )
+            for row in q:
+                eq_(row._mapping.keys(), ["id", "foo", "id"])
+
+    def test_columns_augmented_sql_one(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        q = (
+            sess.query(User.id, User.name.label("foo"), Address.id)
+            .distinct()
+            .order_by(User.id, User.name, Address.email_address)
+        )
+
+        # Address.email_address is added because of DISTINCT,
+        # however User.id, User.name are not b.c. they're already there,
+        # even though User.name is labeled
+        with testing.expect_deprecated(
+            "ORDER BY columns added implicitly due to "
+        ):
+            self.assert_compile(
+                q,
+                "SELECT DISTINCT users.id AS users_id, users.name AS foo, "
+                "addresses.id AS addresses_id, addresses.email_address AS "
+                "addresses_email_address FROM users, addresses "
+                "ORDER BY users.id, users.name, addresses.email_address",
+            )
diff --git a/test/orm/test_query.py b/test/orm/test_query.py
index 2fb79604a7..7d49aceba7 100644
--- a/test/orm/test_query.py
+++ b/test/orm/test_query.py
@@ -3894,10 +3894,11 @@ class DistinctTest(QueryTest, AssertsCompiledSQL):
 
         sess = create_session()
         q = (
-            sess.query(User)
+            sess.query(User, Address.email_address)
             .join("addresses")
             .distinct()
             .order_by(desc(Address.email_address))
+            .from_self(User)
         )
 
         eq_([User(id=7), User(id=9), User(id=8)], q.all())
@@ -3931,12 +3932,18 @@ class DistinctTest(QueryTest, AssertsCompiledSQL):
         sess = create_session()
 
         q = (
-            sess.query(User.id, User.name.label("foo"), Address.id)
+            sess.query(
+                User.id,
+                User.name.label("foo"),
+                Address.id,
+                Address.email_address,
+            )
             .join(Address, true())
             .filter(User.name == "jack")
             .filter(User.id + Address.user_id > 0)
             .distinct()
             .order_by(User.id, User.name, Address.email_address)
+            .from_self(User.id, User.name.label("foo"), Address.id)
         )
 
         # even though columns are added, they aren't in the result
@@ -3959,9 +3966,15 @@ class DistinctTest(QueryTest, AssertsCompiledSQL):
         sess = create_session()
 
         q = (
-            sess.query(User.id, User.name.label("foo"), Address.id)
+            sess.query(
+                User.id,
+                User.name.label("foo"),
+                Address.id,
+                Address.email_address,
+            )
             .distinct()
             .order_by(User.id, User.name, Address.email_address)
+            .from_self(User.id, User.name.label("foo"), Address.id)
         )
 
         # Address.email_address is added because of DISTINCT,
@@ -3969,10 +3982,14 @@ class DistinctTest(QueryTest, AssertsCompiledSQL):
         # even though User.name is labeled
         self.assert_compile(
             q,
+            "SELECT anon_1.users_id AS anon_1_users_id, anon_1.foo AS foo, "
+            "anon_1.addresses_id AS anon_1_addresses_id "
+            "FROM ("
             "SELECT DISTINCT users.id AS users_id, users.name AS foo, "
-            "addresses.id AS addresses_id, "
-            "addresses.email_address AS addresses_email_address FROM users, "
-            "addresses ORDER BY users.id, users.name, addresses.email_address",
+            "addresses.id AS addresses_id, addresses.email_address AS "
+            "addresses_email_address FROM users, addresses ORDER BY "
+            "users.id, users.name, addresses.email_address"
+            ") AS anon_1",
         )
 
     def test_columns_augmented_sql_two(self):
-- 
2.47.3