Deprecate add of columns in order by with distinct

author Federico Caselli <cfederico87@gmail.com>

Sat, 15 Feb 2020 11:34:37 +0000 (12:34 +0100)

committer Mike Bayer <mike_mp@zzzcomputing.com>

Wed, 25 Mar 2020 14:49:27 +0000 (10:49 -0400)
author Federico Caselli <cfederico87@gmail.com>
Sat, 15 Feb 2020 11:34:37 +0000 (12:34 +0100)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Wed, 25 Mar 2020 14:49:27 +0000 (10:49 -0400)
diff --git a/doc/build/changelog/migration_20.rst b/doc/build/changelog/migration_20.rst

index f21a5e3a9fb405aa2c035844ecf1a7a03ecfff57..a039d973889f804462277065566dbf0c52441908 100644 (file)
--- a/doc/build/changelog/migration_20.rst
+++ b/doc/build/changelog/migration_20.rst
@@ -533,6 +533,8 @@ equally::
  
          result.scalar()  # first col of first row  (warns if additional rows remain?)
          result.scalars()  # iterator of first col of each row
+        result.scalars(1)  # iterator of second col of each row
+        result.scalars('a')  # iterator of the "a" col of each row
          result.scalars().all()  # same, as a list
  
          result.columns('a', 'b').<anything>  # limit column tuples
@@ -543,6 +545,8 @@ equally::
          # if the result is an ORM result, you could do:
          result.columns(User, Address)   # assuming these are available entities
  
+        # or to get just User as a list
+        result.scalars(User).all()
  
          # index access and slices ?
          result[0].all()  # same as result.scalars().all()
@@ -868,6 +872,37 @@ of the form ``(target, onclause)`` as well::
  By using attributes instead of strings above, the :meth:`.Query.join` method
  no longer needs the almost never-used option of ``from_joinpoint``.
  
+Other ORM Query patterns changed
+=================================
+
+This section will collect various :class:`.Query` patterns and how they work
+in terms of :func:`.future.select`.
+
+.. _migration_20_query_distinct:
+
+Using DISTINCT with additional columns, but only select the entity
+-------------------------------------------------------------------
+
+:class:`.Query` will automatically add columns in the ORDER BY when
+distinct is used.  The following query will select from all User columns
+as well as "address.email_address" but only return User objects::
+
+    result = session.query(User).join(User.addresses).\
+        distinct().order_by(Address.email_address).all()
+
+Relational databases won't allow you to ORDER BY "address.email_address" if
+it isn't also in the columns clause.   But the above query only wants "User"
+objects back.  In 2.0, this very unusual use case is performed explicitly,
+and the limiting of the entities/columns to ``User`` is done on the result::
+
+
+    from sqlalchemy.future import select
+
+    stmt = select(User, Address.email_address).join(User.addresses).\
+        distinct().order_by(Address.email_address)
+
+    result = session.execute(stmt).scalars(User).all()
+
  Transparent Statement Compilation Caching replaces "Baked" queries, works in Core
  ==================================================================================
  
diff --git a/doc/build/changelog/unreleased_14/5134.rst b/doc/build/changelog/unreleased_14/5134.rst

new file mode 100644 (file)

index 0000000..229fd5e
--- /dev/null
+++ b/doc/build/changelog/unreleased_14/5134.rst
@@ -0,0 +1,11 @@
+.. change::
+    :tags: orm, bug
+    :tickets: 5134
+
+    Deprecated logic in :meth:`.Query.distinct` that automatically adds
+    columns in the ORDER BY clause to the columns clause; this will be removed
+    in 2.0.
+
+    .. seealso::
+
+        :ref:`migration_20_query_distinct`
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py

index 617bba315015d713f3caab39c9d185fb28806f15..24d8975e41cbcb0239428de94b193e8e453b3a8c 100644 (file)
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -3071,15 +3071,18 @@ class Query(Generative):
  
          .. note::
  
-            The :meth:`.distinct` call includes logic that will automatically
-            add columns from the ORDER BY of the query to the columns
-            clause of the SELECT statement, to satisfy the common need
-            of the database backend that ORDER BY columns be part of the
-            SELECT list when DISTINCT is used.   These columns *are not*
-            added to the list of columns actually fetched by the
-            :class:`.Query`, however, so would not affect results.
-            The columns are passed through when using the
-            :attr:`.Query.statement` accessor, however.
+            The ORM-level :meth:`.distinct` call includes logic that will
+            automatically add columns from the ORDER BY of the query to the
+            columns clause of the SELECT statement, to satisfy the common need
+            of the database backend that ORDER BY columns be part of the SELECT
+            list when DISTINCT is used.   These columns *are not* added to the
+            list of columns actually fetched by the :class:`.Query`, however,
+            so would not affect results. The columns are passed through when
+            using the :attr:`.Query.statement` accessor, however.
+
+            .. deprecated:: 2.0  This logic is deprecated and will be removed
+               in SQLAlchemy 2.0.     See :ref:`migration_20_query_distinct`
+               for a description of this use case in 2.0.
  
          :param \*expr: optional column expressions.  When present,
           the PostgreSQL dialect will render a ``DISTINCT ON (<expressions>)``
@@ -3994,9 +3997,18 @@ class Query(Generative):
              context.order_by = None
  
          if self._distinct is True and context.order_by:
-            context.primary_columns += (
-                sql_util.expand_column_list_from_order_by
-            )(context.primary_columns, context.order_by)
+            to_add = sql_util.expand_column_list_from_order_by(
+                context.primary_columns, context.order_by
+            )
+            if to_add:
+                util.warn_deprecated_20(
+                    "ORDER BY columns added implicitly due to "
+                    "DISTINCT is deprecated and will be removed in "
+                    "SQLAlchemy 2.0.  SELECT statements with DISTINCT "
+                    "should be written to explicitly include the appropriate "
+                    "columns in the columns clause"
+                )
+            context.primary_columns += to_add
          context.froms += tuple(context.eager_joins.values())
  
          statement = sql.select(
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py

index 094637082158eab3978c9c7e7a6390c5826d0ceb..4da60153059cde24e0bf457a10b5481606a1e843 100644 (file)
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -1235,6 +1235,15 @@ class SubqueryLoader(PostLoader):
          if q._limit is None and q._offset is None:
              q._order_by = None
  
+        if q._distinct is True and q._order_by:
+            # the logic to automatically add the order by columns to the query
+            # when distinct is True is deprecated in the query
+            to_add = sql_util.expand_column_list_from_order_by(
+                target_cols, q._order_by
+            )
+            if to_add:
+                q._set_entities(target_cols + to_add)
+
          # the original query now becomes a subquery
          # which we'll join onto.
  
diff --git a/lib/sqlalchemy/testing/assertsql.py b/lib/sqlalchemy/testing/assertsql.py

index f0da694007e42c3ce11f9d68812b0a4fd6d85f5f..8876c23041a3e3d4d23445d44e00be5382f8de6d 100644 (file)
--- a/lib/sqlalchemy/testing/assertsql.py
+++ b/lib/sqlalchemy/testing/assertsql.py
@@ -184,8 +184,8 @@ class CompiledSQL(SQLMatchRule):
  
      def _failure_message(self, expected_params):
          return (
-            "Testing for compiled statement %r partial params %s, "
-            "received %%(received_statement)r with params "
+            "Testing for compiled statement\n%r partial params %s, "
+            "received\n%%(received_statement)r with params "
              "%%(received_parameters)r"
              % (
                  self.statement.replace("%", "%%"),
@@ -343,6 +343,9 @@ class SQLExecuteObserved(object):
          self.parameters = _distill_params(multiparams, params)
          self.statements = []
  
+    def __repr__(self):
+        return str(self.statements)
+
  
  class SQLCursorExecuteObserved(
      collections.namedtuple(
@@ -373,7 +376,7 @@ class SQLAsserter(object):
              elif rule.errormessage:
                  assert False, rule.errormessage
          if observed:
-            assert False, "Additional SQL statements remain"
+            assert False, "Additional SQL statements remain:\n%s" % observed
          elif not rule.is_consumed:
              rule.no_more_statements()
  
diff --git a/test/orm/test_deprecations.py b/test/orm/test_deprecations.py

index 4efff1f7697e507ed63154c04099a64ad22457bd..475e1c4ec946e848608b22a16370b9c382b77297 100644 (file)
--- a/test/orm/test_deprecations.py
+++ b/test/orm/test_deprecations.py
@@ -1,5 +1,6 @@
  import sqlalchemy as sa
  from sqlalchemy import and_
+from sqlalchemy import desc
  from sqlalchemy import event
  from sqlalchemy import func
  from sqlalchemy import Integer
@@ -7,6 +8,7 @@ from sqlalchemy import select
  from sqlalchemy import String
  from sqlalchemy import testing
  from sqlalchemy import text
+from sqlalchemy import true
  from sqlalchemy.ext.declarative import comparable_using
  from sqlalchemy.ext.declarative import declarative_base
  from sqlalchemy.orm import aliased
@@ -2287,3 +2289,78 @@ class TestDeprecation20(fixtures.TestBase):
      def test_eagerloading(self):
          with testing.expect_deprecated_20(".*joinedload"):
              eagerload("foo")
+
+
+class DistinctOrderByImplicitTest(QueryTest, AssertsCompiledSQL):
+    __dialect__ = "default"
+
+    def test_columns_augmented_roundtrip_one(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+        q = (
+            sess.query(User)
+            .join("addresses")
+            .distinct()
+            .order_by(desc(Address.email_address))
+        )
+        with testing.expect_deprecated(
+            "ORDER BY columns added implicitly due to "
+        ):
+            eq_([User(id=7), User(id=9), User(id=8)], q.all())
+
+    def test_columns_augmented_roundtrip_three(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        q = (
+            sess.query(User.id, User.name.label("foo"), Address.id)
+            .join(Address, true())
+            .filter(User.name == "jack")
+            .filter(User.id + Address.user_id > 0)
+            .distinct()
+            .order_by(User.id, User.name, Address.email_address)
+        )
+
+        # even though columns are added, they aren't in the result
+        with testing.expect_deprecated(
+            "ORDER BY columns added implicitly due to "
+        ):
+            eq_(
+                q.all(),
+                [
+                    (7, "jack", 3),
+                    (7, "jack", 4),
+                    (7, "jack", 2),
+                    (7, "jack", 5),
+                    (7, "jack", 1),
+                ],
+            )
+            for row in q:
+                eq_(row._mapping.keys(), ["id", "foo", "id"])
+
+    def test_columns_augmented_sql_one(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        q = (
+            sess.query(User.id, User.name.label("foo"), Address.id)
+            .distinct()
+            .order_by(User.id, User.name, Address.email_address)
+        )
+
+        # Address.email_address is added because of DISTINCT,
+        # however User.id, User.name are not b.c. they're already there,
+        # even though User.name is labeled
+        with testing.expect_deprecated(
+            "ORDER BY columns added implicitly due to "
+        ):
+            self.assert_compile(
+                q,
+                "SELECT DISTINCT users.id AS users_id, users.name AS foo, "
+                "addresses.id AS addresses_id, addresses.email_address AS "
+                "addresses_email_address FROM users, addresses "
+                "ORDER BY users.id, users.name, addresses.email_address",
+            )
diff --git a/test/orm/test_query.py b/test/orm/test_query.py

index 2fb79604a7692ad1d849e86c9f5f3d125bd56b64..7d49aceba7fa866d208c002ddbead04ec27b45b5 100644 (file)
--- a/test/orm/test_query.py
+++ b/test/orm/test_query.py
@@ -3894,10 +3894,11 @@ class DistinctTest(QueryTest, AssertsCompiledSQL):
  
          sess = create_session()
          q = (
-            sess.query(User)
+            sess.query(User, Address.email_address)
              .join("addresses")
              .distinct()
              .order_by(desc(Address.email_address))
+            .from_self(User)
          )
  
          eq_([User(id=7), User(id=9), User(id=8)], q.all())
@@ -3931,12 +3932,18 @@ class DistinctTest(QueryTest, AssertsCompiledSQL):
          sess = create_session()
  
          q = (
-            sess.query(User.id, User.name.label("foo"), Address.id)
+            sess.query(
+                User.id,
+                User.name.label("foo"),
+                Address.id,
+                Address.email_address,
+            )
              .join(Address, true())
              .filter(User.name == "jack")
              .filter(User.id + Address.user_id > 0)
              .distinct()
              .order_by(User.id, User.name, Address.email_address)
+            .from_self(User.id, User.name.label("foo"), Address.id)
          )
  
          # even though columns are added, they aren't in the result
@@ -3959,9 +3966,15 @@ class DistinctTest(QueryTest, AssertsCompiledSQL):
          sess = create_session()
  
          q = (
-            sess.query(User.id, User.name.label("foo"), Address.id)
+            sess.query(
+                User.id,
+                User.name.label("foo"),
+                Address.id,
+                Address.email_address,
+            )
              .distinct()
              .order_by(User.id, User.name, Address.email_address)
+            .from_self(User.id, User.name.label("foo"), Address.id)
          )
  
          # Address.email_address is added because of DISTINCT,
@@ -3969,10 +3982,14 @@ class DistinctTest(QueryTest, AssertsCompiledSQL):
          # even though User.name is labeled
          self.assert_compile(
              q,
+            "SELECT anon_1.users_id AS anon_1_users_id, anon_1.foo AS foo, "
+            "anon_1.addresses_id AS anon_1_addresses_id "
+            "FROM ("
              "SELECT DISTINCT users.id AS users_id, users.name AS foo, "
-            "addresses.id AS addresses_id, "
-            "addresses.email_address AS addresses_email_address FROM users, "
-            "addresses ORDER BY users.id, users.name, addresses.email_address",
+            "addresses.id AS addresses_id, addresses.email_address AS "
+            "addresses_email_address FROM users, addresses ORDER BY "
+            "users.id, users.name, addresses.email_address"
+            ") AS anon_1",
          )
  
      def test_columns_augmented_sql_two(self):
author	Federico Caselli <cfederico87@gmail.com>
	Sat, 15 Feb 2020 11:34:37 +0000 (12:34 +0100)
committer	Mike Bayer <mike_mp@zzzcomputing.com>
	Wed, 25 Mar 2020 14:49:27 +0000 (10:49 -0400)
doc/build/changelog/migration_20.rst		patch \| blob \| blame \| history
doc/build/changelog/unreleased_14/5134.rst	[new file with mode: 0644]	patch \| blob
lib/sqlalchemy/orm/query.py		patch \| blob \| blame \| history
lib/sqlalchemy/orm/strategies.py		patch \| blob \| blame \| history
lib/sqlalchemy/testing/assertsql.py		patch \| blob \| blame \| history
test/orm/test_deprecations.py		patch \| blob \| blame \| history
test/orm/test_query.py		patch \| blob \| blame \| history