some issues may be moved to later milestones in order to allow
for a timely release.
- Document last updated: January 29, 2016
+ Document last updated: Feburary 9, 2016
Introduction
============
:ticket:`3081`
+.. _change_3641:
+
+Columns no longer added redundantly with DISTINCT + ORDER BY
+------------------------------------------------------------
+
+A query such as the following will now augment only those columns
+that are missing from the SELECT list, without duplicates::
+
+ q = session.query(User.id, User.name.label('name')).\
+ distinct().\
+ order_by(User.id, User.name, User.fullname)
+
+Produces::
+
+ SELECT DISTINCT user.id AS a_id, user.name AS name,
+ user.fullname AS a_fullname
+ FROM a ORDER BY user.id, user.name, user.fullname
+
+Previously, it would produce::
+
+ SELECT DISTINCT user.id AS a_id, user.name AS name, user.name AS a_name,
+ user.fullname AS a_fullname
+ FROM a ORDER BY user.id, user.name, user.fullname
+
+Where above, the ``user.name`` column is added unnecessarily. The results
+would not be affected, as the additional columns are not included in the
+result in any case, but the columns are unnecessary.
+
+Additionally, when the Postgresql DISTINCT ON format is used by passing
+expressions to :meth:`.Query.distinct`, the above "column adding" logic
+is disabled entirely.
+
+When the query is being bundled into a subquery for the purposes of
+joined eager loading, the "augment column list" rules are are necessarily
+more aggressive so that the ORDER BY can still be satisifed, so this case
+remains unchanged.
+
+:ticket:`3641`
+
New Features and Improvements - Core
====================================
eq_(q.distinct().count(), 3)
-class DistinctTest(QueryTest):
+class DistinctTest(QueryTest, AssertsCompiledSQL):
+ __dialect__ = 'default'
+
def test_basic(self):
User = self.classes.User
order_by(desc(User.name)).all()
)
- def test_joined(self):
- """test that orderbys from a joined table get placed into the columns
- clause when DISTINCT is used"""
-
+ def test_columns_augmented_roundtrip_one(self):
User, Address = self.classes.User, self.classes.Address
sess = create_session()
q = sess.query(User).join('addresses').distinct(). \
order_by(desc(Address.email_address))
- assert [User(id=7), User(id=9), User(id=8)] == q.all()
+ eq_(
+ [User(id=7), User(id=9), User(id=8)],
+ q.all()
+ )
+
+ def test_columns_augmented_roundtrip_two(self):
+ User, Address = self.classes.User, self.classes.Address
- sess.expunge_all()
+ sess = create_session()
# test that it works on embedded joinedload/LIMIT subquery
q = sess.query(User).join('addresses').distinct(). \
] == q.all()
self.assert_sql_count(testing.db, go, 1)
+ def test_columns_augmented_roundtrip_three(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User.id, User.name.label('foo'), Address.id).\
+ filter(User.name == 'jack').\
+ distinct().\
+ order_by(User.id, User.name, Address.email_address)
+
+ # even though columns are added, they aren't in the result
+ eq_(
+ q.all(),
+ [(7, 'jack', 3), (7, 'jack', 4), (7, 'jack', 2),
+ (7, 'jack', 5), (7, 'jack', 1)]
+ )
+ for row in q:
+ eq_(row.keys(), ['id', 'foo', 'id'])
+
+ def test_columns_augmented_sql_one(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User.id, User.name.label('foo'), Address.id).\
+ distinct().\
+ order_by(User.id, User.name, Address.email_address)
+
+ # Address.email_address is added because of DISTINCT,
+ # however User.id, User.name are not b.c. they're already there,
+ # even though User.name is labeled
+ self.assert_compile(
+ q,
+ "SELECT DISTINCT users.id AS users_id, users.name AS foo, "
+ "addresses.id AS addresses_id, "
+ "addresses.email_address AS addresses_email_address FROM users, "
+ "addresses ORDER BY users.id, users.name, addresses.email_address"
+ )
+
+ def test_columns_augmented_sql_two(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User).\
+ options(joinedload(User.addresses)).\
+ distinct().\
+ order_by(User.name, Address.email_address).\
+ limit(5)
+
+ # addresses.email_address is added to inner query so that
+ # it is available in ORDER BY
+ self.assert_compile(
+ q,
+ "SELECT anon_1.users_id AS anon_1_users_id, "
+ "anon_1.users_name AS anon_1_users_name, "
+ "anon_1.addresses_email_address AS "
+ "anon_1_addresses_email_address, "
+ "addresses_1.id AS addresses_1_id, "
+ "addresses_1.user_id AS addresses_1_user_id, "
+ "addresses_1.email_address AS addresses_1_email_address "
+ "FROM (SELECT DISTINCT users.id AS users_id, "
+ "users.name AS users_name, "
+ "addresses.email_address AS addresses_email_address "
+ "FROM users, addresses "
+ "ORDER BY users.name, addresses.email_address "
+ "LIMIT :param_1) AS anon_1 LEFT OUTER JOIN "
+ "addresses AS addresses_1 "
+ "ON anon_1.users_id = addresses_1.user_id "
+ "ORDER BY anon_1.users_name, "
+ "anon_1.addresses_email_address, addresses_1.id"
+ )
+
+ def test_columns_augmented_sql_three(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User.id, User.name.label('foo'), Address.id).\
+ distinct(User.name).\
+ order_by(User.id, User.name, Address.email_address)
+
+ # no columns are added when DISTINCT ON is used
+ self.assert_compile(
+ q,
+ "SELECT DISTINCT ON (users.name) users.id AS users_id, "
+ "users.name AS foo, addresses.id AS addresses_id FROM users, "
+ "addresses ORDER BY users.id, users.name, addresses.email_address",
+ dialect='postgresql'
+ )
+
+ def test_columns_augmented_sql_four(self):
+ User, Address = self.classes.User, self.classes.Address
+
+ sess = create_session()
+
+ q = sess.query(User).join('addresses').\
+ distinct(Address.email_address). \
+ options(joinedload('addresses')).\
+ order_by(desc(Address.email_address)).limit(2)
+
+ # but for the subquery / eager load case, we still need to make
+ # the inner columns available for the ORDER BY even though its
+ # a DISTINCT ON
+ self.assert_compile(
+ q,
+ "SELECT anon_1.users_id AS anon_1_users_id, "
+ "anon_1.users_name AS anon_1_users_name, "
+ "anon_1.addresses_email_address AS "
+ "anon_1_addresses_email_address, "
+ "addresses_1.id AS addresses_1_id, "
+ "addresses_1.user_id AS addresses_1_user_id, "
+ "addresses_1.email_address AS addresses_1_email_address "
+ "FROM (SELECT DISTINCT ON (addresses.email_address) "
+ "users.id AS users_id, users.name AS users_name, "
+ "addresses.email_address AS addresses_email_address "
+ "FROM users JOIN addresses ON users.id = addresses.user_id "
+ "ORDER BY addresses.email_address DESC "
+ "LIMIT %(param_1)s) AS anon_1 "
+ "LEFT OUTER JOIN addresses AS addresses_1 "
+ "ON anon_1.users_id = addresses_1.user_id "
+ "ORDER BY anon_1.addresses_email_address DESC, addresses_1.id",
+ dialect='postgresql'
+ )
+
class PrefixWithTest(QueryTest, AssertsCompiledSQL):