From 3f9a343d725eea883aba2145de4cbb7b84f9d08c Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Thu, 10 Feb 2011 14:17:08 -0500 Subject: [PATCH] - Query.distinct() now accepts column expressions as *args, interpreted by the Postgresql dialect as DISTINCT ON (). [ticket:1069] - select.distinct() now accepts column expressions as *args, interpreted by the Postgresql dialect as DISTINCT ON (). Note this was already available via passing a list to the `distinct` keyword argument to select(). [ticket:1069] - select.prefix_with() accepts multiple expressions (i.e. *expr), 'prefix' keyword argument to select() accepts a list or tuple. - Passing a string to the `distinct` keyword argument of `select()` for the purpose of emitting special MySQL keywords (DISTINCTROW etc.) is deprecated - use `prefix_with()` for this. - put kw arguments to select() in order - restore docs for _SelectBase, renamed from _SelectBaseMixin --- CHANGES | 19 +++ doc/build/core/expression_api.rst | 2 +- lib/sqlalchemy/dialects/mysql/base.py | 7 + lib/sqlalchemy/dialects/postgresql/base.py | 5 +- lib/sqlalchemy/orm/query.py | 15 +- lib/sqlalchemy/sql/expression.py | 141 ++++++++++++----- test/dialect/test_mysql.py | 36 ++++- test/dialect/test_postgresql.py | 173 +++++++++++++++------ 8 files changed, 300 insertions(+), 98 deletions(-) diff --git a/CHANGES b/CHANGES index ad123388bc..f2f0562082 100644 --- a/CHANGES +++ b/CHANGES @@ -118,10 +118,29 @@ CHANGES in the session. Was a warning in 0.6.6. [ticket:2046] + - Query.distinct() now accepts column expressions + as *args, interpreted by the Postgresql dialect + as DISTINCT ON (). [ticket:1069] + - sql - LIMIT/OFFSET clauses now use bind parameters [ticket:805] + - select.distinct() now accepts column expressions + as *args, interpreted by the Postgresql dialect + as DISTINCT ON (). Note this was already + available via passing a list to the `distinct` + keyword argument to select(). [ticket:1069] + + - select.prefix_with() accepts multiple expressions + (i.e. *expr), 'prefix' keyword argument to select() + accepts a list or tuple. + + - Passing a string to the `distinct` keyword argument + of `select()` for the purpose of emitting special + MySQL keywords (DISTINCTROW etc.) is deprecated - + use `prefix_with()` for this. + - TypeDecorator works with primary key columns [ticket:2005] [ticket:2006] diff --git a/doc/build/core/expression_api.rst b/doc/build/core/expression_api.rst index ef8039ad06..a977e86a5e 100644 --- a/doc/build/core/expression_api.rst +++ b/doc/build/core/expression_api.rst @@ -166,7 +166,7 @@ Classes :members: :show-inheritance: -.. autoclass:: _SelectBaseMixin +.. autoclass:: _SelectBase :members: :show-inheritance: diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py index 271218dba8..21e0312f5a 100644 --- a/lib/sqlalchemy/dialects/mysql/base.py +++ b/lib/sqlalchemy/dialects/mysql/base.py @@ -1176,6 +1176,13 @@ class MySQLCompiler(compiler.SQLCompiler): return value def get_select_precolumns(self, select): + """Add special MySQL keywords in place of DISTINCT. + + .. note:: this usage is deprecated. :meth:`.Select.prefix_with` + should be used for special keywords at the start + of a SELECT. + + """ if isinstance(select._distinct, basestring): return select._distinct.upper() + " " elif select._distinct: diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py index 22d269cdf0..8bc95ef482 100644 --- a/lib/sqlalchemy/dialects/postgresql/base.py +++ b/lib/sqlalchemy/dialects/postgresql/base.py @@ -457,11 +457,10 @@ class PGCompiler(compiler.SQLCompiler): return "DISTINCT " elif isinstance(select._distinct, (list, tuple)): return "DISTINCT ON (" + ', '.join( - [(isinstance(col, basestring) and col - or self.process(col)) for col in select._distinct] + [self.process(col) for col in select._distinct] )+ ") " else: - return "DISTINCT ON (" + unicode(select._distinct) + ") " + return "DISTINCT ON (" + self.process(select._distinct) + ") " else: return "" diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index 79b0809277..71a6edd310 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ -1609,12 +1609,23 @@ class Query(object): self._offset = offset @_generative(_no_statement_condition) - def distinct(self): + def distinct(self, *criterion): """Apply a ``DISTINCT`` to the query and return the newly resulting ``Query``. + + :param \*expr: optional column expressions. When present, + the Postgresql dialect will render a ``DISTINCT ON (>)`` + construct. """ - self._distinct = True + if not criterion: + self._distinct = True + else: + criterion = self._adapt_col_list(criterion) + if isinstance(self._distinct, list): + self._distinct += criterion + else: + self._distinct = criterion def all(self): """Return the results represented by this ``Query`` as a list. diff --git a/lib/sqlalchemy/sql/expression.py b/lib/sqlalchemy/sql/expression.py index 04ca147bbd..4287216a42 100644 --- a/lib/sqlalchemy/sql/expression.py +++ b/lib/sqlalchemy/sql/expression.py @@ -187,23 +187,38 @@ def select(columns=None, whereclause=None, from_obj=[], **kwargs): Deprecated. Use .execution_options(autocommit=) to set the autocommit option. - :param prefixes: - a list of strings or :class:`ClauseElement` objects to include - directly after the SELECT keyword in the generated statement, - for dialect-specific query features. + :param bind=None: + an :class:`~.base.Engine` or :class:`~.base.Connection` instance + to which the + resulting :class:`.Select` object will be bound. The :class:`.Select` + object will otherwise automatically bind to whatever + :class:`~.base.Connectable` instances can be located within its contained + :class:`.ClauseElement` members. + + :param correlate=True: + indicates that this :class:`Select` object should have its + contained :class:`FromClause` elements "correlated" to an enclosing + :class:`Select` object. This means that any :class:`ClauseElement` + instance within the "froms" collection of this :class:`Select` + which is also present in the "froms" collection of an + enclosing select will not be rendered in the ``FROM`` clause + of this select statement. :param distinct=False: when ``True``, applies a ``DISTINCT`` qualifier to the columns clause of the resulting statement. - - :param use_labels=False: - when ``True``, the statement will be generated using labels - for each column in the columns clause, which qualify each - column with its parent table's (or aliases) name so that name - conflicts between columns in different tables don't occur. - The format of the label is _. The "c" - collection of the resulting :class:`Select` object will use these - names as well for targeting column members. + + The boolean argument may also be a column expression or list + of column expressions - this is a special calling form which + is understood by the Postgresql dialect to render the + ``DISTINCT ON ()`` syntax. + + ``distinct`` is also available via the :meth:`~.Select.distinct` + generative method. + + .. note:: The ``distinct`` keyword's acceptance of a string + argument for usage with MySQL is deprecated. Use + the ``prefixes`` argument or :meth:`~.Select.prefix_with`. :param for_update=False: when ``True``, applies ``FOR UPDATE`` to the end of the @@ -213,15 +228,6 @@ def select(columns=None, whereclause=None, from_obj=[], **kwargs): and oracle supports "nowait" which translates to ``FOR UPDATE NOWAIT``. - :param correlate=True: - indicates that this :class:`Select` object should have its - contained :class:`FromClause` elements "correlated" to an enclosing - :class:`Select` object. This means that any :class:`ClauseElement` - instance within the "froms" collection of this :class:`Select` - which is also present in the "froms" collection of an - enclosing select will not be rendered in the ``FROM`` clause - of this select statement. - :param group_by: a list of :class:`ClauseElement` objects which will comprise the ``GROUP BY`` clause of the resulting select. @@ -230,10 +236,6 @@ def select(columns=None, whereclause=None, from_obj=[], **kwargs): a :class:`ClauseElement` that will comprise the ``HAVING`` clause of the resulting select when ``GROUP BY`` is used. - :param order_by: - a scalar or list of :class:`ClauseElement` objects which will - comprise the ``ORDER BY`` clause of the resulting select. - :param limit=None: a numerical value which usually compiles to a ``LIMIT`` expression in the resulting select. Databases that don't @@ -246,13 +248,29 @@ def select(columns=None, whereclause=None, from_obj=[], **kwargs): support ``OFFSET`` will attempt to provide similar functionality. - :param bind=None: - an ``Engine`` or ``Connection`` instance to which the - resulting ``Select ` object will be bound. The ``Select`` - object will otherwise automatically bind to whatever - ``Connectable`` instances can be located within its contained - :class:`ClauseElement` members. + :param order_by: + a scalar or list of :class:`ClauseElement` objects which will + comprise the ``ORDER BY`` clause of the resulting select. + :param prefixes: + a list of strings or :class:`ClauseElement` objects to include + directly after the SELECT keyword in the generated statement, + for dialect-specific query features. ``prefixes`` is + also available via the :meth:`~.Select.prefix_with` + generative method. + + :param use_labels=False: + when ``True``, the statement will be generated using labels + for each column in the columns clause, which qualify each + column with its parent table's (or aliases) name so that name + conflicts between columns in different tables don't occur. + The format of the label is _. The "c" + collection of the resulting :class:`Select` object will use these + names as well for targeting column members. + + use_labels is also available via the :meth:`~._SelectBase.apply_labels` + generative method. + """ return Select(columns, whereclause=whereclause, from_obj=from_obj, **kwargs) @@ -4085,6 +4103,7 @@ class Select(_SelectBase): _prefixes = () _hints = util.immutabledict() + _distinct = False def __init__(self, columns, @@ -4106,7 +4125,24 @@ class Select(_SelectBase): """ self._should_correlate = correlate - self._distinct = distinct + if distinct is not False: + if isinstance(distinct, basestring): + util.warn_deprecated( + "A string argument passed to the 'distinct' " + "keyword argument of 'select()' is deprecated " + "- please use 'prefixes' or 'prefix_with()' " + "to specify additional prefixes") + if prefixes: + prefixes = util.to_list(prefixes) + [distinct] + else: + prefixes = [distinct] + elif distinct is True: + self._distinct = True + else: + self._distinct = [ + _literal_as_text(e) + for e in util.to_list(distinct) + ] self._correlate = set() self._froms = util.OrderedSet() @@ -4329,21 +4365,44 @@ class Select(_SelectBase): self.append_having(having) @_generative - def distinct(self): - """return a new select() construct which will apply DISTINCT to its + def distinct(self, *expr): + """Return a new select() construct which will apply DISTINCT to its columns clause. - """ - self._distinct = True + :param \*expr: optional column expressions. When present, + the Postgresql dialect will render a ``DISTINCT ON (>)`` + construct. + + """ + if expr: + expr = [_literal_as_text(e) for e in expr] + if isinstance(self._distinct, list): + self._distinct = self._distinct + expr + else: + self._distinct = expr + else: + self._distinct = True @_generative - def prefix_with(self, clause): + def prefix_with(self, *expr): """return a new select() construct which will apply the given - expression to the start of its columns clause, not using any commas. + expressions, typically strings, to the start of its columns clause, + not using any commas. In particular is useful for MySQL + keywords. + + e.g.:: + + select(['a', 'b']).prefix_with('HIGH_PRIORITY', + 'SQL_SMALL_RESULT', + 'ALL') + + Would render:: + + SELECT HIGH_PRIORITY SQL_SMALL_RESULT ALL a, b """ - clause = _literal_as_text(clause) - self._prefixes = self._prefixes + (clause,) + expr = tuple(_literal_as_text(e) for e in expr) + self._prefixes = self._prefixes + expr @_generative def select_from(self, fromclause): diff --git a/test/dialect/test_mysql.py b/test/dialect/test_mysql.py index fd7a15e5b1..9b92ddb092 100644 --- a/test/dialect/test_mysql.py +++ b/test/dialect/test_mysql.py @@ -1023,25 +1023,47 @@ class SQLTest(TestBase, AssertsCompiledSQL): eq_(gen(None), 'SELECT q') eq_(gen(True), 'SELECT DISTINCT q') - eq_(gen(1), 'SELECT DISTINCT q') - eq_(gen('diSTInct'), 'SELECT DISTINCT q') - eq_(gen('DISTINCT'), 'SELECT DISTINCT q') - # Standard SQL - eq_(gen('all'), 'SELECT ALL q') - eq_(gen('distinctrow'), 'SELECT DISTINCTROW q') + assert_raises( + exc.SADeprecationWarning, + gen, 'DISTINCT' + ) + + eq_(gen(prefixes=['ALL']), 'SELECT ALL q') + eq_(gen(prefixes=['DISTINCTROW']), + 'SELECT DISTINCTROW q') # Interaction with MySQL prefix extensions eq_( gen(None, ['straight_join']), 'SELECT straight_join q') eq_( - gen('all', ['HIGH_PRIORITY SQL_SMALL_RESULT']), + gen(False, ['HIGH_PRIORITY', 'SQL_SMALL_RESULT', 'ALL']), 'SELECT HIGH_PRIORITY SQL_SMALL_RESULT ALL q') eq_( gen(True, ['high_priority', sql.text('sql_cache')]), 'SELECT high_priority sql_cache DISTINCT q') + @testing.uses_deprecated + def test_deprecated_distinct(self): + dialect = self.__dialect__ + + self.assert_compile( + select(['q'], distinct='ALL'), + 'SELECT ALL q', + ) + + self.assert_compile( + select(['q'], distinct='distinctROW'), + 'SELECT DISTINCTROW q', + ) + + self.assert_compile( + select(['q'], distinct='ALL', + prefixes=['HIGH_PRIORITY', 'SQL_SMALL_RESULT']), + 'SELECT HIGH_PRIORITY SQL_SMALL_RESULT ALL q' + ) + def test_backslash_escaping(self): self.assert_compile( sql.column('foo').like('bar', escape='\\'), diff --git a/test/dialect/test_postgresql.py b/test/dialect/test_postgresql.py index 10067a6696..5d67e19216 100644 --- a/test/dialect/test_postgresql.py +++ b/test/dialect/test_postgresql.py @@ -1084,24 +1084,125 @@ class DomainReflectionTest(TestBase, AssertsExecutionResults): finally: postgresql.PGDialect.ischema_names = ischema_names +class DistinctOnTest(TestBase, AssertsCompiledSQL): + """Test 'DISTINCT' with SQL expression language and orm.Query with + an emphasis on PG's 'DISTINCT ON' syntax. + + """ + __dialect__ = postgresql.dialect() + + def setup(self): + self.table = Table('t', MetaData(), + Column('id',Integer, primary_key=True), + Column('a', String), + Column('b', String), + ) + + def test_plain_generative(self): + self.assert_compile( + select([self.table]).distinct(), + "SELECT DISTINCT t.id, t.a, t.b FROM t" + ) + + def test_on_columns_generative(self): + self.assert_compile( + select([self.table]).distinct(self.table.c.a), + "SELECT DISTINCT ON (t.a) t.id, t.a, t.b FROM t" + ) + + def test_on_columns_generative_multi_call(self): + self.assert_compile( + select([self.table]).distinct(self.table.c.a). + distinct(self.table.c.b), + "SELECT DISTINCT ON (t.a, t.b) t.id, t.a, t.b FROM t" + ) + + def test_plain_inline(self): + self.assert_compile( + select([self.table], distinct=True), + "SELECT DISTINCT t.id, t.a, t.b FROM t" + ) + + def test_on_columns_inline_list(self): + self.assert_compile( + select([self.table], + distinct=[self.table.c.a, self.table.c.b]). + order_by(self.table.c.a, self.table.c.b), + "SELECT DISTINCT ON (t.a, t.b) t.id, " + "t.a, t.b FROM t ORDER BY t.a, t.b" + ) + + def test_on_columns_inline_scalar(self): + self.assert_compile( + select([self.table], distinct=self.table.c.a), + "SELECT DISTINCT ON (t.a) t.id, t.a, t.b FROM t" + ) + + def test_query_plain(self): + sess = Session() + self.assert_compile( + sess.query(self.table).distinct(), + "SELECT DISTINCT t.id AS t_id, t.a AS t_a, " + "t.b AS t_b FROM t" + ) + + def test_query_on_columns(self): + sess = Session() + self.assert_compile( + sess.query(self.table).distinct(self.table.c.a), + "SELECT DISTINCT ON (t.a) t.id AS t_id, t.a AS t_a, " + "t.b AS t_b FROM t" + ) + + def test_query_on_columns_multi_call(self): + sess = Session() + self.assert_compile( + sess.query(self.table).distinct(self.table.c.a). + distinct(self.table.c.b), + "SELECT DISTINCT ON (t.a, t.b) t.id AS t_id, t.a AS t_a, " + "t.b AS t_b FROM t" + ) + + def test_query_on_columns_subquery(self): + sess = Session() + class Foo(object): + pass + mapper(Foo, self.table) + sess = Session() + self.assert_compile( + sess.query(Foo).from_self().distinct(Foo.a, Foo.b), + "SELECT DISTINCT ON (anon_1.t_a, anon_1.t_b) anon_1.t_id " + "AS anon_1_t_id, anon_1.t_a AS anon_1_t_a, anon_1.t_b " + "AS anon_1_t_b FROM (SELECT t.id AS t_id, t.a AS t_a, " + "t.b AS t_b FROM t) AS anon_1" + ) + + def test_query_distinct_on_aliased(self): + class Foo(object): + pass + mapper(Foo, self.table) + a1 = aliased(Foo) + sess = Session() + self.assert_compile( + sess.query(a1).distinct(a1.a), + "SELECT DISTINCT ON (t_1.a) t_1.id AS t_1_id, " + "t_1.a AS t_1_a, t_1.b AS t_1_b FROM t AS t_1" + ) class MiscTest(TestBase, AssertsExecutionResults, AssertsCompiledSQL): __only_on__ = 'postgresql' + @testing.provide_metadata def test_date_reflection(self): - m1 = MetaData(testing.db) - t1 = Table('pgdate', m1, Column('date1', + t1 = Table('pgdate', metadata, Column('date1', DateTime(timezone=True)), Column('date2', DateTime(timezone=False))) - m1.create_all() - try: - m2 = MetaData(testing.db) - t2 = Table('pgdate', m2, autoload=True) - assert t2.c.date1.type.timezone is True - assert t2.c.date2.type.timezone is False - finally: - m1.drop_all() + metadata.create_all() + m2 = MetaData(testing.db) + t2 = Table('pgdate', m2, autoload=True) + assert t2.c.date1.type.timezone is True + assert t2.c.date2.type.timezone is False @testing.fails_on('+zxjdbc', 'The JDBC driver handles the version parsing') @@ -1192,41 +1293,25 @@ class MiscTest(TestBase, AssertsExecutionResults, AssertsCompiledSQL): finally: t.drop(checkfirst=True) + @testing.provide_metadata def test_renamed_sequence_reflection(self): - m1 = MetaData(testing.db) - t = Table('t', m1, Column('id', Integer, primary_key=True)) - m1.create_all() - try: - m2 = MetaData(testing.db) - t2 = Table('t', m2, autoload=True, implicit_returning=False) - eq_(t2.c.id.server_default.arg.text, - "nextval('t_id_seq'::regclass)") - r = t2.insert().execute() - eq_(r.inserted_primary_key, [1]) - testing.db.connect().execution_options(autocommit=True).\ - execute('alter table t_id_seq rename to foobar_id_seq' - ) - m3 = MetaData(testing.db) - t3 = Table('t', m3, autoload=True, implicit_returning=False) - eq_(t3.c.id.server_default.arg.text, - "nextval('foobar_id_seq'::regclass)") - r = t3.insert().execute() - eq_(r.inserted_primary_key, [2]) - finally: - m1.drop_all() - - def test_distinct_on(self): - t = Table('mytable', MetaData(testing.db), Column('id', - Integer, primary_key=True), Column('a', String(8))) - eq_(str(t.select(distinct=t.c.a)), - 'SELECT DISTINCT ON (mytable.a) mytable.id, mytable.a ' - '\nFROM mytable') - eq_(str(t.select(distinct=['id', 'a'])), - 'SELECT DISTINCT ON (id, a) mytable.id, mytable.a \nFROM ' - 'mytable') - eq_(str(t.select(distinct=[t.c.id, t.c.a])), - 'SELECT DISTINCT ON (mytable.id, mytable.a) mytable.id, ' - 'mytable.a \nFROM mytable') + t = Table('t', metadata, Column('id', Integer, primary_key=True)) + metadata.create_all() + m2 = MetaData(testing.db) + t2 = Table('t', m2, autoload=True, implicit_returning=False) + eq_(t2.c.id.server_default.arg.text, + "nextval('t_id_seq'::regclass)") + r = t2.insert().execute() + eq_(r.inserted_primary_key, [1]) + testing.db.connect().execution_options(autocommit=True).\ + execute('alter table t_id_seq rename to foobar_id_seq' + ) + m3 = MetaData(testing.db) + t3 = Table('t', m3, autoload=True, implicit_returning=False) + eq_(t3.c.id.server_default.arg.text, + "nextval('foobar_id_seq'::regclass)") + r = t3.insert().execute() + eq_(r.inserted_primary_key, [2]) def test_schema_reflection(self): """note: this test requires that the 'test_schema' schema be -- 2.47.2