From 9eafb43c0ff761fa43425d329d281bb8fbece80e Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Wed, 3 Sep 2008 16:53:05 +0000 Subject: [PATCH] - limit/offset no longer uses ROW NUMBER OVER to limit rows, and instead uses subqueries in conjunction with a special Oracle optimization comment. Allows LIMIT/OFFSET to work in conjunction with DISTINCT. [ticket:536] --- CHANGES | 7 +++ lib/sqlalchemy/databases/oracle.py | 58 +++++++++++++----- lib/sqlalchemy/sql/compiler.py | 3 +- test/dialect/oracle.py | 35 ++++++----- test/sql/query.py | 94 ++++++++++++++++++++++-------- 5 files changed, 141 insertions(+), 56 deletions(-) diff --git a/CHANGES b/CHANGES index 8ec9ecc33d..f834ea5d8a 100644 --- a/CHANGES +++ b/CHANGES @@ -137,6 +137,13 @@ CHANGES - Added MSMediumInteger type [ticket:1146]. +- oracle + - limit/offset no longer uses ROW NUMBER OVER to limit rows, + and instead uses subqueries in conjunction with a special + Oracle optimization comment. Allows LIMIT/OFFSET to work + in conjunction with DISTINCT. [ticket:536] + + 0.5beta3 ======== diff --git a/lib/sqlalchemy/databases/oracle.py b/lib/sqlalchemy/databases/oracle.py index 12b22f4456..ed2cb3ef20 100644 --- a/lib/sqlalchemy/databases/oracle.py +++ b/lib/sqlalchemy/databases/oracle.py @@ -654,7 +654,7 @@ class OracleCompiler(compiler.DefaultCompiler): def visit_select(self, select, **kwargs): """Look for ``LIMIT`` and OFFSET in a select statement, and if - so tries to wrap it in a subquery with ``row_number()`` criterion. + so tries to wrap it in a subquery with ``rownum`` criterion. """ if not getattr(select, '_oracle_visit', None): @@ -671,26 +671,54 @@ class OracleCompiler(compiler.DefaultCompiler): select._oracle_visit = True if select._limit is not None or select._offset is not None: - # to use ROW_NUMBER(), an ORDER BY is required. - orderby = self.process(select._order_by_clause) - if not orderby: - orderby = list(select.oid_column.proxies)[0] - orderby = self.process(orderby) - - select = select.column(sql.literal_column("ROW_NUMBER() OVER (ORDER BY %s)" % orderby).label("ora_rn")).order_by(None) + # See http://www.oracle.com/technology/oramag/oracle/06-sep/o56asktom.html + # + # Generalized form of an Oracle pagination query: + # select ... from ( + # select /*+ FIRST_ROWS(N) */ ...., rownum as ora_rn from ( + # select distinct ... where ... order by ... + # ) where ROWNUM <= :limit+:offset + # ) where ora_rn > :offset + # Outer select and "ROWNUM as ora_rn" can be dropped if limit=0 + + # TODO: use annotations instead of clone + attr set ? + select = select._generate() select._oracle_visit = True - limitselect = sql.select([c for c in select.c if c.key!='ora_rn']) + # Wrap the middle select and add the hint + limitselect = sql.select([c for c in select.c]) + if select._limit: + limitselect = limitselect.prefix_with("/*+ FIRST_ROWS(%d) */" % select._limit) + limitselect._oracle_visit = True limitselect._is_wrapper = True - if select._offset is not None: - limitselect.append_whereclause("ora_rn>%d" % select._offset) - if select._limit is not None: - limitselect.append_whereclause("ora_rn<=%d" % (select._limit + select._offset)) + # If needed, add the limiting clause + if select._limit is not None: + max_row = select._limit + if select._offset is not None: + max_row += select._offset + limitselect.append_whereclause( + sql.literal_column("ROWNUM")<=max_row) + + # If needed, add the ora_rn, and wrap again with offset. + if select._offset is None: + select = limitselect else: - limitselect.append_whereclause("ora_rn<=%d" % select._limit) - select = limitselect + limitselect = limitselect.column( + sql.literal_column("ROWNUM").label("ora_rn")) + limitselect._oracle_visit = True + limitselect._is_wrapper = True + + offsetselect = sql.select( + [c for c in limitselect.c if c.key!='ora_rn']) + offsetselect._oracle_visit = True + offsetselect._is_wrapper = True + + offsetselect.append_whereclause( + sql.literal_column("ora_rn")>select._offset) + + select = offsetselect kwargs['iswrapper'] = getattr(select, '_is_wrapper', False) return compiler.DefaultCompiler.visit_select(self, select, **kwargs) diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index eacbe59e10..2982a17596 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -477,7 +477,8 @@ class DefaultCompiler(engine.Compiled): if asfrom or (prev_entry and 'select' in prev_entry): stack_entry['is_subquery'] = True - if prev_entry and 'iswrapper' in prev_entry: + stack_entry['iswrapper'] = iswrapper + if not iswrapper and prev_entry and 'iswrapper' in prev_entry: column_clause_args = {'result_map':self.result_map} else: column_clause_args = {} diff --git a/test/dialect/oracle.py b/test/dialect/oracle.py index dcc191cdc2..1a7a060b5c 100644 --- a/test/dialect/oracle.py +++ b/test/dialect/oracle.py @@ -60,8 +60,9 @@ class CompileTest(TestBase, AssertsCompiledSQL): s = select([t]).limit(10).offset(20) - self.assert_compile(s, "SELECT col1, col2 FROM (SELECT sometable.col1 AS col1, sometable.col2 AS col2, " - "ROW_NUMBER() OVER (ORDER BY sometable.rowid) AS ora_rn FROM sometable) WHERE ora_rn>20 AND ora_rn<=30" + self.assert_compile(s, "SELECT col1, col2 FROM (SELECT /*+ FIRST_ROWS(10) */ col1, col2, ROWNUM AS ora_rn " + "FROM (SELECT sometable.col1 AS col1, sometable.col2 AS col2 " + "FROM sometable) WHERE ROWNUM <= :ROWNUM_1) WHERE ora_rn > :ora_rn_1" ) # assert that despite the subquery, the columns from the table, @@ -71,17 +72,16 @@ class CompileTest(TestBase, AssertsCompiledSQL): s = select([s.c.col1, s.c.col2]) - self.assert_compile(s, "SELECT col1, col2 FROM (SELECT col1, col2 FROM (SELECT sometable.col1 AS col1, " - "sometable.col2 AS col2, ROW_NUMBER() OVER (ORDER BY sometable.rowid) AS ora_rn FROM sometable) WHERE ora_rn>20 AND ora_rn<=30)") + self.assert_compile(s, "SELECT col1, col2 FROM (SELECT col1, col2 FROM (SELECT /*+ FIRST_ROWS(10) */ col1, col2, ROWNUM AS ora_rn FROM (SELECT sometable.col1 AS col1, sometable.col2 AS col2 FROM sometable) WHERE ROWNUM <= :ROWNUM_1) WHERE ora_rn > :ora_rn_1)") # testing this twice to ensure oracle doesn't modify the original statement - self.assert_compile(s, "SELECT col1, col2 FROM (SELECT col1, col2 FROM (SELECT sometable.col1 AS col1, " - "sometable.col2 AS col2, ROW_NUMBER() OVER (ORDER BY sometable.rowid) AS ora_rn FROM sometable) WHERE ora_rn>20 AND ora_rn<=30)") + self.assert_compile(s, "SELECT col1, col2 FROM (SELECT col1, col2 FROM (SELECT /*+ FIRST_ROWS(10) */ col1, col2, ROWNUM AS ora_rn FROM (SELECT sometable.col1 AS col1, sometable.col2 AS col2 FROM sometable) WHERE ROWNUM <= :ROWNUM_1) WHERE ora_rn > :ora_rn_1)") s = select([t]).limit(10).offset(20).order_by(t.c.col2) - self.assert_compile(s, "SELECT col1, col2 FROM (SELECT sometable.col1 AS col1, " - "sometable.col2 AS col2, ROW_NUMBER() OVER (ORDER BY sometable.col2) AS ora_rn FROM sometable) WHERE ora_rn>20 AND ora_rn<=30") + self.assert_compile(s, "SELECT col1, col2 FROM (SELECT /*+ FIRST_ROWS(10) */ col1, col2, ROWNUM " + "AS ora_rn FROM (SELECT sometable.col1 AS col1, sometable.col2 AS col2 FROM sometable " + "ORDER BY sometable.col2) WHERE ROWNUM <= :ROWNUM_1) WHERE ora_rn > :ora_rn_1") def test_outer_join(self): table1 = table('mytable', @@ -128,13 +128,18 @@ AND mytable.myid = myothertable.otherid(+)", self.assert_compile(query.select(), "SELECT mytable.myid, mytable.name, mytable.description, myothertable.otherid, myothertable.othername, thirdtable.userid, thirdtable.otherstuff FROM mytable, myothertable, thirdtable WHERE thirdtable.userid = myothertable.otherid AND mytable.myid = myothertable.otherid", dialect=oracle.dialect(use_ansi=False)) query = table1.join(table2, table1.c.myid==table2.c.otherid).outerjoin(table3, table3.c.userid==table2.c.otherid) - self.assert_compile(query.select().order_by(table1.oid_column).limit(10).offset(5), "SELECT myid, name, description, otherid, othername, userid, \ -otherstuff FROM (SELECT mytable.myid AS myid, mytable.name AS name, \ -mytable.description AS description, myothertable.otherid AS otherid, \ -myothertable.othername AS othername, thirdtable.userid AS userid, \ -thirdtable.otherstuff AS otherstuff, ROW_NUMBER() OVER (ORDER BY mytable.rowid) AS ora_rn \ -FROM mytable, myothertable, thirdtable WHERE thirdtable.userid(+) = myothertable.otherid AND mytable.myid = myothertable.otherid) \ -WHERE ora_rn>5 AND ora_rn<=15", dialect=oracle.dialect(use_ansi=False)) + + self.assert_compile(query.select().order_by(table1.oid_column).limit(10).offset(5), + + "SELECT myid, name, description, otherid, othername, userid, " + "otherstuff FROM (SELECT /*+ FIRST_ROWS(10) */ myid, name, description, " + "otherid, othername, userid, otherstuff, ROWNUM AS ora_rn FROM (SELECT " + "mytable.myid AS myid, mytable.name AS name, mytable.description AS description, " + "myothertable.otherid AS otherid, myothertable.othername AS othername, " + "thirdtable.userid AS userid, thirdtable.otherstuff AS otherstuff FROM mytable, " + "myothertable, thirdtable WHERE thirdtable.userid(+) = myothertable.otherid AND " + "mytable.myid = myothertable.otherid ORDER BY mytable.rowid) WHERE " + "ROWNUM <= :ROWNUM_1) WHERE ora_rn > :ora_rn_1", dialect=oracle.dialect(use_ansi=False)) def test_alias_outer_join(self): address_types = table('address_types', diff --git a/test/sql/query.py b/test/sql/query.py index 6bed07a9bd..fa247a7b2e 100644 --- a/test/sql/query.py +++ b/test/sql/query.py @@ -317,31 +317,7 @@ class QueryTest(TestBase): print repr(users.select().execute().fetchall()) - def test_select_limit(self): - users.insert().execute(user_id=1, user_name='john') - users.insert().execute(user_id=2, user_name='jack') - users.insert().execute(user_id=3, user_name='ed') - users.insert().execute(user_id=4, user_name='wendy') - users.insert().execute(user_id=5, user_name='laura') - users.insert().execute(user_id=6, user_name='ralph') - users.insert().execute(user_id=7, user_name='fido') - r = users.select(limit=3, order_by=[users.c.user_id]).execute().fetchall() - self.assert_(r == [(1, 'john'), (2, 'jack'), (3, 'ed')], repr(r)) - @testing.crashes('mssql', 'FIXME: guessing') - @testing.fails_on('maxdb') - def test_select_limit_offset(self): - users.insert().execute(user_id=1, user_name='john') - users.insert().execute(user_id=2, user_name='jack') - users.insert().execute(user_id=3, user_name='ed') - users.insert().execute(user_id=4, user_name='wendy') - users.insert().execute(user_id=5, user_name='laura') - users.insert().execute(user_id=6, user_name='ralph') - users.insert().execute(user_id=7, user_name='fido') - r = users.select(limit=3, offset=2, order_by=[users.c.user_id]).execute().fetchall() - self.assert_(r==[(3, 'ed'), (4, 'wendy'), (5, 'laura')]) - r = users.select(offset=5, order_by=[users.c.user_id]).execute().fetchall() - self.assert_(r==[(6, 'ralph'), (7, 'fido')]) @testing.exclude('mysql', '<', (5, 0, 37), 'database bug') def test_scalar_select(self): @@ -556,7 +532,7 @@ class QueryTest(TestBase): finally: shadowed.drop(checkfirst=True) - @testing.fails_on('firebird', 'maxdb') + @testing.fails_on('firebird', 'maxdb', 'oracle') def test_in_filtering(self): """test the behavior of the in_() function.""" @@ -608,6 +584,74 @@ class QueryTest(TestBase): assert len(r) == 1 +class LimitTest(TestBase): + + def setUpAll(self): + global users, addresses, metadata + metadata = MetaData(testing.db) + users = Table('query_users', metadata, + Column('user_id', INT, primary_key = True), + Column('user_name', VARCHAR(20)), + ) + addresses = Table('query_addresses', metadata, + Column('address_id', Integer, primary_key=True), + Column('user_id', Integer, ForeignKey('query_users.user_id')), + Column('address', String(30))) + metadata.create_all() + self._data() + + def _data(self): + users.insert().execute(user_id=1, user_name='john') + addresses.insert().execute(address_id=1, user_id=1, address='addr1') + users.insert().execute(user_id=2, user_name='jack') + addresses.insert().execute(address_id=2, user_id=2, address='addr1') + users.insert().execute(user_id=3, user_name='ed') + addresses.insert().execute(address_id=3, user_id=3, address='addr2') + users.insert().execute(user_id=4, user_name='wendy') + addresses.insert().execute(address_id=4, user_id=4, address='addr3') + users.insert().execute(user_id=5, user_name='laura') + addresses.insert().execute(address_id=5, user_id=5, address='addr4') + users.insert().execute(user_id=6, user_name='ralph') + addresses.insert().execute(address_id=6, user_id=6, address='addr5') + users.insert().execute(user_id=7, user_name='fido') + addresses.insert().execute(address_id=7, user_id=7, address='addr5') + + def tearDownAll(self): + metadata.drop_all() + + def test_select_limit(self): + r = users.select(limit=3, order_by=[users.c.user_id]).execute().fetchall() + self.assert_(r == [(1, 'john'), (2, 'jack'), (3, 'ed')], repr(r)) + + @testing.crashes('mssql', 'FIXME: guessing') + @testing.fails_on('maxdb') + def test_select_limit_offset(self): + r = users.select(limit=3, offset=2, order_by=[users.c.user_id]).execute().fetchall() + self.assert_(r==[(3, 'ed'), (4, 'wendy'), (5, 'laura')]) + r = users.select(offset=5, order_by=[users.c.user_id]).execute().fetchall() + self.assert_(r==[(6, 'ralph'), (7, 'fido')]) + + def test_select_distinct_limit(self): + """Test the interaction between limit and distinct""" + + r = sorted([x[0] for x in select([addresses.c.address]).distinct().limit(3).execute().fetchall()]) + self.assert_(len(r) == 3, repr(r)) + self.assert_(r[0] != r[1] and r[1] != r[2], repr(r)) + + def test_select_distinct_offset(self): + """Test the interaction between limit and offset""" + + r = sorted([x[0] for x in select([addresses.c.address]).distinct().offset(1).execute().fetchall()]) + self.assert_(len(r) == 4, repr(r)) + self.assert_(r[0] != r[1] and r[1] != r[2] and r[2] != [3], repr(r)) + + def test_select_distinct_limit_offset(self): + """Test the interaction between limit and limit/offset""" + + r = select([addresses.c.address]).order_by(addresses.c.address).distinct().offset(2).limit(3).execute().fetchall() + self.assert_(len(r) == 3, repr(r)) + self.assert_(r[0] != r[1] and r[1] != r[2], repr(r)) + class CompoundTest(TestBase): """test compound statements like UNION, INTERSECT, particularly their ability to nest on different databases.""" -- 2.47.3