From: dan Date: Tue, 12 Jun 2018 18:40:17 +0000 (+0000) Subject: Fix some problems with using window-functions in aggregate queries. X-Git-Tag: version-3.25.0~178^2~29 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7392569f5ee59708ede3dde1d17e0f02ef5b8123;p=thirdparty%2Fsqlite.git Fix some problems with using window-functions in aggregate queries. FossilOrigin-Name: fe7081e0952950f577234fcbb58f3c1efa4579267654fd2f713dc4804e470e7e --- diff --git a/manifest b/manifest index c3a8208d5d..7cc7cd4f7f 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Clarify\sthe\srelationship\sbetween\sa\sWindow\sobject\sand\sits\sassociated\sExpr. -D 2018-06-11T20:50:25.844 +C Fix\ssome\sproblems\swith\susing\swindow-functions\sin\saggregate\squeries. +D 2018-06-12T18:40:17.751 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F Makefile.in 498b77b89a8cb42f2ee20fcd6317f279a45c0d6ff40d27825f94b69884c09bbe @@ -495,7 +495,7 @@ F src/printf.c 7f6f3cba8e0c49c19e30a1ff4e9aeda6e06814dcbad4b664a69e1b6cb6e7e365 F src/random.c 80f5d666f23feb3e6665a6ce04c7197212a88384 F src/resolve.c a8cf3d6144f6a821f002dad72f80659691e827a96e6da6dedf8b263edefe3a80 F src/rowset.c 7b7e7e479212e65b723bf40128c7b36dc5afdfac -F src/select.c 0b0ce29bd7b8a7232e6f7602ddb447caa954a1fc476ff5e23ce1e5aaa6a0e0ed +F src/select.c 224312eb28c1170117c8cef29abc8f6a420b2a60df26543df75632b731ecac8d F src/shell.c.in 4d0ddf10c403710d241bf920163dcf032c21119aebb61e70840942c0eafecdf9 F src/sqlite.h.in 63b07f76731f2b1e55c48fdb9f0508dcc6fbe3971010b8612ffd847c3c56d9a1 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 @@ -583,7 +583,7 @@ F src/where.c fe1a6f97c12cc9472ccce86166ba3f827cf61d6ae770c036a6396b63863baac4 F src/whereInt.h b90ef9b9707ef750eab2a7a080c48fb4900315033274689def32d0cf5a81ebe4 F src/wherecode.c 3317f2b083a66d3e65a03edf316ade4ccb0a99c9956273282ebb579b95d4ba96 F src/whereexpr.c 6f022d6cc9daf56495f191b199352f783aff5cf268ba136b4d8cea3fb62d8c7d -F src/window.c a5ebf5b119f50a1c886a900cf817ada2d9e3cf30633471e1444423424443756c +F src/window.c 5fc1e9a4367bdd6c5afd318a36ec0b1f702fa1e3384621501873ec6b3e94651a F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2 F test/affinity2.test a6d901b436328bd67a79b41bb0ac2663918fe3bd F test/affinity3.test 6a101af2fc945ce2912f6fe54dd646018551710d @@ -1622,8 +1622,8 @@ F test/window2.tcl 0983de5eade5eeda49469244799d5331bfe3199fca3f6c6d2a836aa08f4fb F test/window2.test 79747b2edde4ad424e0752b27529aedc86e91f3d8d88846fa17ff0cb67f65086 F test/window3.tcl 654d61d73e10db089b22514d498bb23ec310f720c0f4b5f69f67fda83d672048 F test/window3.test 41727668ee31d2ba50f78efcb5bf1bda2c5cffd889aa65243511004669d1ac25 -F test/window4.tcl bfea0c4f65dff22568032ae3a0cf3be0910c4160314e5ac3f895eca11b068cb1 -F test/window4.test ca7c63f27604a0eb432cab1673da75498c69d66b9093ef80a5d4c7287b95906b +F test/window4.tcl e64db87bac34d9a726f2b97c40f4adbfc21650a26b7015b18f357062266a0062 +F test/window4.test 36df9adf8b305b427fee8c9604a958e8fecb85c2ba21f6819ad3e87610253001 F test/with1.test 58475190cd8caaeebea8cfeb2a264ec97a0c492b8ffe9ad20cefbb23df462f96 F test/with2.test e0030e2f0267a910d6c0e4f46f2dfe941c1cc0d4f659ba69b3597728e7e8f1ab F test/with3.test 5e8ce2c585170bbbc0544e2a01a4941fa0be173ba5265e5c92eb588cd99a232d @@ -1740,7 +1740,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P b6d9c7eda853420ae46a05bd432711e8bf9ebaa448c7d90ccfc0bcc338a87706 -R 118974de7b2e01c35e697f59c5a4477b +P 0cd55e98a478740032f5569ddc00fa5b0e063e90db6e00ac7598c9b7c2fffeee +R a9e6944f8f988df57ad1f0cd1beda118 U dan -Z 1841ea3c1d7c82f1a89ee2ea7c51ae4b +Z e98e199ae454a383acf0272ef0289805 diff --git a/manifest.uuid b/manifest.uuid index 4bf55168e2..1fb6535a25 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0cd55e98a478740032f5569ddc00fa5b0e063e90db6e00ac7598c9b7c2fffeee \ No newline at end of file +fe7081e0952950f577234fcbb58f3c1efa4579267654fd2f713dc4804e470e7e \ No newline at end of file diff --git a/src/select.c b/src/select.c index 48e1afd4ad..c90a978d1a 100644 --- a/src/select.c +++ b/src/select.c @@ -5464,7 +5464,6 @@ int sqlite3Select( goto select_end; } assert( p->pEList!=0 ); - isAgg = (p->selFlags & SF_Aggregate)!=0; #if SELECTTRACE_ENABLED if( sqlite3SelectTrace & 0x104 ){ SELECTTRACE(0x104,pParse,p, ("after name resolution:\n")); @@ -5486,6 +5485,7 @@ int sqlite3Select( } #endif pTabList = p->pSrc; + isAgg = (p->selFlags & SF_Aggregate)!=0; /* Try to various optimizations (flattening subqueries, and strength ** reduction of join operators) in the FROM clause up into the main query diff --git a/src/window.c b/src/window.c index a627be1bc6..6d66fb82d7 100644 --- a/src/window.c +++ b/src/window.c @@ -12,6 +12,125 @@ */ #include "sqliteInt.h" +/* +** SELECT REWRITING +** +** Any SELECT statement that contains one or more window functions in +** either the select list or ORDER BY clause (the only two places window +** functions may be used) is transformed by function sqlite3WindowRewrite() +** in order to support window function processing. For example, with the +** schema: +** +** CREATE TABLE t1(a, b, c, d, e, f, g); +** +** the statement: +** +** SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM t1 ORDER BY e; +** +** is transformed to: +** +** SELECT a+1, max(b) OVER (PARTITION BY c ORDER BY d) FROM ( +** SELECT a, e, c, d, b FROM t1 ORDER BY c, d +** ) ORDER BY e; +** +** The flattening optimization is disabled when processing this transformed +** SELECT statement. This allows the implementation of the window function +** (in this case max()) to process rows sorted in order of (c, d), which +** makes things easier for obvious reasons. More generally: +** +** * FROM, WHERE, GROUP BY and HAVING clauses are all moved to +** the sub-query. +** +** * ORDER BY, LIMIT and OFFSET remain part of the parent query. +** +** * Terminals from each of the expression trees that make up the +** select-list and ORDER BY expressions in the parent query are +** selected by the sub-query. For the purposes of the transformation, +** terminals are column references and aggregate functions. +** +** If there is more than one window function in the SELECT that uses +** the same window declaration (the OVER bit), then a single scan may +** be used to process more than one window function. For example: +** +** SELECT max(b) OVER (PARTITION BY c ORDER BY d), +** min(e) OVER (PARTITION BY c ORDER BY d) +** FROM t1; +** +** is transformed in the same way as the example above. However: +** +** SELECT max(b) OVER (PARTITION BY c ORDER BY d), +** min(e) OVER (PARTITION BY a ORDER BY b) +** FROM t1; +** +** Must be transformed to: +** +** SELECT max(b) OVER (PARTITION BY c ORDER BY d) FROM ( +** SELECT e, min(e) OVER (PARTITION BY a ORDER BY b), c, d, b FROM +** SELECT a, e, c, d, b FROM t1 ORDER BY a, b +** ) ORDER BY c, d +** ) ORDER BY e; +** +** so that both min() and max() may process rows in the order defined by +** their respective window declarations. +** +** INTERFACE WITH SELECT.C +** +** When processing the rewritten SELECT statement, code in select.c calls +** sqlite3WhereBegin() to begin iterating through the results of the +** sub-query, which is always implemented as a co-routine. It then calls +** sqlite3WindowCodeStep() to process rows and finish the scan by calling +** sqlite3WhereEnd(). +** +** sqlite3WindowCodeStep() generates VM code so that, for each row returned +** by the sub-query a sub-routine (OP_Gosub) coded by select.c is invoked. +** When the sub-routine is invoked: +** +** * The results of all window-functions for the row are stored +** in the associated Window.regResult registers. +** +** * The required terminal values are stored in the current row of +** temp table Window.iEphCsr. +** +** In some cases, depending on the window frame and the specific window +** functions invoked, sqlite3WindowCodeStep() caches each entire partition +** in a temp table before returning any rows. In other cases it does not. +** This detail is encapsulated within this file, the code generated by +** select.c is the same in either case. +** +** BUILT-IN WINDOW FUNCTIONS +** +** This implementation features the following built-in window functions: +** +** row_number() +** rank() +** dense_rank() +** percent_rank() +** cume_dist() +** ntile(N) +** lead(expr [, offset [, default]]) +** lag(expr [, offset [, default]]) +** first_value(expr) +** last_value(expr) +** nth_value(expr, N) +** +** These are the same built-in window functions supported by Postgres. +** Although the behaviour of aggregate window functions (functions that +** can be used as either aggregates or window funtions) allows them to +** be implemented using an API, built-in window functions are much more +** esoteric. Additionally, some window functions (e.g. nth_value()) +** may only be implemented by caching the entire partition in memory. +** As such, some built-in window functions use the same API as aggregate +** window functions and some are implemented directly using VDBE +** instructions. Additionally, for those functions that use the API, the +** window frame is sometimes modified before the SELECT statement is +** rewritten. For example, regardless of the specified window frame, the +** row_number() function always uses: +** +** ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +** +** See sqlite3WindowUpdate() for details. +*/ + /* ** Implementation of built-in window function row_number(). Assumes that the ** window frame has been coerced to: @@ -434,6 +553,7 @@ static int selectWindowRewriteExprCb(Walker *pWalker, Expr *pExpr){ } /* Fall through. */ + case TK_AGG_FUNCTION: case TK_COLUMN: { Expr *pDup = sqlite3ExprDup(pParse->db, pExpr, 0); p->pSub = sqlite3ExprListAppend(pParse, p->pSub, pDup); @@ -595,6 +715,8 @@ int sqlite3WindowRewrite(Parse *pParse, Select *p){ rc = SQLITE_NOMEM; }else{ pSub->selFlags |= SF_Expanded; + p->selFlags &= ~SF_Aggregate; + sqlite3SelectPrep(pParse, pSub, 0); } } diff --git a/test/window4.tcl b/test/window4.tcl index 6a85a2a290..c0eb2b7abb 100644 --- a/test/window4.tcl +++ b/test/window4.tcl @@ -128,6 +128,27 @@ execsql_test 3.6.3 { FROM t5 } +#========================================================================= +execsql_test 4.0 { + DROP TABLE IF EXISTS ttt; + CREATE TABLE ttt(a INTEGER PRIMARY KEY, b INTEGER, c INTEGER); + INSERT INTO ttt VALUES(1, 1, 1); + INSERT INTO ttt VALUES(2, 2, 2); + INSERT INTO ttt VALUES(3, 3, 3); + + INSERT INTO ttt VALUES(4, 1, 2); + INSERT INTO ttt VALUES(5, 2, 3); + INSERT INTO ttt VALUES(6, 3, 4); + + INSERT INTO ttt VALUES(7, 1, 3); + INSERT INTO ttt VALUES(8, 2, 4); + INSERT INTO ttt VALUES(9, 3, 5); +} + +execsql_test 4.1 { + SELECT max(c), max(b) OVER (ORDER BY b) FROM ttt GROUP BY b; +} + finish_test diff --git a/test/window4.test b/test/window4.test index 317824e4c4..307465e843 100644 --- a/test/window4.test +++ b/test/window4.test @@ -211,4 +211,24 @@ do_execsql_test 3.6.3 { FROM t5 } {1 one 2 two 3 three 4 four 5 five} +do_execsql_test 4.0 { + DROP TABLE IF EXISTS ttt; + CREATE TABLE ttt(a INTEGER PRIMARY KEY, b INTEGER, c INTEGER); + INSERT INTO ttt VALUES(1, 1, 1); + INSERT INTO ttt VALUES(2, 2, 2); + INSERT INTO ttt VALUES(3, 3, 3); + + INSERT INTO ttt VALUES(4, 1, 2); + INSERT INTO ttt VALUES(5, 2, 3); + INSERT INTO ttt VALUES(6, 3, 4); + + INSERT INTO ttt VALUES(7, 1, 3); + INSERT INTO ttt VALUES(8, 2, 4); + INSERT INTO ttt VALUES(9, 3, 5); +} {} + +do_execsql_test 4.1 { + SELECT max(c), max(b) OVER (ORDER BY b) FROM ttt GROUP BY b; +} {3 1 4 2 5 3} + finish_test