From: drh <> Date: Wed, 8 Oct 2025 16:29:14 +0000 (+0000) Subject: This is how the percentile() family of functions might be integrated into X-Git-Tag: major-release~99^2~3 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ba5cce9f0878f93993d44a173946d4643af07f73;p=thirdparty%2Fsqlite.git This is how the percentile() family of functions might be integrated into the amalgamation, should we elect to do such a thing. FossilOrigin-Name: c2cfc9c92e35105e26a4c0887b0401ace15a874fd18046135de4b375ba968795 --- diff --git a/Makefile.msc b/Makefile.msc index d850923ceb..0814c8ada9 100644 --- a/Makefile.msc +++ b/Makefile.msc @@ -1655,7 +1655,6 @@ TESTEXT = \ $(TOP)\ext\misc\mmapwarm.c \ $(TOP)\ext\misc\nextchar.c \ $(TOP)\ext\misc\normalize.c \ - $(TOP)\ext\misc\percentile.c \ $(TOP)\ext\misc\prefixes.c \ $(TOP)\ext\misc\qpvtab.c \ $(TOP)\ext\misc\randomjson.c \ @@ -1824,7 +1823,6 @@ FUZZCHECK_SRC = $(FUZZCHECK_SRC) $(TOP)\test\fuzzinvariants.c FUZZCHECK_SRC = $(FUZZCHECK_SRC) $(TOP)\test\vt02.c FUZZCHECK_SRC = $(FUZZCHECK_SRC) $(TOP)\ext\recover\dbdata.c FUZZCHECK_SRC = $(FUZZCHECK_SRC) $(TOP)\ext\recover\sqlite3recover.c -FUZZCHECK_SRC = $(FUZZCHECK_SRC) $(TOP)\ext\misc\percentile.c FUZZCHECK_SRC = $(FUZZCHECK_SRC) $(TOP)\ext\misc\randomjson.c OSSSHELL_SRC = $(TOP)\test\ossshell.c $(TOP)\test\ossfuzz.c @@ -2388,7 +2386,6 @@ SHELL_DEP = \ $(TOP)\ext\misc\ieee754.c \ $(TOP)\ext\misc\memtrace.c \ $(TOP)\ext\misc\pcachetrace.c \ - $(TOP)\ext\misc\percentile.c \ $(TOP)\ext\misc\regexp.c \ $(TOP)\ext\misc\series.c \ $(TOP)\ext\misc\sha1.c \ diff --git a/main.mk b/main.mk index 3704176e1b..0fb75e9aad 100644 --- a/main.mk +++ b/main.mk @@ -809,7 +809,6 @@ TESTSRC += \ $(TOP)/ext/misc/mmapwarm.c \ $(TOP)/ext/misc/nextchar.c \ $(TOP)/ext/misc/normalize.c \ - $(TOP)/ext/misc/percentile.c \ $(TOP)/ext/misc/prefixes.c \ $(TOP)/ext/misc/qpvtab.c \ $(TOP)/ext/misc/randomjson.c \ @@ -1011,7 +1010,6 @@ FUZZCHECK_SRC += $(TOP)/test/fuzzinvariants.c FUZZCHECK_SRC += $(TOP)/ext/recover/dbdata.c FUZZCHECK_SRC += $(TOP)/ext/recover/sqlite3recover.c FUZZCHECK_SRC += $(TOP)/test/vt02.c -FUZZCHECK_SRC += $(TOP)/ext/misc/percentile.c FUZZCHECK_SRC += $(TOP)/ext/misc/randomjson.c DBFUZZ_OPT = ST_OPT = -DSQLITE_OS_KV_OPTIONAL @@ -2362,7 +2360,6 @@ SHELL_DEP = \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/memtrace.c \ $(TOP)/ext/misc/pcachetrace.c \ - $(TOP)/ext/misc/percentile.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/series.c \ $(TOP)/ext/misc/sha1.c \ diff --git a/manifest b/manifest index 2a34646fa7..5a33c363ee 100644 --- a/manifest +++ b/manifest @@ -1,12 +1,12 @@ -C Were\swe\sto\schoose\sto\sintegrate\sthe\scarray()\stable-valued\sfunction\sinto\sthe\namalgamation,\sthat\sintegration\smight\slook\ssomething\slike\sthis. -D 2025-10-08T15:32:55.195 +C This\sis\show\sthe\spercentile()\sfamily\sof\sfunctions\smight\sbe\sintegrated\sinto\nthe\samalgamation,\sshould\swe\select\sto\sdo\ssuch\sa\sthing. +D 2025-10-08T16:29:14.772 F .fossil-settings/binary-glob 61195414528fb3ea9693577e1980230d78a1f8b0a54c78cf1b9b24d0a409ed6a x F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md e108e1e69ae8e8a59e93c455654b8ac9356a11720d3345df2a4743e9590fb20d F Makefile.in 3ce07126d7e87c7464301482e161fdae6a51d0a2aa06b200b8f0000ef4d6163b F Makefile.linux-generic bd3e3cacd369821a6241d4ea1967395c962dfe3057e38cb0a435cee0e8b789d0 -F Makefile.msc a401fdcb7553e04f6e1ec4af0cfe43e0e2bfeff09efdfb6e13be482523a7387a +F Makefile.msc e4df474d652d526dd4cbd37aeb5ca35d191a70d9c6131cec538c904b212fd324 F README.md e28077cfbef795e99c9c75ed95aa7257a1166709b562076441a8506ac421b7c1 F VERSION 16eddb43056a79c1977427ab7a05f3457c373fa159dcdced8754eb89ce7e06b8 F art/icon-243x273.gif 9750b734f82fdb3dc43127753d5e6fbf3b62c9f4e136c2fbf573b2f57ea87af5 @@ -657,7 +657,7 @@ F ext/wasm/tests/opfs/sahpool/index.html be736567fd92d3ecb9754c145755037cbbd2bca F ext/wasm/tests/opfs/sahpool/sahpool-pausing.js f264925cfc82155de38cecb3d204c36e0f6991460fff0cb7c15079454679a4e2 F ext/wasm/tests/opfs/sahpool/sahpool-worker.js bd25a43fc2ab2d1bafd8f2854ad3943ef673f7c3be03e95ecf1612ff6e8e2a61 F magic.txt 5ade0bc977aa135e79e3faaea894d5671b26107cc91e70783aa7dc83f22f3ba0 -F main.mk a97cb12fad3905f2b823c86a17599a2e1baeda10988ac28ab5e8b4b286aa3542 +F main.mk 28270ff27e4257efca52880b8beccff3a644a92ff76bd93e1b27cd20aa7ebc4a F mptest/config01.test 3c6adcbc50b991866855f1977ff172eb6d901271 F mptest/config02.test 4415dfe36c48785f751e16e32c20b077c28ae504 F mptest/crash01.test 61e61469e257df0850df4293d7d4d6c2af301421 @@ -679,7 +679,7 @@ F src/btree.h e823c46d87f63d904d735a24b76146d19f51f04445ea561f71cc3382fd1307f0 F src/btreeInt.h 9c0f9ea5c9b5f4dcaea18111d43efe95f2ac276cd86d770dce10fd99ccc93886 F src/build.c b014128de988a83bc8a2ad3d778018b698aeb06bff0089b7d263a3a45be8e8bf F src/callback.c acae8c8dddda41ee85cfdf19b926eefe830f371069f8aadca3aa39adf5b1c859 -F src/carray.c 104766efc2199f3afa23bd42ef207d07de5b3287531352fb86476d3c85939109 w ext/misc/carray.c +F src/carray.c 104766efc2199f3afa23bd42ef207d07de5b3287531352fb86476d3c85939109 F src/complete.c a3634ab1e687055cd002e11b8f43eb75c17da23e F src/date.c e19e0cfff9a41bfdd884c655755f6f00bca4c1a22272b56e0dd6667b7ea893a2 F src/dbpage.c 081c59d84f187aa0eb48d98faf9578a00bde360f68438d646a86b618653d2479 @@ -688,7 +688,7 @@ F src/delete.c 03a77ba20e54f0f42ebd8eddf15411ed6bdb06a2c472ac4b6b336521bf7cea42 F src/expr.c 4d63c8f6d50fe20637de8bdaf57757a0e424e4ac5e2c3313e621d64727a48a1c F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007 F src/fkey.c 928ed2517e8732113d2b9821aa37af639688d752f4ea9ac6e0e393d713eeb76f -F src/func.c 8a42be1ee66e6257858d0de257033344bf143dbcf2cead69c4b4ccab7976f6b1 +F src/func.c 24040085a0986f6efc4f77194b946568ef1345655efca7a1b5bf46f50c4f49ea F src/global.c a19e4b1ca1335f560e9560e590fc13081e21f670643367f99cb9e8f9dc7d615b F src/hash.c 73934a7f7ab1cb110614a9388cb516893b0cf5b7b69e4fd1a0780ac4ce166be7 F src/hash.h 46b92795a95bfefb210f52f0c316e9d7cdbcdd7e7fcfb0d8be796d3a5767cddf @@ -735,7 +735,7 @@ F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c F src/resolve.c f8d1d011aba0964ff1bdccd049d4d2c2fec217efd90d202a4bb775e926b2c25d F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97 F src/select.c b95181711d59c36d9789e67f76c4cfec64b99f9629a50be5e6566e117b87d957 -F src/shell.c.in a7f6691c887e7efc1a8466c03d14447dd44e9542642c0d2a5f8366047ba70247 +F src/shell.c.in 03c988b6b089628fd0333fb517cf579453e464851941757cf249371332a5edbc F src/sqlite.h.in 5e15c4eec691b4e26a7a740627b4ebe0dc8f75565e6484500992c62fe528674b F src/sqlite3.rc 015537e6ac1eec6c7050e17b616c2ffe6f70fca241835a84a4f0d5937383c479 F src/sqlite3ext.h 3f0c4ed6934e7309a61c6f3c30f70a30a5b869f785bb3d9f721a36c5e4359126 @@ -745,7 +745,7 @@ F src/status.c 0e72e4f6be6ccfde2488eb63210297e75f569f3ce9920f6c3d77590ec6ce5ffd F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1 F src/tclsqlite.c 3c604c49e6cf4211960a9ddb9505280fd22cde32175f40884c641c0f5a286036 F src/tclsqlite.h 614b3780a62522bc9f8f2b9fb22689e8009958e7aa77e572d0f3149050af348a -F src/test1.c c7c0f304ffa927fd31dac97e48bce5ec8f9466fe5b9e7e303f2a18e1c7b53139 +F src/test1.c 2f326c29fac4b345ab107b07a57b142c08e9bf09165ae8ed0a67b0ba10f7be1b F src/test2.c 62f0830958f9075692c29c6de51b495ae8969e1bef85f239ffcd9ba5fb44a5ff F src/test3.c 432646f581d8af1bb495e58fc98234380250954f5d5535e507fc785eccc3987a F src/test4.c 0ac87fc13cdb334ab3a71823f99b6c32a6bebe5d603cd6a71d84c823d43a25a0 @@ -802,10 +802,10 @@ F src/utf.c 7267c3fb9e2467020507601af3354c2446c61f444387e094c779dccd5ca62165 F src/util.c 36fb1150062957280777655976f3f9a75db236cb8207a0770ceae8d5ec17fcd3 F src/vacuum.c 1bacdd0a81d2b5dc1c508fbf0d938c89fa78dd8d5b46ec92686d44030d4f4789 F src/vdbe.c 07084aa0152c8d4e74e7b86c0cf744732f0f9b1fd62bd7098252cadb741c630a -F src/vdbe.h ea1f1b52f0efe422f80d88da3c57e4eadc72856e29a22f1ff08e502ec6ba5f08 +F src/vdbe.h d47e3891dba2259fc74a5deb114578f94e467cdade51153faca252ea5b5e6790 F src/vdbeInt.h 52896dd4d5b62190c53db14b09fc2484434eb594c963df0fa66eb8a94527b02e F src/vdbeapi.c f9a4881a9674fec3fa13da35044a1484d3c4b95f9ec891cc8ffb02ef2b7a41df -F src/vdbeaux.c b701e5920fe74b907eb8211d1f63fef96adc65dfd6e1ad6ed0843c71d8c65205 +F src/vdbeaux.c d2020a310d065f49c9d76291f912effa6b083ae6a8cbc0fd9d9e8b2f6735e507 F src/vdbeblob.c b3f0640db9642fbdc88bd6ebcc83d6009514cafc98f062f675f2c8d505d82692 F src/vdbemem.c e67d9c6484d868c879d20c70d00bf4a9058082f1d4058607ca15d50eb3aebc21 F src/vdbesort.c cb6f472e83ca12c46aa7de0ac0a9d11458b357986f2617a1c90dfb19a542ecbe @@ -1237,7 +1237,7 @@ F test/fuzz3.test 70ba57260364b83e964707b9d4b5625284239768ab907dd387c740c0370ce3 F test/fuzz4.test c229bcdb45518a89e1d208a21343e061503460ac69fae1539320a89f572eb634 F test/fuzz_common.tcl b7197de6ed1ee8250a4f82d67876f4561b42ee8cbbfc6160dcb66331bad3f830 F test/fuzz_malloc.test f348276e732e814802e39f042b1f6da6362a610af73a528d8f76898fde6b22f2 -F test/fuzzcheck.c c2d8a1fd5762bc3bea337360cbd98f481137f533ae9db563a1e581dfbe2901ba +F test/fuzzcheck.c 6cb29fbcf02b666fc99a0069d0026728738157d2c26ba2007bbc88010036b618 F test/fuzzdata1.db 3e86d9cf5aea68ddb8e27c02d7dfdaa226347426c7eb814918e4d95475bf8517 F test/fuzzdata2.db 128b3feeb78918d075c9b14b48610145a0dd4c8d6f1ca7c2870c7e425f5bf31f F test/fuzzdata3.db c6586d3e3cef0fbc18108f9bb649aa77bfc38aba @@ -1490,7 +1490,7 @@ F test/parser1.test 131f4733472252d53d8ed681115257866f55740ab697fa05900d76604934 F test/pcache.test c8acbedd3b6fd0f9a7ca887a83b11d24a007972b F test/pcache2.test af7f3deb1a819f77a6d0d81534e97d1cf62cd442 F test/pendingrace.test e99efc5ab3584da3dfc8cd6a0ec4e5a42214820574f5ea24ee93f1d84655f463 -F test/percentile.test 52ba89d6ee6b65f770972b67dace358bab7cdbd532803d3db157845268e789cd +F test/percentile.test eaee1ff3e35d5fe933ac98d927c9a7d2f4f1a1a19ee22e7d45e97a6d9ee32077 F test/permutations.test e6de4f5777f7785737ac3d1d964b8656e5477a134665b2fe8a91884ab9b685b3 F test/pg_common.tcl 3b27542224db1e713ae387459b5d117c836a5f6e328846922993b6d2b7640d9f F test/pragma.test 7d07b7bb76e273215d6a20c4f83c3062cc28976c737ccb70a686025801e86c8f @@ -2169,11 +2169,8 @@ F tool/version-info.c 33d0390ef484b3b1cb685d59362be891ea162123cea181cb8e6d2cf6dd F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh 1ad0169b022b280bcaaf94a7fa231591be96b514230ab5c98fbf15cd7df842dd F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8bf337e6e609dd38de6002f2d3d3ebf52dd0462ed98a50e4b002a7c6860d30c5 -R 1ad26611eede06cd84cba8f9535e3010 -T *branch * ext-to-core -T *sym-ext-to-core * -T -sym-trunk * +P 2b43d5d7dcc5aee14a050c824a5e09b6a38a9c78ddcc25a994eba5d4c5ad9ba2 +R 7399c53b55454580299093f70cc998e4 U drh -Z a83e0018e9cc2d7f757c0280294b2197 +Z 0e8a13b71992926ffe6a7764cecb4f69 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 6d06686020..7464b763a6 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2b43d5d7dcc5aee14a050c824a5e09b6a38a9c78ddcc25a994eba5d4c5ad9ba2 +c2cfc9c92e35105e26a4c0887b0401ace15a874fd18046135de4b375ba968795 diff --git a/src/func.c b/src/func.c index 49cb197b53..0795b4b98f 100644 --- a/src/func.c +++ b/src/func.c @@ -2636,6 +2636,450 @@ static void signFunc( sqlite3_result_int(context, x<0.0 ? -1 : x>0.0 ? +1 : 0); } +/*********************************************************************** +** This section implements the percentile(Y,P) SQL function and similar. +** Requirements: +** +** (1) The percentile(Y,P) function is an aggregate function taking +** exactly two arguments. +** +** (2) If the P argument to percentile(Y,P) is not the same for every +** row in the aggregate then an error is thrown. The word "same" +** in the previous sentence means that the value differ by less +** than 0.001. +** +** (3) If the P argument to percentile(Y,P) evaluates to anything other +** than a number in the range of 0.0 to 100.0 inclusive then an +** error is thrown. +** +** (4) If any Y argument to percentile(Y,P) evaluates to a value that +** is not NULL and is not numeric then an error is thrown. +** +** (5) If any Y argument to percentile(Y,P) evaluates to plus or minus +** infinity then an error is thrown. (SQLite always interprets NaN +** values as NULL.) +** +** (6) Both Y and P in percentile(Y,P) can be arbitrary expressions, +** including CASE WHEN expressions. +** +** (7) The percentile(Y,P) aggregate is able to handle inputs of at least +** one million (1,000,000) rows. +** +** (8) If there are no non-NULL values for Y, then percentile(Y,P) +** returns NULL. +** +** (9) If there is exactly one non-NULL value for Y, the percentile(Y,P) +** returns the one Y value. +** +** (10) If there N non-NULL values of Y where N is two or more and +** the Y values are ordered from least to greatest and a graph is +** drawn from 0 to N-1 such that the height of the graph at J is +** the J-th Y value and such that straight lines are drawn between +** adjacent Y values, then the percentile(Y,P) function returns +** the height of the graph at P*(N-1)/100. +** +** (11) The percentile(Y,P) function always returns either a floating +** point number or NULL. +** +** (12) The percentile(Y,P) is implemented as a single C99 source-code +** file that compiles into a shared-library or DLL that can be loaded +** into SQLite using the sqlite3_load_extension() interface. +** +** (13) A separate median(Y) function is the equivalent percentile(Y,50). +** +** (14) A separate percentile_cont(Y,P) function is equivalent to +** percentile(Y,P/100.0). In other words, the fraction value in +** the second argument is in the range of 0 to 1 instead of 0 to 100. +** +** (15) A separate percentile_disc(Y,P) function is like +** percentile_cont(Y,P) except that instead of returning the weighted +** average of the nearest two input values, it returns the next lower +** value. So the percentile_disc(Y,P) will always return a value +** that was one of the inputs. +** +** (16) All of median(), percentile(Y,P), percentile_cont(Y,P) and +** percentile_disc(Y,P) can be used as window functions. +** +** Differences from standard SQL: +** +** * The percentile_cont(X,P) function is equivalent to the following in +** standard SQL: +** +** (percentile_cont(P) WITHIN GROUP (ORDER BY X)) +** +** The SQLite syntax is much more compact. The standard SQL syntax +** is also supported if SQLite is compiled with the +** -DSQLITE_ENABLE_ORDERED_SET_AGGREGATES option. +** +** * No median(X) function exists in the SQL standard. App developers +** are expected to write "percentile_cont(0.5)WITHIN GROUP(ORDER BY X)". +** +** * No percentile(Y,P) function exists in the SQL standard. Instead of +** percential(Y,P), developers must write this: +** "percentile_cont(P/100.0) WITHIN GROUP (ORDER BY Y)". Note that +** the fraction parameter to percentile() goes from 0 to 100 whereas +** the fraction parameter in SQL standard percentile_cont() goes from +** 0 to 1. +** +** Implementation notes as of 2024-08-31: +** +** * The regular aggregate-function versions of these routines work +** by accumulating all values in an array of doubles, then sorting +** that array using quicksort before computing the answer. Thus +** the runtime is O(NlogN) where N is the number of rows of input. +** +** * For the window-function versions of these routines, the array of +** inputs is sorted as soon as the first value is computed. Thereafter, +** the array is kept in sorted order using an insert-sort. This +** results in O(N*K) performance where K is the size of the window. +** One can imagine alternative implementations that give O(N*logN*logK) +** performance, but they require more complex logic and data structures. +** The developers have elected to keep the asymptotically slower +** algorithm for now, for simplicity, under the theory that window +** functions are seldom used and when they are, the window size K is +** often small. The developers might revisit that decision later, +** should the need arise. +*/ + +/* The following object is the group context for a single percentile() +** aggregate. Remember all input Y values until the very end. +** Those values are accumulated in the Percentile.a[] array. +*/ +typedef struct Percentile Percentile; +struct Percentile { + unsigned nAlloc; /* Number of slots allocated for a[] */ + unsigned nUsed; /* Number of slots actually used in a[] */ + char bSorted; /* True if a[] is already in sorted order */ + char bKeepSorted; /* True if advantageous to keep a[] sorted */ + char bPctValid; /* True if rPct is valid */ + double rPct; /* Fraction. 0.0 to 1.0 */ + double *a; /* Array of Y values */ +}; + +/* +** Return TRUE if the input floating-point number is an infinity. +*/ +static int percentIsInfinity(double r){ + sqlite3_uint64 u; + assert( sizeof(u)==sizeof(r) ); + memcpy(&u, &r, sizeof(u)); + return ((u>>52)&0x7ff)==0x7ff; +} + +/* +** Return TRUE if two doubles differ by 0.001 or less. +*/ +static int percentSameValue(double a, double b){ + a -= b; + return a>=-0.001 && a<=0.001; +} + +/* +** Search p (which must have p->bSorted) looking for an entry with +** value y. Return the index of that entry. +** +** If bExact is true, return -1 if the entry is not found. +** +** If bExact is false, return the index at which a new entry with +** value y should be insert in order to keep the values in sorted +** order. The smallest return value in this case will be 0, and +** the largest return value will be p->nUsed. +*/ +static int percentBinarySearch(Percentile *p, double y, int bExact){ + int iFirst = 0; /* First element of search range */ + int iLast = p->nUsed - 1; /* Last element of search range */ + while( iLast>=iFirst ){ + int iMid = (iFirst+iLast)/2; + double x = p->a[iMid]; + if( xy ){ + iLast = iMid - 1; + }else{ + return iMid; + } + } + if( bExact ) return -1; + return iFirst; +} + +/* +** Generate an error for a percentile function. +** +** The error format string must have exactly one occurrence of "%%s()" +** (with two '%' characters). That substring will be replaced by the name +** of the function. +*/ +static void percentError(sqlite3_context *pCtx, const char *zFormat, ...){ + char *zMsg1; + char *zMsg2; + va_list ap; + + va_start(ap, zFormat); + zMsg1 = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + zMsg2 = zMsg1 ? sqlite3_mprintf(zMsg1, sqlite3VdbeFuncName(pCtx)) : 0; + sqlite3_result_error(pCtx, zMsg2, -1); + sqlite3_free(zMsg1); + sqlite3_free(zMsg2); +} + +/* +** The "step" function for percentile(Y,P) is called once for each +** input row. +*/ +static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){ + Percentile *p; + double rPct; + int eType; + double y; + assert( argc==2 || argc==1 ); + + if( argc==1 ){ + /* Requirement 13: median(Y) is the same as percentile(Y,50). */ + rPct = 0.5; + }else{ + /* P must be a number between 0 and 100 for percentile() or between + ** 0.0 and 1.0 for percentile_cont() and percentile_disc(). + ** + ** The user-data is an integer which is 10 times the upper bound. + */ + double mxFrac = (SQLITE_PTR_TO_INT(sqlite3_user_data(pCtx))&2)? 100.0 : 1.0; + eType = sqlite3_value_numeric_type(argv[1]); + rPct = sqlite3_value_double(argv[1])/mxFrac; + if( (eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT) + || rPct<0.0 || rPct>1.0 + ){ + percentError(pCtx, "the fraction argument to %%s()" + " is not between 0.0 and %.1f", + (double)mxFrac); + return; + } + } + + /* Allocate the session context. */ + p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p)); + if( p==0 ) return; + + /* Remember the P value. Throw an error if the P value is different + ** from any prior row, per Requirement (2). */ + if( !p->bPctValid ){ + p->rPct = rPct; + p->bPctValid = 1; + }else if( !percentSameValue(p->rPct,rPct) ){ + percentError(pCtx, "the fraction argument to %%s()" + " is not the same for all input rows"); + return; + } + + /* Ignore rows for which Y is NULL */ + eType = sqlite3_value_type(argv[0]); + if( eType==SQLITE_NULL ) return; + + /* If not NULL, then Y must be numeric. Otherwise throw an error. + ** Requirement 4 */ + if( eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT ){ + percentError(pCtx, "input to %%s() is not numeric"); + return; + } + + /* Throw an error if the Y value is infinity or NaN */ + y = sqlite3_value_double(argv[0]); + if( percentIsInfinity(y) ){ + percentError(pCtx, "Inf input to %%s()"); + return; + } + + /* Allocate and store the Y */ + if( p->nUsed>=p->nAlloc ){ + unsigned n = p->nAlloc*2 + 250; + double *a = sqlite3_realloc64(p->a, sizeof(double)*n); + if( a==0 ){ + sqlite3_free(p->a); + memset(p, 0, sizeof(*p)); + sqlite3_result_error_nomem(pCtx); + return; + } + p->nAlloc = n; + p->a = a; + } + if( p->nUsed==0 ){ + p->a[p->nUsed++] = y; + p->bSorted = 1; + }else if( !p->bSorted || y>=p->a[p->nUsed-1] ){ + p->a[p->nUsed++] = y; + }else if( p->bKeepSorted ){ + int i; + i = percentBinarySearch(p, y, 0); + if( i<(int)p->nUsed ){ + memmove(&p->a[i+1], &p->a[i], (p->nUsed-i)*sizeof(p->a[0])); + } + p->a[i] = y; + p->nUsed++; + }else{ + p->a[p->nUsed++] = y; + p->bSorted = 0; + } +} + +/* +** Interchange two doubles. +*/ +#define SWAP_DOUBLE(X,Y) {double ttt=(X);(X)=(Y);(Y)=ttt;} + +/* +** Sort an array of doubles. +** +** Algorithm: quicksort +** +** This is implemented separately rather than using the qsort() routine +** from the standard library because: +** +** (1) To avoid a dependency on qsort() +** (2) To avoid the function call to the comparison routine for each +** comparison. +*/ +static void percentSort(double *a, unsigned int n){ + int iLt; /* Entries before a[iLt] are less than rPivot */ + int iGt; /* Entries at or after a[iGt] are greater than rPivot */ + int i; /* Loop counter */ + double rPivot; /* The pivot value */ + + assert( n>=2 ); + if( a[0]>a[n-1] ){ + SWAP_DOUBLE(a[0],a[n-1]) + } + if( n==2 ) return; + iGt = n-1; + i = n/2; + if( a[0]>a[i] ){ + SWAP_DOUBLE(a[0],a[i]) + }else if( a[i]>a[iGt] ){ + SWAP_DOUBLE(a[i],a[iGt]) + } + if( n==3 ) return; + rPivot = a[i]; + iLt = i = 1; + do{ + if( a[i]iLt ) SWAP_DOUBLE(a[i],a[iLt]) + iLt++; + i++; + }else if( a[i]>rPivot ){ + do{ + iGt--; + }while( iGt>i && a[iGt]>rPivot ); + SWAP_DOUBLE(a[i],a[iGt]) + }else{ + i++; + } + }while( i=2 ) percentSort(a, iLt); + if( n-iGt>=2 ) percentSort(a+iGt, n-iGt); + +/* Uncomment for testing */ +#if 0 + for(i=0; ibSorted==0 ){ + assert( p->nUsed>1 ); + percentSort(p->a, p->nUsed); + p->bSorted = 1; + } + p->bKeepSorted = 1; + + /* Find and remove the row */ + i = percentBinarySearch(p, y, 1); + if( i>=0 ){ + p->nUsed--; + if( i<(int)p->nUsed ){ + memmove(&p->a[i], &p->a[i+1], (p->nUsed - i)*sizeof(p->a[0])); + } + } +} + +/* +** Compute the final output of percentile(). Clean up all allocated +** memory if and only if bIsFinal is true. +*/ +static void percentCompute(sqlite3_context *pCtx, int bIsFinal){ + Percentile *p; + int settings = SQLITE_PTR_TO_INT(sqlite3_user_data(pCtx))&1; /* Discrete? */ + unsigned i1, i2; + double v1, v2; + double ix, vx; + p = (Percentile*)sqlite3_aggregate_context(pCtx, 0); + if( p==0 ) return; + if( p->a==0 ) return; + if( p->nUsed ){ + if( p->bSorted==0 ){ + assert( p->nUsed>1 ); + percentSort(p->a, p->nUsed); + p->bSorted = 1; + } + ix = p->rPct*(p->nUsed-1); + i1 = (unsigned)ix; + if( settings & 1 ){ + vx = p->a[i1]; + }else{ + i2 = ix==(double)i1 || i1==p->nUsed-1 ? i1 : i1+1; + v1 = p->a[i1]; + v2 = p->a[i2]; + vx = v1 + (v2-v1)*(ix-i1); + } + sqlite3_result_double(pCtx, vx); + } + if( bIsFinal ){ + sqlite3_free(p->a); + memset(p, 0, sizeof(*p)); + }else{ + p->bKeepSorted = 1; + } +} +static void percentFinal(sqlite3_context *pCtx){ + percentCompute(pCtx, 1); +} +static void percentValue(sqlite3_context *pCtx){ + percentCompute(pCtx, 0); +} +/****** End of percentile family of functions ******/ + + #ifdef SQLITE_DEBUG /* ** Implementation of fpdecode(x,y,z) function. @@ -2865,6 +3309,19 @@ void sqlite3RegisterBuiltinFunctions(void){ groupConcatFinalize, groupConcatValue, groupConcatInverse, 0), WAGGREGATE(string_agg, 2, 0, 0, groupConcatStep, groupConcatFinalize, groupConcatValue, groupConcatInverse, 0), + + WAGGREGATE(median, 1, 0,0, percentStep, + percentFinal, percentValue, percentInverse, + SQLITE_INNOCUOUS|SQLITE_SELFORDER1), + WAGGREGATE(percentile, 2, 0x2,0, percentStep, + percentFinal, percentValue, percentInverse, + SQLITE_INNOCUOUS|SQLITE_SELFORDER1), + WAGGREGATE(percentile_cont, 2, 0,0, percentStep, + percentFinal, percentValue, percentInverse, + SQLITE_INNOCUOUS|SQLITE_SELFORDER1), + WAGGREGATE(percentile_disc, 2, 0x1,0, percentStep, + percentFinal, percentValue, percentInverse, + SQLITE_INNOCUOUS|SQLITE_SELFORDER1), LIKEFUNC(glob, 2, &globInfo, SQLITE_FUNC_LIKE|SQLITE_FUNC_CASE), #ifdef SQLITE_CASE_SENSITIVE_LIKE @@ -2917,6 +3374,7 @@ void sqlite3RegisterBuiltinFunctions(void){ INLINE_FUNC(coalesce, -4, INLINEFUNC_coalesce, 0 ), INLINE_FUNC(iif, -4, INLINEFUNC_iif, 0 ), INLINE_FUNC(if, -4, INLINEFUNC_iif, 0 ), + }; #ifndef SQLITE_OMIT_ALTERTABLE sqlite3AlterFunctions(); diff --git a/src/shell.c.in b/src/shell.c.in index 0f7f9710c6..df80872853 100644 --- a/src/shell.c.in +++ b/src/shell.c.in @@ -1398,7 +1398,6 @@ INCLUDE ../ext/misc/shathree.c INCLUDE ../ext/misc/sha1.c INCLUDE ../ext/misc/uint.c INCLUDE ../ext/misc/decimal.c -INCLUDE ../ext/misc/percentile.c #undef sqlite3_base_init #define sqlite3_base_init sqlite3_base64_init INCLUDE ../ext/misc/base64.c @@ -5873,7 +5872,6 @@ static void open_db(ShellState *p, int openFlags){ sqlite3_uint_init(p->db, 0, 0); sqlite3_stmtrand_init(p->db, 0, 0); sqlite3_decimal_init(p->db, 0, 0); - sqlite3_percentile_init(p->db, 0, 0); sqlite3_base64_init(p->db, 0, 0); sqlite3_base85_init(p->db, 0, 0); sqlite3_regexp_init(p->db, 0, 0); diff --git a/src/test1.c b/src/test1.c index 331fc657dd..285165a574 100644 --- a/src/test1.c +++ b/src/test1.c @@ -8298,7 +8298,6 @@ static int SQLITE_TCLAPI tclLoadStaticExtensionCmd( extern int sqlite3_fuzzer_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_ieee_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_nextchar_init(sqlite3*,char**,const sqlite3_api_routines*); - extern int sqlite3_percentile_init(sqlite3*,char**,const sqlite3_api_routines*); #ifndef SQLITE_OMIT_VIRTUALTABLE extern int sqlite3_prefixes_init(sqlite3*,char**,const sqlite3_api_routines*); #endif @@ -8331,7 +8330,6 @@ static int SQLITE_TCLAPI tclLoadStaticExtensionCmd( { "fuzzer", sqlite3_fuzzer_init }, { "ieee754", sqlite3_ieee_init }, { "nextchar", sqlite3_nextchar_init }, - { "percentile", sqlite3_percentile_init }, #ifndef SQLITE_OMIT_VIRTUALTABLE { "prefixes", sqlite3_prefixes_init }, #endif diff --git a/src/vdbe.h b/src/vdbe.h index 9a9cf3dec1..3e6465698d 100644 --- a/src/vdbe.h +++ b/src/vdbe.h @@ -300,6 +300,7 @@ void sqlite3VdbeSetVarmask(Vdbe*, int); #endif int sqlite3MemCompare(const Mem*, const Mem*, const CollSeq*); int sqlite3BlobCompare(const Mem*, const Mem*); +const char *sqlite3VdbeFuncName(const sqlite3_context*); void sqlite3VdbeRecordUnpack(int,const void*,UnpackedRecord*); int sqlite3VdbeRecordCompare(int,const void*,UnpackedRecord*); diff --git a/src/vdbeaux.c b/src/vdbeaux.c index c8b86e6f6d..f4b9e192f1 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -5571,3 +5571,12 @@ void sqlite3VdbePreUpdateHook( } } #endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ + +/* +** Return the name of an SQL function associated with the sqlite3_context. +*/ +const char *sqlite3VdbeFuncName(const sqlite3_context *pCtx){ + assert( pCtx!=0 ); + assert( pCtx->pFunc!=0 ); + return pCtx->pFunc->zName; +} diff --git a/test/fuzzcheck.c b/test/fuzzcheck.c index ba64504575..4f5189f848 100644 --- a/test/fuzzcheck.c +++ b/test/fuzzcheck.c @@ -171,8 +171,6 @@ static struct GlobalVars { */ extern int sqlite3_vt02_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_randomjson_init(sqlite3*,char**,const sqlite3_api_routines*); -extern int sqlite3_percentile_init(sqlite3*,char**,const sqlite3_api_routines*); - /* ** Print an error message and quit. @@ -1362,7 +1360,6 @@ int runCombinedDbSqlInput( sqlite3_vt02_init(cx.db, 0, 0); /* Activate extensions */ - sqlite3_percentile_init(cx.db, 0, 0); sqlite3_randomjson_init(cx.db, 0, 0); /* Add support for sqlite_dbdata and sqlite_dbptr virtual tables used diff --git a/test/percentile.test b/test/percentile.test index a6a29da34e..25096a953c 100644 --- a/test/percentile.test +++ b/test/percentile.test @@ -19,7 +19,6 @@ source $testdir/tester.tcl # Basic test of the percentile() function. # do_test percentile-1.0 { - load_static_extension db percentile execsql { CREATE TABLE t1(x); INSERT INTO t1 VALUES(1),(4),(6),(7),(8),(9),(11),(11),(11);