From: dan Date: Sat, 13 Jun 2026 20:16:54 +0000 (+0000) Subject: Speed up SQL aggregate functions percentile() and median() by avoiding a full sort... X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=36e9e015de5a001a324c6996a5ebbdaf826fb4f6;p=thirdparty%2Fsqlite.git Speed up SQL aggregate functions percentile() and median() by avoiding a full sort of the array of arguments. FossilOrigin-Name: 9232b5f21d76dc702ccda5c689d61bcd0057fb1f2a4db6a70c607c25ad03a027 --- diff --git a/manifest b/manifest index daed8e5048..bb9a1c55f8 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C When\sreading\sa\ssuper-journal\sname\sfrom\sa\sjournal\sfile,\sallocate\sa\snew\sbuffer\srather\sthan\susing\sPager.pTmpSpace.\sThis\sprevents\sa\sbuffer\soverrun\sthat\scould\soccur\swhen\susing\sa\sVFS\swith\sa\slarge\ssqlite3_vfs.mxPathname\svalue\swith\sa\sdatabase\swith\sa\ssmall\spage\ssize. -D 2026-06-12T18:35:24.464 +C Speed\sup\sSQL\saggregate\sfunctions\spercentile()\sand\smedian()\sby\savoiding\sa\sfull\ssort\sof\sthe\sarray\sof\sarguments. +D 2026-06-13T20:16:54.656 F .fossil-settings/binary-glob 61195414528fb3ea9693577e1980230d78a1f8b0a54c78cf1b9b24d0a409ed6a x F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea @@ -692,7 +692,7 @@ F src/delete.c 59eeca3fb88c29329afc41bb803ee568b120d9dd7470b5f38ab55cc38390b451 F src/expr.c e97dd9f6ada4c448764e225d8963091bf630b3efb2c92e4d0762571cca2a14e5 F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007 F src/fkey.c 931f74cec1dc8038a0217ef340c91ce147dd1bbed08dc40c47ee0ec6edfffb08 -F src/func.c 555d5f7686f9eef20fe3574889403f307a6cb16ada5b05cbc6a9288dcf75aaeb +F src/func.c 038e454176de1729cc2a1fccce5629d86cc3fa6860546ba54827b394b1a5b4ed F src/global.c a19e4b1ca1335f560e9560e590fc13081e21f670643367f99cb9e8f9dc7d615b F src/hash.c 03c8c0f4be9e8bcb6de65aa26d34a61d48a9430747084a69f9469fbb00ea52ca F src/hash.h 46b92795a95bfefb210f52f0c316e9d7cdbcdd7e7fcfb0d8be796d3a5767cddf @@ -2209,9 +2209,11 @@ F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee F tool/warnings.sh a554d13f6e5cf3760f041b87939e3d616ec6961859c3245e8ef701d1eafc2ca2 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f F tool/winmain.c 00c8fb88e365c9017db14c73d3c78af62194d9644feaf60e220ab0f411f3604c -P f15d076820f4a0d9e7fa32d99575bce27bde5e987bb415e6cfb69b06f0da7e1e ac17669e840dbac5c7c75e9b6014cca2d289aba8de36f9d1d0d61186e8cc6207 -R b4784016cdc66719536104e64f4ddb51 -T +closed ac17669e840dbac5c7c75e9b6014cca2d289aba8de36f9d1d0d61186e8cc6207 +P 7f71859841af7cb0806f58e9c8013a990fcca72b807a0513156d7127ce5c7b62 +R 440eb116549bb7b7e06b073447b72a9b +T *branch * pecentile-partial-sort +T *sym-pecentile-partial-sort * +T -sym-trunk * U dan -Z ae30c5c61ab231f09fb1164a547a0499 +Z 2032cb49443943956dfcd7d4c638a43c # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.tags b/manifest.tags index bec971799f..3847f152e1 100644 --- a/manifest.tags +++ b/manifest.tags @@ -1,2 +1,2 @@ -branch trunk -tag trunk +branch pecentile-partial-sort +tag pecentile-partial-sort diff --git a/manifest.uuid b/manifest.uuid index e7fdd067d6..1ace09b978 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7f71859841af7cb0806f58e9c8013a990fcca72b807a0513156d7127ce5c7b62 +9232b5f21d76dc702ccda5c689d61bcd0057fb1f2a4db6a70c607c25ad03a027 diff --git a/src/func.c b/src/func.c index f5935fdb44..6439451fab 100644 --- a/src/func.c +++ b/src/func.c @@ -2967,8 +2967,17 @@ static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){ ** (1) To avoid a dependency on qsort() ** (2) To avoid the function call to the comparison routine for each ** comparison. +** +** If parameter iReq is non-negative, then the caller will only access +** elements a[iReq] and a[iReq+1] (if it exists) of the sorted array and +** so it is not necessary to position any other elements. Or if iReq is +** negative, then the final array must be fully sorted. */ -static void percentSort(double *a, unsigned int n){ +static void percentSort( + double *a, /* Array to sort */ + unsigned int n, /* Number of elements in array a[] */ + int iReq /* Element caller cares about (or -ve) */ +){ int iLt; /* Entries before a[iLt] are less than rPivot */ int iGt; /* Entries at or after a[iGt] are greater than rPivot */ int i; /* Loop counter */ @@ -3005,17 +3014,36 @@ static void percentSort(double *a, unsigned int n){ } }while( i(int)(n/2) ){ - if( n-iGt>=2 ) percentSort(a+iGt, n-iGt); - n = iLt; + assert( a[iLt]==rPivot ); + assert( iGt>iLt ); + + if( iReq>=0 ){ + /* In this case, the only elements that the caller requires sorted into + ** the correct positions are elements a[iReq] and a[iReq+1]. At this + ** point we know that element a[iLt] is in the correct position, so if + ** iReq is less than iLt, it is only necessary to sort the first + ** partition. Or, if iReq is greater than or equal to iLt, it is only + ** necessary to sort the second. */ + if( iReq=2 ) percentSort(a, iLt); - a += iGt; - n -= iGt; + /* Recurse on the smaller partition only. The smaller partition + ** will hold n/2 or fewer entries, which assures that the stack + ** depth will not exceed O(log(n)), even for pathological cases. + ** Loop without recursion for the larger partition. */ + if( iLt>(int)(n/2) ){ + if( n-iGt>=2 ) percentSort(a+iGt, n-iGt, -1); + n = iLt; + }else{ + if( iLt>=2 ) percentSort(a, iLt, -1); + a += iGt; + n -= iGt; + } } }while( n>=2 ); } @@ -3052,7 +3080,7 @@ static void percentInverse(sqlite3_context *pCtx,int argc,sqlite3_value **argv){ } if( p->bSorted==0 ){ assert( p->nUsed>1 ); - percentSort(p->a, p->nUsed); + percentSort(p->a, p->nUsed, -1); p->bSorted = 1; } p->bKeepSorted = 1; @@ -3081,13 +3109,13 @@ static void percentCompute(sqlite3_context *pCtx, int bIsFinal){ if( p==0 ) return; if( p->a==0 ) return; if( p->nUsed ){ + ix = p->rPct*(p->nUsed-1); + i1 = (unsigned)ix; if( p->bSorted==0 ){ assert( p->nUsed>1 ); - percentSort(p->a, p->nUsed); + percentSort(p->a, p->nUsed, (bIsFinal ? (int)i1 : -1)); p->bSorted = 1; } - ix = p->rPct*(p->nUsed-1); - i1 = (unsigned)ix; if( settings & 1 ){ vx = p->a[i1]; }else{