From: drh <> Date: Sat, 3 Dec 2022 17:09:15 +0000 (+0000) Subject: Further improvements to the estimated cost of sorting. Take into account X-Git-Tag: version-3.41.0~312^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e4fa4794be9a938ae28bf4208eac272956691574;p=thirdparty%2Fsqlite.git Further improvements to the estimated cost of sorting. Take into account the number of columns to be sorted. FossilOrigin-Name: f3290cf83b7c02d17d85d8942954f052b486c370cd5ec732969da9061dc1d19a --- diff --git a/manifest b/manifest index a763dc6801..dc964b2b92 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Tuning\sthe\squery\splanner\sby\sadjusting\sthe\sweights\sthat\spredict\sthe\srelative\nperformance\sof\ssorting\sand\sindex\slookup. -D 2022-12-03T00:52:21.776 +C Further\simprovements\sto\sthe\sestimated\scost\sof\ssorting.\s\sTake\sinto\saccount\nthe\snumber\sof\scolumns\sto\sbe\ssorted. +D 2022-12-03T17:09:15.127 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -732,7 +732,7 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 F src/wal.c b9df133a705093da8977da5eb202eaadb844839f1c7297c08d33471f5491843d F src/wal.h c3aa7825bfa2fe0d85bef2db94655f99870a285778baa36307c0a16da32b226a F src/walker.c f890a3298418d7cba3b69b8803594fdc484ea241206a8dfa99db6dd36f8cbb3b -F src/where.c 5826b62ddcfc92979669cb5fb80f73d0df86bbfeefa1d757f5dc1f857cd628e7 +F src/where.c 32875f4f738b1b32f648e8fd52df23a5ba28744a2b4209b262cac09efc2a8569 F src/whereInt.h e25203e5bfee149f5f1225ae0166cfb4f1e65490c998a024249e98bb0647377c F src/wherecode.c ee52c2781c36004d23c85bf111063b78fc16e5e1b6a0d424326af8bf90babb0b F src/whereexpr.c 05295b44b54eea76d1ba766f0908928d0e20e990c249344c9521454d3d09c7ae @@ -2065,11 +2065,8 @@ F vsixtest/vsixtest.tcl 
6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 57dd593ef0efa17dfb3a9f4eac36d5b8b879e271de817d8cd94a8c8b56d31870 -R e47495a3cfe52da04033a615c7da71c2 -T *branch * qp-tuning -T *sym-qp-tuning * -T -sym-trunk * +P 9f2806da4d88beceac2e81e05421f00481dd3dd100b096cd2ae6c828adb42ca7 +R 7460b69c2cad54c6cd480cae6fbd9d9a U drh -Z bdcd0fcd9740989c0fc73b883156ab70 +Z b965371360d6f8e63aa93b345d42429e # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index d25f3c5d8c..b086c5dd4e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9f2806da4d88beceac2e81e05421f00481dd3dd100b096cd2ae6c828adb42ca7 \ No newline at end of file +f3290cf83b7c02d17d85d8942954f052b486c370cd5ec732969da9061dc1d19a \ No newline at end of file diff --git a/src/where.c b/src/where.c index f1d1df44c5..a9bb092ef2 100644 --- a/src/where.c +++ b/src/where.c @@ -4803,12 +4803,12 @@ static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){ ** order. */ static LogEst whereSortingCost( - WhereInfo *pWInfo, - LogEst nRow, - int nOrderBy, - int nSorted + WhereInfo *pWInfo, /* Query planning context */ + LogEst nRow, /* Estimated number of rows to sort */ + int nOrderBy, /* Number of ORDER BY clause terms */ + int nSorted /* Number of initial ORDER BY terms naturally in order */ ){ - /* TUNING: Estimated cost of a full external sort, where N is + /* Estimated cost of a full external sort, where N is ** the number of rows to sort is: ** ** cost = (K * N * log(N)). @@ -4819,27 +4819,40 @@ static LogEst whereSortingCost( ** ** cost = (K * N * log(N)) * (Y/X) ** - ** The constant K is 2.0 for an external sort that is built around - ** the OP_SorterInsert, OP_SorterSort, and OP_SorterData opcodes. 
- ** For a sort built using OP_IdxInsert and OP_Sort (which is slower - ** by a constant factor), the constant K is 4.0. + ** The constant K is at least 2.0 but will be larger if there are a + ** large number of columns to be sorted, as the sorting time is + ** proportional to the amount of content to be sorted. The algorithm + ** does not currently distinguish between fat columns (BLOBs and TEXTs) + ** and skinny columns (INTs). It just uses the number of columns as + ** an approximation for the row width. ** - ** The (Y/X) term is implemented using stack variable rScale - ** below. + ** And extra factor of 2.0 or 3.0 is added to the sorting cost if the sort + ** is built using OP_IdxInsert and OP_Sort rather than with OP_SorterInsert. */ - LogEst rScale, rSortCost; - assert( nOrderBy>0 && 66==sqlite3LogEst(100) ); - rScale = sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66; - rSortCost = nRow + rScale + 10; - if( pWInfo->wctrlFlags & WHERE_USE_LIMIT ) rSortCost += 10; + LogEst rSortCost, nCol; + assert( pWInfo->pSelect!=0 ); + assert( pWInfo->pSelect->pEList!=0 ); + /* TUNING: sorting cost proportional to the number of output columns: */ + nCol = sqlite3LogEst((pWInfo->pSelect->pEList->nExpr+59)/30); + rSortCost = nRow + nCol; + if( nSorted>0 ){ + /* Scale the result by (Y/X) */ + rSortCost += sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66; + } /* Multiple by log(M) where M is the number of output rows. ** Use the LIMIT for M if it is smaller. Or if this sort is for ** a DISTINCT operator, M will be the number of distinct output ** rows, so fudge it downwards a bit. 
 */ - if( (pWInfo->wctrlFlags & WHERE_USE_LIMIT)!=0 && pWInfo->iLimit<nRow ){ - nRow = pWInfo->iLimit; + if( (pWInfo->wctrlFlags & WHERE_USE_LIMIT)!=0 ){ + rSortCost += 10; /* TUNING: Extra 2.0x if using LIMIT */ + if( nSorted!=0 ){ + rSortCost += 6; /* TUNING: Extra 1.5x if also using partial sort */ + } + if( pWInfo->iLimit<nRow ){ + nRow = pWInfo->iLimit; + } }else if( (pWInfo->wctrlFlags & WHERE_WANT_DISTINCT) ){ /* TUNING: In the sort for a DISTINCT operator, assume that the DISTINCT ** reduces the number of output rows by a factor of 2 */