-C Merge\stogether\sthe\sfork\sin\sthe\squery-planner-tweaks\sbranch.
-D 2011-08-06T02:03:10.239
+C The\ssqlite_stat2.cnt\sfield\sis\sparsed\sif\sit\sis\spresent.\s\sBut\sit\sis\snot\syet\nused.\s\sA\slarge\scomment\sadded\sto\sanalyze.c\sto\sexplain\sthe\sformat\sof\sthe\nANALYZE\ssystem\stables.
+D 2011-08-06T19:48:53.187
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 1e6988b3c11dee9bd5edc0c804bd4468d74a9cdc
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F sqlite3.1 6be1ad09113570e1fc8dcaff84c9b0b337db5ffc
F sqlite3.pc.in ae6f59a76e862f5c561eb32a380228a02afc3cad
F src/alter.c ac80a0f31189f8b4a524ebf661e47e84536ee7f5
-F src/analyze.c 19894783b6f833337ae7e56001c96dc3795d2a83
+F src/analyze.c 8a5343a20ae69fd27247249cf40262b0021e379e
F src/attach.c 12c6957996908edc31c96d7c68d4942c2474405f
F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34
F src/backup.c 986c15232757f2873dff35ee3b35cbf935fc573c
F src/shell.c bbe7818ff5bc8614105ceb81ad67b8bdc0b671dd
F src/sqlite.h.in 0b3cab7b2ea51f58396e8871fa5f349cfece5330
F src/sqlite3ext.h 1a1a4f784aa9c3b00edd287940197de52487cd93
-F src/sqliteInt.h be1ab8bc2c295a1bf0ddc9241ea879f455b72df6
+F src/sqliteInt.h a01882eb98520566f039017232290dde2d0cbeed
F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d
F src/status.c 7ac64842c86cec2fc1a1d0e5c16d3beb8ad332bf
F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e
F src/wal.c 3154756177d6219e233d84291d5b05f4e06ff5e9
F src/wal.h 66b40bd91bc29a5be1c88ddd1f5ade8f3f48728a
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
-F src/where.c b8ebb50b82578761e5e764c5c16d7693049a523f
+F src/where.c 67ad221f87aa1b12123444cfd9b338e91a50ed7a
F test/8_3_names.test 631ea964a3edb091cf73c3b540f6bcfdb36ce823
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
F test/analyze5.test f6c250012b173f9b10ae5d577e9592767b376b1c
F test/analyze6.test c125622a813325bba1b4999040ddc213773c2290
F test/analyze7.test ed672a2c09c4b5a5b528ca38951318af463bb17e
-F test/analyze8.test ff7a3b4631c896a51e7404268796c61d74117e9f
+F test/analyze8.test 8a9c83bf735311daf812c9f762d79234f9069404
F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b
F test/async2.test c0a9bd20816d7d6a2ceca7b8c03d3d69c28ffb8b
F test/async3.test d73a062002376d7edc1fe3edff493edbec1fc2f7
F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings.sh 2ebae31e1eb352696f3c2f7706a34c084b28c262
-P 7e914aa999d9f3f7be58a4494d33a7876af56603 b9d41c3490bf325915227a32b612e6772fb62fa9
-R c43dfa67e269269636fb7d00fce4d038
+P 2daab6bd42b34b32de46db513437cc4d6ca17975
+R cec8d2f4d5cd3f2eecb5e9f36d91b967
U drh
-Z 2a4dca5bf1206d663b012fe85305a05e
+Z deea39c8beb81cdf79c7b9366ab4e7a9
**
*************************************************************************
** This file contains code associated with the ANALYZE command.
+**
+** The ANALYZE command gathers statistics about the content of tables
+** and indices. These statistics are made available to the query planner
+** to help it make better decisions about the best way to implement a
+** query.
+**
+** Two system tables are created as follows:
+**
+** CREATE TABLE sqlite_stat1(tbl, idx, stat);
+** CREATE TABLE sqlite_stat2(tbl, idx, sampleno, sample, cnt);
+**
+** Additional tables might be added in future releases of SQLite.
+** The sqlite_stat2 table is only created and used if SQLite is
+** compiled with SQLITE_ENABLE_STAT2. Older versions of SQLite
+** omit the sqlite_stat2.cnt column. Newer versions of SQLite are
+** able to use older versions of the stat2 table that lack the cnt
+** column.
+**
+** Format of sqlite_stat1:
+**
+** There is normally one row per index, with the index identified by the
+** name in the idx column. The tbl column is the name of the table to
+** which the index belongs. In each such row, the stat column will be
+** a string consisting of a list of integers. The first integer in this
+** list is the number of rows in the index and in the table. The second
+** integer is the average number of rows in the index that have the same
+** value in the first column of the index. The third integer is the average
+** number of rows in the index that have the same value for the first two
+** columns. The N-th integer (for N>1) is the average number of rows in
+** the index which have the same value for the first N-1 columns. For
+** a K-column index, there will be K+1 integers in the stat column. If
+** the index is unique, then the last integer will be 1.
+**
+** The list of integers in the stat column can optionally be followed
+** by the keyword "unordered". The "unordered" keyword, if it is present,
+** must be separated from the last integer by a single space. If the
+** "unordered" keyword is present, then the query planner assumes that
+** the index is unordered and will not use the index for a range query.
+**
+** If the sqlite_stat1.idx column is NULL, then the sqlite_stat1.stat
+** column contains a single integer which is the (estimated) number of
+** rows in the table identified by sqlite_stat1.tbl.
+**
+** Format of sqlite_stat2:
+**
+** The sqlite_stat2 table is only created and is only used if SQLite is compiled
+** with SQLITE_ENABLE_STAT2. The "stat2" table contains additional information
+** about the key distribution within an index. The index is identified by
+** the "idx" column and the "tbl" column is the name of the table to which
+** the index belongs. There are usually multiple rows in the sqlite_stat2
+** table for each index.
+**
+** The sqlite_stat2 entries for an index that have sampleno>=0 are
+** sampled key values for the first column of the index taken at
+** intervals along the index. The sqlite_stat2.sample column holds
+** the value of the key in the left-most column of the index.
+**
+** The samples are numbered from 0 to S-1
+** where S is 10 by default. The number of samples created by the
+** ANALYZE command can be adjusted at compile-time using the
+** SQLITE_INDEX_SAMPLES macro. The maximum number of samples is
+** SQLITE_MAX_SAMPLES, currently set to 100. There are places in the
+** code that use an unsigned character to count samples, so an upper
+** bound on SQLITE_MAX_SAMPLES is 255.
+**
+** Suppose the index contains C rows. And let the number
+** of samples be S. SQLite assumes that the samples are taken from the
+** following rows for i between 0 and S-1:
+**
+** rownumber = (i*C*2 + C)/(S*2)
+**
+** Conceptually, the index is divided into S bins and the sample is
+** taken from the middle of each bin. The ANALYZE will not attempt
+** to populate sqlite_stat2 for an index that holds fewer than S*2
+** entries.
+**
+** If the key value for a sample (the sqlite_stat2.sample column) is a
+** large string or blob, SQLite will only use the first 255 bytes of
+** that string or blob.
+**
+** The sqlite_stat2.cnt column contains the number of entries in the
+** index for which sqlite_stat2.sample matches the left-most column
+** of the index. In other words, sqlite_stat2.cnt holds the number of
+** times the sqlite_stat2.sample value appears in the index. Many
+** older versions of SQLite omit the sqlite_stat2.cnt column.
+**
+** If the sqlite_stat2.sampleno value is -1, then that row holds a first-
+** column key that is a frequently used key in the index. The
+** sqlite_stat2.cnt column will hold the number of occurrences of that key.
+** This information is useful to the query planner in cases where a
+** large percentage of the rows in the indexed field have one of a small
+** handful of values, but the balance of the rows in the index have
+** distinct or nearly distinct keys.
*/
#ifndef SQLITE_OMIT_ANALYZE
#include "sqliteInt.h"
return 0;
}
+#if SQLITE_ENABLE_STAT2
/*
-** If the Index.aSample variable is not NULL, delete the aSample[] array
-** and its contents.
+** Delete an array of IndexSample objects
*/
-void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){
-#ifdef SQLITE_ENABLE_STAT2
- if( pIdx->aSample ){
- int j;
- for(j=0; j<pIdx->nSample; j++){
- IndexSample *p = &pIdx->aSample[j];
- if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){
- sqlite3DbFree(db, p->u.z);
- }
+static void deleteIndexSampleArray(
+ sqlite3 *db, /* The database connection */
+ IndexSampleArray *pArray /* Array of IndexSample objects */
+){
+ int j;
+ if( pArray->a==0 ) return;
+ for(j=0; j<pArray->n; j++){
+ IndexSample *p = &pArray->a[j];
+ if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){
+ sqlite3_free(p->u.z);
}
- sqlite3DbFree(db, pIdx->aSample);
}
+ sqlite3_free(pArray->a);
+ memset(pArray, 0, sizeof(*pArray));
+}
+#endif
+
+/*
+** Delete the sample and common-key arrays from the index.
+*/
+void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){
+#ifdef SQLITE_ENABLE_STAT2
+ deleteIndexSampleArray(db, &pIdx->sample);
+ deleteIndexSampleArray(db, &pIdx->comkey);
#else
UNUSED_PARAMETER(db);
UNUSED_PARAMETER(pIdx);
#endif
}
+#ifdef SQLITE_ENABLE_STAT2
+/*
+** Enlarge an array of IndexSample objects.
+*/
+static IndexSample *allocIndexSample(
+ sqlite3 *db, /* Database connection to malloc against */
+ IndexSampleArray *pArray, /* The array to enlarge */
+ int i /* Return this element */
+){
+ IndexSample *p;
+ if( i>=pArray->nAlloc ){
+ int szNew = i+1;
+ p = (IndexSample*)sqlite3_realloc(pArray->a, szNew*sizeof(IndexSample));
+ if( p==0 ) return 0;
+ pArray->a = p;
+ memset(&pArray->a[pArray->n], 0, (szNew-(pArray->n))*sizeof(IndexSample));
+ pArray->nAlloc = szNew;
+ }
+ if( i>=pArray->n ) pArray->n = i+1;
+ return &pArray->a[i];
+}
+#endif
+
/*
** Load the content of the sqlite_stat1 and sqlite_stat2 tables. The
** contents of sqlite_stat1 are used to populate the Index.aiRowEst[]
** arrays. The contents of sqlite_stat2 are used to populate the
-** Index.aSample[] arrays.
+** Index.sample and Index.comkey arrays.
**
** If the sqlite_stat1 table is not present in the database, SQLITE_ERROR
** is returned. In this case, even if SQLITE_ENABLE_STAT2 was defined
HashElem *i;
char *zSql;
int rc;
+ Table *pTab; /* Stat1 or Stat2 table */
assert( iDb>=0 && iDb<db->nDb );
assert( db->aDb[iDb].pBt!=0 );
Index *pIdx = sqliteHashData(i);
sqlite3DefaultRowEst(pIdx);
sqlite3DeleteIndexSamples(db, pIdx);
- pIdx->aSample = 0;
- pIdx->nSample = 0;
}
/* Check to make sure the sqlite_stat1 table exists */
sInfo.db = db;
sInfo.zDatabase = db->aDb[iDb].zName;
- if( sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase)==0 ){
+ if( (pTab=sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase))==0 ){
return SQLITE_ERROR;
}
/* Load the statistics from the sqlite_stat2 table. */
#ifdef SQLITE_ENABLE_STAT2
- if( rc==SQLITE_OK && !sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase) ){
+ if( rc==SQLITE_OK
+ && (pTab=sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase))==0 ){
rc = SQLITE_ERROR;
}
if( rc==SQLITE_OK ){
sqlite3_stmt *pStmt = 0;
zSql = sqlite3MPrintf(db,
- "SELECT idx, sampleno, sample FROM %Q.sqlite_stat2"
- " ORDER BY rowid DESC", sInfo.zDatabase);
+ "SELECT idx, sampleno, sample, %s FROM %Q.sqlite_stat2"
+ " ORDER BY rowid DESC",
+ pTab->nCol>=5 ? "cnt" : "0", sInfo.zDatabase);
if( !zSql ){
rc = SQLITE_NOMEM;
}else{
pIdx = sqlite3FindIndex(db, zIndex, sInfo.zDatabase);
if( pIdx==0 ) continue;
iSample = sqlite3_column_int(pStmt, 1);
- if( iSample>=SQLITE_MAX_SAMPLES || iSample<0 ) continue;
- if( pIdx->nSample<=iSample ){
- IndexSample *pNew;
- int sz = sizeof(IndexSample)*(iSample+1);
- pNew = (IndexSample*)sqlite3Realloc(pIdx->aSample, sz);
- if( pNew==0 ){
- db->mallocFailed = 1;
- break;
- }
- pIdx->aSample = pNew;
- pIdx->nSample = iSample+1;
+ if( iSample>=SQLITE_MAX_SAMPLES ) continue;
+ if( iSample<0 ){
+ pSample = allocIndexSample(db, &pIdx->comkey, pIdx->comkey.n);
+ }else{
+ pSample = allocIndexSample(db, &pIdx->sample, iSample);
}
+ if( pSample==0 ) break;
eType = sqlite3_column_type(pStmt, 2);
- pSample = &pIdx->aSample[iSample];
pSample->eType = (u8)eType;
+ pSample->nCopy = sqlite3_column_int(pStmt, 4);
if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
pSample->u.r = sqlite3_column_double(pStmt, 2);
}else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){
sqlite3_column_text(pStmt, 2)
);
int n = sqlite3_column_bytes(pStmt, 2);
- if( n>24 ) n = 24;
+ if( n>255 ) n = 255;
pSample->nByte = (u8)n;
if( n < 1){
pSample->u.z = 0;
typedef struct IdList IdList;
typedef struct Index Index;
typedef struct IndexSample IndexSample;
+typedef struct IndexSampleArray IndexSampleArray;
typedef struct KeyClass KeyClass;
typedef struct KeyInfo KeyInfo;
typedef struct Lookaside Lookaside;
#define UNPACKED_PREFIX_MATCH 0x0010 /* A prefix match is considered OK */
#define UNPACKED_PREFIX_SEARCH 0x0020 /* A prefix match is considered OK */
+/*
+** Each sample stored in the sqlite_stat2 table is represented in memory
+** using a structure of this type.
+*/
+struct IndexSample {
+ union {
+ char *z; /* Value if eType is SQLITE_TEXT or SQLITE_BLOB */
+ double r; /* Value if eType is SQLITE_FLOAT or SQLITE_INTEGER */
+ } u;
+ u8 eType; /* SQLITE_NULL, SQLITE_INTEGER ... etc. */
+ u8 nByte; /* Size in byte of text or blob. */
+ u32 nCopy; /* How many copies of this sample are in the database */
+};
+
+/*
+** An array of IndexSample elements is as follows:
+*/
+struct IndexSampleArray {
+ u16 n; /* Number of elements in the array */
+ u16 nAlloc; /* Space allocated to a[] */
+ IndexSample *a; /* The samples */
+};
+
/*
** Each SQL index is represented in memory by an
** instance of the following structure.
u8 onError; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
u8 autoIndex; /* True if is automatically created (ex: by UNIQUE) */
u8 bUnordered; /* Use this index for == or IN queries only */
- u8 nSample; /* Number of slots in aSample[] */
char *zColAff; /* String defining the affinity of each column */
Index *pNext; /* The next index associated with the same table */
Schema *pSchema; /* Schema containing this index */
u8 *aSortOrder; /* Array of size Index.nColumn. True==DESC, False==ASC */
char **azColl; /* Array of collation sequence names for index */
- IndexSample *aSample; /* Array of SQLITE_INDEX_SAMPLES samples */
-};
-
-/*
-** Each sample stored in the sqlite_stat2 table is represented in memory
-** using a structure of this type.
-*/
-struct IndexSample {
- union {
- char *z; /* Value if eType is SQLITE_TEXT or SQLITE_BLOB */
- double r; /* Value if eType is SQLITE_FLOAT or SQLITE_INTEGER */
- } u;
- u8 eType; /* SQLITE_NULL, SQLITE_INTEGER ... etc. */
- u8 nByte; /* Size in byte of text or blob. */
+#ifdef SQLITE_ENABLE_STAT2
+ IndexSampleArray sample; /* Sampled histogram for the first column */
+ IndexSampleArray comkey; /* The most common keys */
+#endif
};
/*
/*
** Argument pIdx is a pointer to an index structure that has an array of
-** pIdx->nSample evenly spaced samples of the first indexed column
-** stored in Index.aSample. These samples divide the domain of values stored
-** the index into (pIdx->nSample+1) regions.
+** pIdx->sample.n evenly spaced samples of the first indexed column
+** stored in Index.sample. These samples divide the domain of values stored
+** in the index into (pIdx->sample.n+1) regions.
** Region 0 contains all values less than the first sample value. Region
** 1 contains values between the first and second samples. Region 2 contains
-** values between samples 2 and 3. And so on. Region pIdx->nSample
+** values between samples 2 and 3. And so on. Region pIdx->sample.n
** contains values larger than the last sample.
**
** If the index contains many duplicates of a single value, then it is
){
assert( roundUp==0 || roundUp==1 );
if( ALWAYS(pVal) ){
- IndexSample *aSample = pIdx->aSample;
- int nSample = pIdx->nSample;
+ IndexSample *aSample = pIdx->sample.a;
+ int nSample = pIdx->sample.n;
int i = 0;
int eType = sqlite3_value_type(pVal);
}
}
- assert( i>=0 && i<=pIdx->nSample );
+ assert( i>=0 && i<=pIdx->sample.n );
*piRegion = i;
}
return SQLITE_OK;
#ifdef SQLITE_ENABLE_STAT2
- if( nEq==0 && p->aSample ){
+ if( nEq==0 && p->sample.a ){
sqlite3_value *pLowerVal = 0;
sqlite3_value *pUpperVal = 0;
int iEst;
int iLower = 0;
- int nSample = p->nSample;
- int iUpper = p->nSample;
+ int nSample = p->sample.n;
+ int iUpper = p->sample.n;
int roundUpUpper = 0;
int roundUpLower = 0;
u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;
if( pLower ) iLower = iUpper/2;
}else if( pUpperVal==0 ){
rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
- if( pUpper ) iUpper = (iLower + p->nSample + 1)/2;
+ if( pUpper ) iUpper = (iLower + p->sample.n + 1)/2;
}else{
rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
if( rc==SQLITE_OK ){
int rc; /* Subfunction return code */
double nRowEst; /* New estimate of the number of rows */
- assert( p->aSample!=0 );
- assert( p->nSample>0 );
+ assert( p->sample.a!=0 );
+ assert( p->sample.n>0 );
aff = p->pTable->aCol[p->aiColumn[0]].affinity;
if( pExpr ){
rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
if( rc ) goto whereEqualScanEst_cancel;
WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
if( iLower>=iUpper ){
- nRowEst = p->aiRowEst[0]/(p->nSample*3);
+ nRowEst = p->aiRowEst[0]/(p->sample.n*3);
if( nRowEst<*pnRow ) *pnRow = nRowEst;
}else{
- nRowEst = (iUpper-iLower)*p->aiRowEst[0]/p->nSample;
+ nRowEst = (iUpper-iLower)*p->aiRowEst[0]/p->sample.n;
*pnRow = nRowEst;
}
int nSingle = 0; /* Histogram regions hit by a single value */
int nNotFound = 0; /* Count of values that are not constants */
int i; /* Loop counter */
- int nSample = p->nSample; /* Number of samples */
+ int nSample = p->sample.n; /* Number of samples */
u8 aSpan[SQLITE_MAX_SAMPLES+1]; /* Histogram regions that are spanned */
u8 aSingle[SQLITE_MAX_SAMPLES+1]; /* Histogram regions hit once */
- assert( p->aSample!=0 );
+ assert( p->sample.a!=0 );
assert( nSample>0 );
aff = p->pTable->aCol[p->aiColumn[0]].affinity;
memset(aSpan, 0, nSample+1);
wsFlags |= WHERE_COLUMN_NULL;
}
#ifdef SQLITE_ENABLE_STAT2
- if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm;
+ if( nEq==0 && pProbe->sample.a ) pFirstTerm = pTerm;
#endif
used |= pTerm->prereqRight;
}