From: drh Date: Mon, 26 Mar 2012 21:57:53 +0000 (+0000) Subject: Add the fts3view utility program. X-Git-Tag: mountain-lion~3^2~9^2~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=36f6b891e6fcf6c7900d65372f790389eefbfbc5;p=thirdparty%2Fsqlite.git Add the fts3view utility program. FossilOrigin-Name: f936c8ea16d21345fd1622272dc7e9850acb2493 --- diff --git a/ext/fts3/tool/fts3view.c b/ext/fts3/tool/fts3view.c new file mode 100644 index 0000000000..fbda5016cc --- /dev/null +++ b/ext/fts3/tool/fts3view.c @@ -0,0 +1,562 @@ +/* +** This program is a debugging and analysis utility that displays +** information about an FTS3 or FTS4 index. +** +** Link this program against the SQLite3 amalgamation with the +** SQLITE_ENABLE_FTS4 compile-time option. Then run it as: +** +** fts3view DATABASE +** +** to get a list of all FTS3/4 tables in DATABASE, or do +** +** fts3view DATABASE TABLE COMMAND .... +** +** to see various aspects of the TABLE table. Type fts3view with no +** arguments for a list of available COMMANDs. +*/ +#include +#include +#include +#include +#include "sqlite3.h" + +/* +** Extra command-line arguments: +*/ +int nExtra; +char **azExtra; + +/* +** Look for a command-line argument. +*/ +const char *findOption(const char *zName, int hasArg, const char *zDefault){ + int i; + const char *zResult = zDefault; + for(i=0; i=2000 ){ + n = 0; + pStmt = prepare(db, "SELECT count(*) FROM %s" + " WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + n = sqlite3_column_int(pStmt, 0); + } + sqlite3_finalize(pStmt); + printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n", + n, n*100.0/nToken); + } + + if( nDoc>=200 ){ + n = 0; + pStmt = prepare(db, "SELECT count(*) FROM %s" + " WHERE col='*' AND occurrences<=%d", zAux, nDoc/100); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + n = sqlite3_column_int(pStmt, 0); + } + sqlite3_finalize(pStmt); + printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n", + n, n*100.0/nToken); + } + + nTop = atoi(findOption("top", 1, "25")); + printf("The %d most common tokens:\n", nTop); + pStmt = prepare(db, + "SELECT term, documents FROM %s" + " WHERE col='*'" + " ORDER BY documents DESC, term" + " LIMIT %d", zAux, nTop); + i = 0; + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + i++; + n = sqlite3_column_int(pStmt, 1); + printf(" %2d. %-30s %9d docs %5.2f%%\n", i, + sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc); + } + sqlite3_finalize(pStmt); + +end_vocab: + runSql(db, "ROLLBACK"); + sqlite3_free(zAux); +} + +/* +** Report on the number and sizes of segments +*/ +static void showSegmentStats(sqlite3 *db, const char *zTab){ + sqlite3_stmt *pStmt; + int nSeg = 0; + sqlite3_int64 szSeg = 0, mxSeg = 0; + int nIdx = 0; + sqlite3_int64 szIdx = 0, mxIdx = 0; + int nRoot = 0; + sqlite3_int64 szRoot = 0, mxRoot = 0; + sqlite3_int64 mx; + int nLeaf; + int n; + int pgsz; + int mxLevel; + int i; + + pStmt = prepare(db, + "SELECT count(*), sum(length(block)), max(length(block))" + " FROM '%q_segments'", + zTab); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + nSeg = sqlite3_column_int(pStmt, 0); + szSeg = sqlite3_column_int64(pStmt, 1); + mxSeg = sqlite3_column_int64(pStmt, 2); + } + sqlite3_finalize(pStmt); + pStmt = prepare(db, + "SELECT count(*), sum(length(block)), max(length(block))" + " FROM '%q_segments' a JOIN '%q_segdir' b" + " WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block", + zTab, zTab); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + nIdx = sqlite3_column_int(pStmt, 0); + szIdx = sqlite3_column_int64(pStmt, 1); + mxIdx = sqlite3_column_int64(pStmt, 2); + } + sqlite3_finalize(pStmt); + pStmt = prepare(db, + "SELECT count(*), sum(length(root)), max(length(root))" + " FROM '%q_segdir'", + zTab); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + nRoot = sqlite3_column_int(pStmt, 0); + szRoot = sqlite3_column_int64(pStmt, 1); + mxRoot = sqlite3_column_int64(pStmt, 2); + } + sqlite3_finalize(pStmt); + + printf("Number of segments....................... %9d\n", nSeg+nRoot); + printf("Number of leaf segments.................. %9d\n", nSeg-nIdx); + printf("Number of index segments................. %9d\n", nIdx); + printf("Number of root segments.................. %9d\n", nRoot); + printf("Total size of all segments............... %9lld\n", szSeg+szRoot); + printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx); + printf("Total size of all index segments......... %9lld\n", szIdx); + printf("Total size of all root segments.......... %9lld\n", szRoot); + if( nSeg>0 ){ + printf("Average size of all segments............. %11.1f\n", + (double)(szSeg+szRoot)/(double)(nSeg+nRoot)); + printf("Average size of leaf segments............ %11.1f\n", + (double)(szSeg-szIdx)/(double)(nSeg-nIdx)); + } + if( nIdx>0 ){ + printf("Average size of index segments........... %11.1f\n", + (double)szIdx/(double)nIdx); + } + if( nRoot>0 ){ + printf("Average size of root segments............ %11.1f\n", + (double)szRoot/(double)nRoot); + } + mx = mxSeg; + if( mx%d", + zTab, zTab, pgsz-45); + n = 0; + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + n = sqlite3_column_int(pStmt, 0); + } + sqlite3_finalize(pStmt); + nLeaf = nSeg - nIdx; + printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n", + pgsz-45, n, n*100.0/nLeaf); + + pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab); + mxLevel = 0; + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + mxLevel = sqlite3_column_int(pStmt, 0); + } + sqlite3_finalize(pStmt); + + for(i=0; i<=mxLevel; i++){ + pStmt = prepare(db, + "SELECT count(*), sum(len), avg(len), max(len), sum(len>%d)," + " count(distinct idx)" + " FROM (SELECT length(a.block) AS len, idx" + " FROM '%q_segments' a JOIN '%q_segdir' b" + " WHERE (a.blockid BETWEEN b.start_block" + " AND b.leaves_end_block)" + " AND (b.level%%1024)==%d)", + pgsz-45, zTab, zTab, i); + if( sqlite3_step(pStmt)==SQLITE_ROW + && (nLeaf = sqlite3_column_int(pStmt, 0))>0 + ){ + int nIdx = sqlite3_column_int(pStmt, 5); + sqlite3_int64 sz; + printf("For level %d:\n", i); + printf(" Number of indexes...................... %9d\n", nIdx); + printf(" Number of leaf segments................ %9d\n", nLeaf); + if( nIdx>1 ){ + printf(" Average leaf segments per index........ %11.1f\n", + (double)nLeaf/(double)nIdx); + } + printf(" Total size of all leaf segments........ %9lld\n", + (sz = sqlite3_column_int64(pStmt, 1))); + printf(" Average size of leaf segments.......... %11.1f\n", + sqlite3_column_double(pStmt, 2)); + if( nIdx>1 ){ + printf(" Average leaf segment size per index.... %11.1f\n", + (double)sz/(double)nIdx); + } + printf(" Maximum leaf segment size.............. %9lld\n", + sqlite3_column_int64(pStmt, 3)); + n = sqlite3_column_int(pStmt, 4); + printf(" Leaf segments larger than %5d bytes.. %9d %5.2f%%\n", + pgsz-45, n, n*100.0/nLeaf); + } + sqlite3_finalize(pStmt); + } +} + +/* +** Print a single "tree" line of the segdir map output. +*/ +static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){ + printf(" tree %9lld", iLower); + if( iUpper>iLower ){ + printf(" thru %9lld (%lld blocks)", iUpper, iUpper-iLower+1); + } + printf("\n"); +} + +/* +** Show a map of segments derived from the %_segdir table. +*/ +static void showSegdirMap(sqlite3 *db, const char *zTab){ + int mxIndex, iIndex; + sqlite3_stmt *pStmt = 0; + sqlite3_stmt *pStmt2 = 0; + int prevLevel; + + pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab); + if( sqlite3_step(pStmt)==SQLITE_ROW ){ + mxIndex = sqlite3_column_int(pStmt, 0); + }else{ + mxIndex = 0; + } + sqlite3_finalize(pStmt); + + printf("Number of inverted indices............... %3d\n", mxIndex+1); + pStmt = prepare(db, + "SELECT level, idx, start_block, leaves_end_block, end_block" + " FROM '%q_segdir'" + " WHERE level/1024==?" + " ORDER BY level DESC, idx", + zTab); + pStmt2 = prepare(db, + "SELECT blockid FROM '%q_segments'" + " WHERE blockid BETWEEN ? AND ? ORDER BY blockid", + zTab); + for(iIndex=0; iIndex<=mxIndex; iIndex++){ + if( mxIndex>0 ){ + printf("**************************** Index %d " + "****************************\n", iIndex); + } + sqlite3_bind_int(pStmt, 1, iIndex); + prevLevel = -1; + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + int iLevel = sqlite3_column_int(pStmt, 0)%1024; + int iIdx = sqlite3_column_int(pStmt, 1); + sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2); + sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3); + sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4); + if( iLevel!=prevLevel ){ + printf("level %2d idx %2d", iLevel, iIdx); + prevLevel = iLevel; + }else{ + printf(" idx %2d", iIdx); + } + if( iLEnd>iStart ){ + sqlite3_int64 iLower, iPrev, iX; + printf(" leaves %9lld thru %9lld (%lld blocks)\n", + iStart, iLEnd, iLEnd - iStart + 1); + if( iLEnd+1<=iEnd ){ + sqlite3_bind_int64(pStmt2, 1, iLEnd+1); + sqlite3_bind_int64(pStmt2, 2, iEnd); + iLower = -1; + while( sqlite3_step(pStmt2)==SQLITE_ROW ){ + iX = sqlite3_column_int64(pStmt2, 0); + if( iLower<0 ){ + iLower = iPrev = iX; + }else if( iX==iPrev+1 ){ + iPrev = iX; + }else{ + printTreeLine(iLower, iPrev); + iLower = iPrev = iX; + } + } + sqlite3_reset(pStmt2); + if( iLower>=0 ) printTreeLine(iLower, iPrev); + } + }else{ + printf(" root only\n"); + } + } + sqlite3_reset(pStmt); + } + sqlite3_finalize(pStmt); + sqlite3_finalize(pStmt2); +} + + +static void usage(const char *argv0){ + fprintf(stderr, "Usage: %s DATABASE\n" + " or: %s DATABASE FTS3TABLE ARGS...\n", argv0, argv0); + fprintf(stderr, + "ARGS:\n" + " schema FTS table schema\n" + " segdir directory of segments\n" + " segment-stats information about segment sizes\n" + " stat content of the %%_stat table\n" + " vocabulary --top N information on the document vocabulary\n" + ); + exit(1); +} + +int main(int argc, char **argv){ + sqlite3 *db; + int rc; + const char *zTab; + const char *zCmd; + if( argc<2 ) usage(argv[0]); + rc = sqlite3_open(argv[1], &db); + if( rc ){ + fprintf(stderr, "Cannot open %s\n", argv[1]); + exit(1); + } + if( argc==2 ){ + sqlite3_stmt *pStmt; + int cnt = 0; + pStmt = prepare(db, "SELECT b.sql" + " FROM sqlite_master a, sqlite_master b" + " WHERE a.name GLOB '*_segdir'" + " AND b.name=substr(a.name,1,length(a.name)-7)" + " ORDER BY 1"); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + cnt++; + printf("%s;\n", sqlite3_column_text(pStmt, 0)); + } + sqlite3_finalize(pStmt); + if( cnt==0 ){ + printf("/* No FTS3/4 tables found in database %s */\n", argv[1]); + } + return 0; + } + if( argc<4 ) usage(argv[0]); + zTab = argv[2]; + zCmd = argv[3]; + nExtra = argc-4; + azExtra = argv+4; + if( strcmp(zCmd,"schema")==0 ){ + showSchema(db, zTab); + }else if( strcmp(zCmd,"segdir")==0 ){ + showSegdirMap(db, zTab); + }else if( strcmp(zCmd,"segment-stats")==0 ){ + showSegmentStats(db, zTab); + }else if( strcmp(zCmd,"stat")==0 ){ + showStat(db, zTab); + }else if( strcmp(zCmd,"vocabulary")==0 ){ + showVocabulary(db, zTab); + }else{ + usage(argv[0]); + } + return 0; +} diff --git a/manifest b/manifest index 4621b80011..42ffd4a36c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sFTS3\sso\sthat\sit\sworks\seven\swithout\sSQLITE_DEBUG. -D 2012-03-26T14:36:42.908 +C Add\sthe\sfts3view\sutility\sprogram. +D 2012-03-26T21:57:53.278 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2f37e468503dbe79d35c9f6dffcf3fae1ae9ec20 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -81,6 +81,7 @@ F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004 F ext/fts3/fts3_write.c 6014014cf0257d314d29d7eb50e0c88d85356d65 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 +F ext/fts3/tool/fts3view.c 005efba99f4de1ab104456f7652f955603a81d7f F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 F ext/icu/icu.c eb9ae1d79046bd7871aa97ee6da51eb770134b5a F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37 @@ -999,7 +1000,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P f907fc3fb387e74bb66babcbf050748cb253a6fa -R c007d11e260643139d517e8dc5ea9d00 +P a18c103121529c2e3c6a8ada16a4c40d14080670 +R ad0002e6aa190f63417466651f9099fc U drh -Z d7453c1ea386bbab453c1602af9f7003 +Z 38d7fb610b5e9fceb6186edb4298f4d3 diff --git a/manifest.uuid b/manifest.uuid index 87e220b2bf..2fd4d0cb57 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a18c103121529c2e3c6a8ada16a4c40d14080670 \ No newline at end of file +f936c8ea16d21345fd1622272dc7e9850acb2493 \ No newline at end of file