From: adamd Date: Sat, 2 Sep 2006 00:23:01 +0000 (+0000) Subject: Miscellaneous restructuring and cleanup based on suggestions from shess. (CVS 3382) X-Git-Tag: version-3.6.10~2785 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9eb3997b02222a23b06f6c84471c2b9e1ee2410e;p=thirdparty%2Fsqlite.git Miscellaneous restructuring and cleanup based on suggestions from shess. (CVS 3382) FossilOrigin-Name: e98b0cf292f6dc9deb6ae9b773c52b16867f7556 --- diff --git a/ext/fts1/fts1.c b/ext/fts1/fts1.c index 0d6fe27c09..6b193665bd 100644 --- a/ext/fts1/fts1.c +++ b/ext/fts1/fts1.c @@ -194,34 +194,43 @@ static void appendVarint(DocList *d, sqlite_int64 i){ static void docListAddDocid(DocList *d, sqlite_int64 iDocid){ appendVarint(d, iDocid); - d->iLastPos = 0; + if( d->iType>=DL_POSITIONS ){ + appendVarint(d, 0); /* initially empty position list */ + d->iLastPos = 0; + } } -/* Add a position to the last position list in a doclist. */ -static void docListAddPos(DocList *d, int iPos){ - assert( d->iType>=DL_POSITIONS ); +/* helper function for docListAddPos and docListAddPosOffset */ +static void addPos(DocList *d, int iPos) { appendVarint(d, iPos-d->iLastPos+1); d->iLastPos = iPos; } +/* Add a position to the last position list in a doclist. */ +static void docListAddPos(DocList *d, int iPos){ + assert( d->iType==DL_POSITIONS ); + assert( d->nData>0 ); + --d->nData; /* remove previous terminator */ + addPos(d, iPos); + appendVarint(d, 0); /* add new terminator */ +} + static void docListAddPosOffset(DocList *d, int iPos, int iStartOffset, int iEndOffset){ assert( d->iType==DL_POSITIONS_OFFSETS ); - docListAddPos(d, iPos); + assert( d->nData>0 ); + --d->nData; /* remove previous terminator */ + addPos(d, iPos); appendVarint(d, iStartOffset-d->iLastOffset); d->iLastOffset = iStartOffset; appendVarint(d, iEndOffset-iStartOffset); -} - -/* Terminate the last position list in the given doclist. */ -static void docListAddEndPos(DocList *d){ - appendVarint(d, 0); + appendVarint(d, 0); /* add new terminator */ } typedef struct DocListReader { DocList *pDoclist; char *p; - int iLastPos; /* the last position read */ + int iLastPos; /* the last position read, or -1 when not in a position list */ } DocListReader; static void readerInit(DocListReader *r, DocList *pDoclist){ @@ -229,17 +238,18 @@ static void readerInit(DocListReader *r, DocList *pDoclist){ if( pDoclist!=NULL ){ r->p = pDoclist->pData; } - r->iLastPos = 0; + r->iLastPos = -1; } -static int readerAtEnd(DocListReader *pReader){ +static int atEnd(DocListReader *pReader){ return pReader->p >= docListEnd(pReader->pDoclist); } /* Peek at the next docid without advancing the read pointer. */ static sqlite_int64 peekDocid(DocListReader *pReader){ sqlite_int64 ret; - assert( !readerAtEnd(pReader) ); + assert( !atEnd(pReader) ); + assert( pReader->iLastPos==-1 ); getVarint(pReader->p, &ret); return ret; } @@ -247,9 +257,12 @@ static sqlite_int64 peekDocid(DocListReader *pReader){ /* Read the next docid. */ static sqlite_int64 readDocid(DocListReader *pReader){ sqlite_int64 ret; - assert( !readerAtEnd(pReader) ); + assert( !atEnd(pReader) ); + assert( pReader->iLastPos==-1 ); pReader->p += getVarint(pReader->p, &ret); - pReader->iLastPos = 0; + if( pReader->pDoclist->iType>=DL_POSITIONS ){ + pReader->iLastPos = 0; + } return ret; } @@ -259,7 +272,8 @@ static int readPosition(DocListReader *pReader){ int i; int iType = pReader->pDoclist->iType; assert( iType>=DL_POSITIONS ); - assert( !readerAtEnd(pReader) ); + assert( !atEnd(pReader) ); + assert( pReader->iLastPos!=-1 ); pReader->p += getVarint32(pReader->p, &i); if( i==0 ){ @@ -286,11 +300,21 @@ static void skipPositionList(DocListReader *pReader){ * positions. */ static void skipDocument(DocListReader *pReader){ readDocid(pReader); - if( pReader->pDoclist->iType >= DL_POSITIONS ){ + if( pReader->pDoclist->iType>=DL_POSITIONS ){ skipPositionList(pReader); } } +/* Skip past all docids which are less than [iDocid]. Returns 1 if a docid + * matching [iDocid] was found. */ +static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){ + sqlite_int64 d = 0; + while( !atEnd(pReader) && (d=peekDocid(pReader))nData -= (reader.p - p); @@ -327,7 +350,6 @@ static int docListUpdate(DocList *d, sqlite_int64 iDocid, DocList *pUpdate){ /* Insert if indicated. */ if( pUpdate!=NULL ){ int iDoclist = p-d->pData; - docListAddEndPos(pUpdate); d->pData = realloc(d->pData, d->nData+pUpdate->nData); p = d->pData + iDoclist; @@ -352,7 +374,7 @@ static int docListSplit(DocList *d, DocList *d2){ while( reader.piType, reader.p, docListEnd(d) - reader.p); d->nData = reader.p - d->pData; d->pData = realloc(d->pData, d->nData); @@ -374,8 +396,8 @@ static int docListSplit(DocList *d, DocList *d2){ * * If [in] is NULL, then the on-disk doclist is copied to [out] directly. * - * A merge is performed using an integer [iOffset] provided by the caller. - * [iOffset] is subtracted from each position in the on-disk doclist for the + * A merge is performed using an integer [iPhrasePos] provided by the caller. + * [iPhrasePos] is subtracted from each position in the on-disk doclist for the * purpose of position comparison; this is helpful in implementing phrase * searches. * @@ -385,18 +407,18 @@ static int docListSplit(DocList *d, DocList *d2){ typedef struct DocListMerge { DocListReader in; DocList *pOut; - int iOffset; + int iPhrasePos; } DocListMerge; static void mergeInit(DocListMerge *m, - DocList *pIn, int iOffset, DocList *pOut){ + DocList *pIn, int iPhrasePos, DocList *pOut){ readerInit(&m->in, pIn); m->pOut = pOut; - m->iOffset = iOffset; + m->iPhrasePos = iPhrasePos; /* can't handle offsets yet */ - assert( pIn==NULL || pIn->iType <= DL_POSITIONS ); - assert( pOut->iType <= DL_POSITIONS ); + assert( pIn==NULL || pIn->iType<=DL_POSITIONS ); + assert( pOut->iType<=DL_POSITIONS ); } /* A helper function for mergeBlock(), below. Merge the position lists @@ -404,67 +426,68 @@ static void mergeInit(DocListMerge *m, * If the merge matches, write [iDocid] to m->pOut; if m->pOut * has positions then write all matching positions as well. */ static void mergePosList(DocListMerge *m, sqlite_int64 iDocid, - DocListReader *pBlockReader){ - int block_pos = readPosition(pBlockReader); - int in_pos = readPosition(&m->in); + DocListReader *pBlockReader){ + int iBlockPos = readPosition(pBlockReader); + int iInPos = readPosition(&m->in); int match = 0; - while( block_pos!=-1 || in_pos!=-1 ){ - if( block_pos-m->iOffset==in_pos ){ + + /* Loop until we've reached the end of both position lists. */ + while( iBlockPos!=-1 || iInPos!=-1 ){ + if( iBlockPos-m->iPhrasePos==iInPos ){ if( !match ){ docListAddDocid(m->pOut, iDocid); match = 1; } - if( m->pOut->iType >= DL_POSITIONS ){ - docListAddPos(m->pOut, in_pos); + if( m->pOut->iType>=DL_POSITIONS ){ + docListAddPos(m->pOut, iInPos); } - block_pos = readPosition(pBlockReader); - in_pos = readPosition(&m->in); - } else if( in_pos==-1 || (block_pos!=-1 && block_pos-m->iOffsetin); + } else if( iInPos==-1 || (iBlockPos!=-1 && iBlockPos-m->iPhrasePosin); + iInPos = readPosition(&m->in); } } - if( m->pOut->iType >= DL_POSITIONS && match ){ - docListAddEndPos(m->pOut); +} + +/* A helper function for mergeBlock(), below. Copy the docid and + * position list (if wanted) from pBlockReader to pOut. */ +static void copyDocument(DocList *pOut, sqlite_int64 iDocid, + DocListReader *pBlockReader){ + docListAddDocid(pOut, iDocid); + if( pOut->iTypeiType >= DL_POSITIONS ); + assert( pBlock->iType>=DL_POSITIONS ); readerInit(&blockReader, pBlock); - while( !readerAtEnd(&blockReader) ){ + while( !atEnd(&blockReader) ){ sqlite_int64 iDocid = readDocid(&blockReader); - if( m->in.pDoclist!=NULL ){ - while( 1 ){ - if( readerAtEnd(&m->in) ) return; /* nothing more to merge */ - if( peekDocid(&m->in)>=iDocid ) break; - skipDocument(&m->in); - } - if( peekDocid(&m->in)>iDocid ){ /* [pIn] has no match with iDocid */ - skipPositionList(&blockReader); /* skip this docid in the block */ - continue; - } - readDocid(&m->in); - } - /* We have a document match. */ - if( m->in.pDoclist==NULL || m->in.pDoclist->iType < DL_POSITIONS ){ - /* We don't need to do a poslist merge. */ - docListAddDocid(m->pOut, iDocid); - if( m->pOut->iType >= DL_POSITIONS ){ - /* Copy all positions to the output doclist. */ - while( 1 ){ - int pos = readPosition(&blockReader); - if( pos==-1 ) break; - docListAddPos(m->pOut, pos); - } - docListAddEndPos(m->pOut); - } else skipPositionList(&blockReader); + if( m->in.pDoclist==NULL ){ + copyDocument(m->pOut, iDocid, &blockReader); continue; } - mergePosList(m, iDocid, &blockReader); + if( skipToDocid(&m->in, iDocid) ){ /* we have a docid match */ + readDocid(&m->in); + if( m->in.pDoclist->iType>=DL_POSITIONS ){ + mergePosList(m, iDocid, &blockReader); + } else { + copyDocument(m->pOut, iDocid, &blockReader); + } + } else if( !atEnd(&m->in) ){ + skipPositionList(&blockReader); /* skip this docid in the block */ + } else return; /* nothing more to merge */ } } @@ -1030,7 +1053,7 @@ static int fulltextNext(sqlite3_vtab_cursor *pCursor){ rc = sqlite3_reset(c->pStmt); if( rc!=SQLITE_OK ) return rc; - if( readerAtEnd(&c->result)){ + if( atEnd(&c->result)){ c->eof = 1; return SQLITE_OK; } @@ -1068,28 +1091,25 @@ static int term_select_doclist(fulltext_vtab *v, const char *pTerm, int nTerm, return sqlite3_step(*ppStmt); /* TODO(adamd): handle schema error */ } -/* Read the posting list for [zTerm]; AND it with the doclist [in] to - * produce the doclist [out], using the given offset [iOffset] for phrase - * matching. +/* Read the posting list for [pTerm]; AND it with the doclist [pIn] to + * produce the doclist [out], using the given phrase position [iPhrasePos]. * (*pSelect) is used to hold an SQLite statement used inside this function; * the caller should initialize *pSelect to NULL before the first call. */ -static int query_merge(fulltext_vtab *v, sqlite3_stmt **pSelect, +static int mergeQuery(fulltext_vtab *v, sqlite3_stmt **pSelect, const char *pTerm, int nTerm, - DocList *pIn, int iOffset, DocList *out){ + DocList *pIn, int iPhrasePos, DocList *out){ int rc; DocListMerge merge; - if( pIn!=NULL && !pIn->nData ){ - /* If [pIn] is already empty, there's no point in reading the - * posting list to AND it in; return immediately. */ - return SQLITE_OK; - } + /* If [pIn] is already empty, there's no point in reading the + * posting list to AND it in; return immediately. */ + if( pIn!=NULL && !pIn->nData ) return SQLITE_OK; rc = term_select_doclist(v, pTerm, nTerm, pSelect); if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc; - mergeInit(&merge, pIn, iOffset, out); + mergeInit(&merge, pIn, iPhrasePos, out); while( rc==SQLITE_ROW ){ DocList block; docListInit(&block, DL_POSITIONS_OFFSETS, @@ -1099,16 +1119,14 @@ static int query_merge(fulltext_vtab *v, sqlite3_stmt **pSelect, docListDestroy(&block); rc = sqlite3_step(*pSelect); - if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ){ - return rc; - } + if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc; } return SQLITE_OK; } typedef struct QueryTerm { - int is_phrase; /* true if this term begins a new phrase */ + int isPhrase; /* true if this term begins a new phrase */ char *pTerm; int nTerm; } QueryTerm; @@ -1117,38 +1135,38 @@ typedef struct QueryTerm { * * As an example, parsing the query ["four score" years "new nation"] will * yield a Query with 5 terms: - * "four", is_phrase = 1 - * "score", is_phrase = 0 - * "years", is_phrase = 1 - * "new", is_phrase = 1 - * "nation", is_phrase = 0 + * "four", isPhrase = 1 + * "score", isPhrase = 0 + * "years", isPhrase = 1 + * "new", isPhrase = 1 + * "nation", isPhrase = 0 */ typedef struct Query { int nTerms; - QueryTerm *pTerm; + QueryTerm *pTerms; } Query; -static void query_add(Query *q, int is_phrase, const char *pTerm, int nTerm){ +static void queryAdd(Query *q, int isPhrase, const char *pTerm, int nTerm){ QueryTerm *t; ++q->nTerms; - q->pTerm = realloc(q->pTerm, q->nTerms * sizeof(q->pTerm[0])); - t = &q->pTerm[q->nTerms - 1]; - t->is_phrase = is_phrase; + q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); + t = &q->pTerms[q->nTerms - 1]; + t->isPhrase = isPhrase; t->pTerm = malloc(nTerm); memcpy(t->pTerm, pTerm, nTerm); t->nTerm = nTerm; } -static void query_free(Query *q){ +static void queryDestroy(Query *q){ int i; for(i = 0; i < q->nTerms; ++i){ - free(q->pTerm[i].pTerm); + free(q->pTerms[i].pTerm); } - free(q->pTerm); + free(q->pTerms); } -static int tokenize_segment(sqlite3_tokenizer *pTokenizer, - const char *pSegment, int nSegment, int in_phrase, +static int tokenizeSegment(sqlite3_tokenizer *pTokenizer, + const char *pSegment, int nSegment, int inPhrase, Query *pQuery){ sqlite3_tokenizer_module *pModule = pTokenizer->pModule; sqlite3_tokenizer_cursor *pCursor; @@ -1160,45 +1178,54 @@ static int tokenize_segment(sqlite3_tokenizer *pTokenizer, while( 1 ){ const char *pToken; - int nToken, iStartOffset, iEndOffset, dummy_pos; + int nToken, iDummyOffset, iDummyPos; rc = pModule->xNext(pCursor, &pToken, &nToken, - &iStartOffset, &iEndOffset, - &dummy_pos); + &iDummyOffset, &iDummyOffset, + &iDummyPos); if( rc!=SQLITE_OK ) break; - query_add(pQuery, !in_phrase || is_first, pToken, nToken); + queryAdd(pQuery, !inPhrase || is_first, pToken, nToken); is_first = 0; } return pModule->xClose(pCursor); } -/* Parse a query string, yielding a Query object. */ -static int parse_query(fulltext_vtab *v, const char *pInput, int nInput, - Query *pQuery){ - int iInput, in_phrase = 0; +/* Parse a query string, yielding a Query object [pQuery], which the caller + * must free. */ +static int parseQuery(fulltext_vtab *v, const char *pInput, int nInput, + Query *pQuery){ + int iInput, inPhrase = 0; if( nInput<0 ) nInput = strlen(pInput); pQuery->nTerms = 0; - pQuery->pTerm = NULL; + pQuery->pTerms = NULL; for(iInput=0; iInputiInput ){ - tokenize_segment(v->pTokenizer, pInput+iInput, i-iInput, in_phrase, + tokenizeSegment(v->pTokenizer, pInput+iInput, i-iInput, inPhrase, pQuery); } iInput = i; - in_phrase = !in_phrase; + if( iresult, pResult); zStatement = "select rowid, content from %_content where rowid = ?"; @@ -1294,7 +1321,7 @@ static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ } /* Build a hash table containing all terms in pText. */ -static int build_terms(fts1Hash *terms, sqlite3_tokenizer *pTokenizer, +static int buildTerms(fts1Hash *terms, sqlite3_tokenizer *pTokenizer, const char *pText, int nText, sqlite_int64 iDocid){ sqlite3_tokenizer_cursor *pCursor; const char *pToken; @@ -1398,7 +1425,7 @@ static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid, if( !pText || !nText ) return SQLITE_OK; /* nothing to index */ - rc = build_terms(&terms, v->pTokenizer, pText, nText, *piRowid); + rc = buildTerms(&terms, v->pTokenizer, pText, nText, *piRowid); if( rc!=SQLITE_OK ) return rc; for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){ @@ -1451,7 +1478,7 @@ static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){ int rc = content_select(v, iRow, &pText, &nText); if( rc!=SQLITE_OK ) return rc; - rc = build_terms(&terms, v->pTokenizer, pText, nText, iRow); + rc = buildTerms(&terms, v->pTokenizer, pText, nText, iRow); free(pText); if( rc!=SQLITE_OK ) return rc; diff --git a/manifest b/manifest index ee9ed8cbb5..5dc4e78d47 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Automatically\scompute\sthe\ssqlite3.def\sand\stclsqlite3.def\sfiles\swhen\nbuilding\swindows\sDLLs.\s\sThis\swill\s(hopefully)\skeep\sthe\s.def\sfiles\sin\nperfect\ssynchronization\swith\sthe\sDLLs.\s\sTicket\s#1951.\s(CVS\s3381) -D 2006-09-01T17:06:20 +C Miscellaneous\srestructuring\sand\scleanup\sbased\son\ssuggestions\sfrom\sshess.\s(CVS\s3382) +D 2006-09-02T00:23:02 F Makefile.in 659b63368cfbb95a224c9d2f2a9897802d96a4ea F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -21,7 +21,7 @@ F ext/README.txt 913a7bd3f4837ab14d7e063304181787658b14e1 F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b F ext/fts1/ft_hash.h 1a35e654a235c2c662d3ca0dfc3138ad60b8b7d5 -F ext/fts1/fts1.c 6ac8a4d6203431b16ea36681101ee146bea6abcc +F ext/fts1/fts1.c 98f1b10b6af53dc665eee9bccf7179c817b54b3c F ext/fts1/fts1.h fe8e8f38dd6d2d2645b9b0d6972e80985249575f F ext/fts1/fts1_hash.c 3196cee866edbebb1c0521e21672e6d599965114 F ext/fts1/fts1_hash.h 957d378355ed29f672cd5add012ce8b088a5e089 @@ -395,7 +395,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513 -P bedbac54db173ceb74bd78acd021209f28aa30a8 -R 95867c71124f856bb75de299ad025124 -U drh -Z 2f626a5cf47e2b61edd21d8619c7423b +P 1f6d79266a7f8d0e909e47d9858557e3f95407db +R 9f47934fbcba4f9a0e1f8df18feb2204 +U adamd +Z fba5a153a6d748159bc7d61ee5016242 diff --git a/manifest.uuid b/manifest.uuid index 3fafed8749..55ec8643b0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1f6d79266a7f8d0e909e47d9858557e3f95407db \ No newline at end of file +e98b0cf292f6dc9deb6ae9b773c52b16867f7556 \ No newline at end of file