From 0a62730d3fa12612a651c2fef54acc4fac46640b Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 28 Nov 2009 21:33:21 +0000 Subject: [PATCH] Updates to snippet() and offsets() functions of FTS3 so that they work sanely following an OOM fault. FossilOrigin-Name: b939a37a8ce296785a300e79ab9d3d87ad91343f --- ext/fts3/fts3_snippet.c | 265 ++++++++++++++++++---------------------- ext/fts3/fts3_write.c | 6 +- manifest | 20 +-- manifest.uuid | 2 +- 4 files changed, 137 insertions(+), 156 deletions(-) diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index f802b16ad2..6038b18a00 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -46,144 +46,94 @@ struct Snippet { ** hi-bit-set characters. This is the same solution used in the ** tokenizer. */ -/* TODO(shess) The snippet-generation code should be using the -** tokenizer-generated tokens rather than doing its own local -** tokenization. -*/ -/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ -static int safe_isspace(char c){ +static int fts3snippetIsspace(char c){ return (c&0x80)==0 ? isspace(c) : 0; } -static int safe_isalnum(char c){ - return (c&0x80)==0 ? isalnum(c) : 0; -} - -/*******************************************************************/ -/* DataBuffer is used to collect data into a buffer in piecemeal -** fashion. It implements the usual distinction between amount of -** data currently stored (nData) and buffer capacity (nCapacity). -** -** dataBufferInit - create a buffer with given initial capacity. -** dataBufferReset - forget buffer's data, retaining capacity. -** dataBufferSwap - swap contents of two buffers. -** dataBufferExpand - expand capacity without adding data. -** dataBufferAppend - append data. -** dataBufferAppend2 - append two pieces of data at once. -** dataBufferReplace - replace buffer's data. -*/ -typedef struct DataBuffer { - char *pData; /* Pointer to malloc'ed buffer. */ - int nCapacity; /* Size of pData buffer. 
*/ - int nData; /* End of data loaded into pData. */ -} DataBuffer; - -static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){ - assert( nCapacity>=0 ); - pBuffer->nData = 0; - pBuffer->nCapacity = nCapacity; - pBuffer->pData = nCapacity==0 ? NULL : sqlite3_malloc(nCapacity); -} -static void dataBufferReset(DataBuffer *pBuffer){ - pBuffer->nData = 0; -} -static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){ - assert( nAddCapacity>0 ); - /* TODO(shess) Consider expanding more aggressively. Note that the - ** underlying malloc implementation may take care of such things for - ** us already. - */ - if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){ - pBuffer->nCapacity = pBuffer->nData+nAddCapacity; - pBuffer->pData = sqlite3_realloc(pBuffer->pData, pBuffer->nCapacity); - } -} -static void dataBufferAppend(DataBuffer *pBuffer, - const char *pSource, int nSource){ - assert( nSource>0 && pSource!=NULL ); - dataBufferExpand(pBuffer, nSource); - memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource); - pBuffer->nData += nSource; -} -static void dataBufferAppend2(DataBuffer *pBuffer, - const char *pSource1, int nSource1, - const char *pSource2, int nSource2){ - assert( nSource1>0 && pSource1!=NULL ); - assert( nSource2>0 && pSource2!=NULL ); - dataBufferExpand(pBuffer, nSource1+nSource2); - memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1); - memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2); - pBuffer->nData += nSource1+nSource2; -} -static void dataBufferReplace(DataBuffer *pBuffer, - const char *pSource, int nSource){ - dataBufferReset(pBuffer); - dataBufferAppend(pBuffer, pSource, nSource); -} -/* StringBuffer is a null-terminated version of DataBuffer. */ +/* +** A StringBuffer object holds a zero-terminated string that grows +** arbitrarily by appending. Space to hold the string is obtained +** from sqlite3_malloc(). 
After any memory allocation failure, +** StringBuffer.z is set to NULL and no further allocation is attempted. +*/ typedef struct StringBuffer { - DataBuffer b; /* Includes null terminator. */ + char *z; /* Text of the string. Space from malloc. */ + int nUsed; /* Number bytes of z[] used, not counting \000 terminator */ + int nAlloc; /* Bytes allocated for z[] */ } StringBuffer; -static void initStringBuffer(StringBuffer *sb){ - dataBufferInit(&sb->b, 100); - dataBufferReplace(&sb->b, "", 1); -} -static int stringBufferLength(StringBuffer *sb){ - return sb->b.nData-1; -} -static char *stringBufferData(StringBuffer *sb){ - return sb->b.pData; -} -static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){ - assert( sb->b.nData>0 ); - if( nFrom>0 ){ - sb->b.nData--; - dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1); - } -} -static void append(StringBuffer *sb, const char *zFrom){ - nappend(sb, zFrom, strlen(zFrom)); +/* +** Initialize a new StringBuffer. +*/ +static void fts3SnippetSbInit(StringBuffer *p){ + p->nAlloc = 100; + p->nUsed = 0; + p->z = sqlite3_malloc( p->nAlloc ); } -static int endsInWhiteSpace(StringBuffer *p){ - return stringBufferLength(p)>0 && - safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]); +/* +** Append text to the string buffer. +*/ +static void fts3SnippetAppend(StringBuffer *p, const char *zNew, int nNew){ + if( p->z==0 ) return; + if( nNew<0 ) nNew = strlen(zNew); + if( p->nUsed + nNew >= p->nAlloc ){ + int nAlloc; + char *zNew; + + nAlloc = p->nUsed + nNew + p->nAlloc; + zNew = sqlite3_realloc(p->z, nAlloc); + if( zNew==0 ){ + sqlite3_free(p->z); + p->z = 0; + return; + } + p->z = zNew; + p->nAlloc = nAlloc; + } + memcpy(&p->z[p->nUsed], zNew, nNew); + p->nUsed += nNew; + p->z[p->nUsed] = 0; } /* If the StringBuffer ends in something other than white space, add a ** single space character to the end. 
*/ -static void appendWhiteSpace(StringBuffer *p){ - if( stringBufferLength(p)==0 ) return; - if( !endsInWhiteSpace(p) ) append(p, " "); +static void fts3SnippetAppendWhiteSpace(StringBuffer *p){ + if( p->z && p->nUsed && !fts3snippetIsspace(p->z[p->nUsed-1]) ){ + fts3SnippetAppend(p, " ", 1); + } } /* Remove white space from the end of the StringBuffer */ -static void trimWhiteSpace(StringBuffer *p){ - while( endsInWhiteSpace(p) ){ - p->b.pData[--p->b.nData-1] = '\0'; +static void fts3SnippetTrimWhiteSpace(StringBuffer *p){ + if( p->z ){ + while( p->nUsed && fts3snippetIsspace(p->z[p->nUsed-1]) ){ + p->nUsed--; + } + p->z[p->nUsed] = 0; } } - /* ** Release all memory associated with the Snippet structure passed as ** an argument. */ static void fts3SnippetFree(Snippet *p){ - sqlite3_free(p->aMatch); - sqlite3_free(p->zOffset); - sqlite3_free(p->zSnippet); - sqlite3_free(p); + if( p ){ + sqlite3_free(p->aMatch); + sqlite3_free(p->zOffset); + sqlite3_free(p->zSnippet); + sqlite3_free(p); + } } /* ** Append a single entry to the p->aMatch[] log. 
*/ -static void snippetAppendMatch( +static int snippetAppendMatch( Snippet *p, /* Append the entry to this snippet */ int iCol, int iTerm, /* The column and query term */ int iToken, /* Matching token in document */ @@ -192,13 +142,16 @@ static void snippetAppendMatch( int i; struct snippetMatch *pMatch; if( p->nMatch+1>=p->nAlloc ){ + struct snippetMatch *pNew; p->nAlloc = p->nAlloc*2 + 10; - p->aMatch = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); - if( p->aMatch==0 ){ + pNew = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); + if( pNew==0 ){ + p->aMatch = 0; p->nMatch = 0; p->nAlloc = 0; - return; + return SQLITE_NOMEM; } + p->aMatch = pNew; } i = p->nMatch++; pMatch = &p->aMatch[i]; @@ -207,6 +160,7 @@ static void snippetAppendMatch( pMatch->iToken = iToken; pMatch->iStart = iStart; pMatch->nByte = nByte; + return SQLITE_OK; } /* @@ -280,7 +234,7 @@ static int fts3ExprBeneathNot(Fts3Expr *p){ ** Add entries to pSnippet->aMatch[] for every match that occurs against ** document zDoc[0..nDoc-1] which is stored in column iColumn. 
*/ -static void snippetOffsetsOfColumn( +static int snippetOffsetsOfColumn( Fts3Cursor *pCur, /* The fulltest search cursor */ Snippet *pSnippet, /* The Snippet object to be filled in */ int iColumn, /* Index of fulltext table column */ @@ -310,11 +264,12 @@ static void snippetOffsetsOfColumn( pTokenizer = pVtab->pTokenizer; pTModule = pTokenizer->pModule; rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); - if( rc ) return; + if( rc ) return rc; pTCursor->pTokenizer = pTokenizer; prevMatch = 0; - while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){ + while( (rc = pTModule->xNext(pTCursor, &zToken, &nToken, + &iBegin, &iEnd, &iPos))==SQLITE_OK ){ Fts3Expr *pIter = pCur->pExpr; int iIter = -1; iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin; @@ -339,15 +294,18 @@ static void snippetOffsetsOfColumn( if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){ for(j=nPhrase-1; j>=0; j--){ int k = (iRotor-j) & FTS3_ROTOR_MASK; - snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j, - iRotorBegin[k], iRotorLen[k]); + rc = snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j, + iRotorBegin[k], iRotorLen[k]); + if( rc ) goto end_offsets_of_column; } } } prevMatch = match<<1; iRotor++; } +end_offsets_of_column: pTModule->xClose(pTCursor); + return rc==SQLITE_DONE ? 
SQLITE_OK : rc; } /* @@ -489,6 +447,7 @@ static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){ int iFirst, iLast; int iTerm = 0; Snippet *pSnippet; + int rc = SQLITE_OK; if( pCsr->pExpr==0 ){ return SQLITE_OK; @@ -512,19 +471,23 @@ static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){ iFirst = iColumn; iLast = iColumn; } - for(i=iFirst; i<=iLast; i++){ + for(i=iFirst; rc==SQLITE_OK && i<=iLast; i++){ const char *zDoc; int nDoc; zDoc = (const char*)sqlite3_column_text(pCsr->pStmt, i+1); nDoc = sqlite3_column_bytes(pCsr->pStmt, i+1); - snippetOffsetsOfColumn(pCsr, pSnippet, i, zDoc, nDoc); + if( zDoc==0 && sqlite3_column_type(pCsr->pStmt, i+1)!=SQLITE_NULL ){ + rc = SQLITE_NOMEM; + }else{ + rc = snippetOffsetsOfColumn(pCsr, pSnippet, i, zDoc, nDoc); + } } while( trimSnippetOffsets(pCsr->pExpr, pSnippet, &iTerm) ){ iTerm = 0; } - return SQLITE_OK; + return rc; } /* @@ -538,7 +501,7 @@ static void snippetOffsetText(Snippet *p){ StringBuffer sb; char zBuf[200]; if( p->zOffset ) return; - initStringBuffer(&sb); + fts3SnippetSbInit(&sb); for(i=0; i<p->nMatch; i++){ struct snippetMatch *pMatch = &p->aMatch[i]; if( pMatch->iTerm>=0 ){ @@ -550,12 +513,12 @@ static void snippetOffsetText(Snippet *p){ zBuf[0] = ' '; sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d", pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte); - append(&sb, zBuf); + fts3SnippetAppend(&sb, zBuf, -1); cnt++; } } - p->zOffset = stringBufferData(&sb); - p->nOffset = stringBufferLength(&sb); + p->zOffset = sb.z; + p->nOffset = sb.z ?
sb.nUsed : 0; } /* @@ -593,10 +556,10 @@ static int wordBoundary( } } for(i=1; i<=10; i++){ - if( safe_isspace(zDoc[iBreak-i]) ){ + if( fts3snippetIsspace(zDoc[iBreak-i]) ){ return iBreak - i + 1; } - if( safe_isspace(zDoc[iBreak+i]) ){ + if( fts3snippetIsspace(zDoc[iBreak+i]) ){ return iBreak + i + 1; } } @@ -640,7 +603,7 @@ static void snippetText( pSnippet->zSnippet = 0; aMatch = pSnippet->aMatch; nMatch = pSnippet->nMatch; - initStringBuffer(&sb); + fts3SnippetSbInit(&sb); for(i=0; i=0) || iStart!=tailOffset ){ - trimWhiteSpace(&sb); - appendWhiteSpace(&sb); - append(&sb, zEllipsis); - appendWhiteSpace(&sb); + fts3SnippetTrimWhiteSpace(&sb); + fts3SnippetAppendWhiteSpace(&sb); + fts3SnippetAppend(&sb, zEllipsis, -1); + fts3SnippetAppendWhiteSpace(&sb); } iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); @@ -695,11 +658,11 @@ static void snippetText( } if( iMatchzSnippet = stringBufferData(&sb); - pSnippet->nSnippet = stringBufferLength(&sb); + pSnippet->zSnippet = sb.z; + pSnippet->nSnippet = sb.z ? 
sb.nUsed : 0; } void sqlite3Fts3Offsets( @@ -731,8 +694,16 @@ void sqlite3Fts3Offsets( ){ Snippet *p; /* Snippet structure */ int rc = snippetAllOffsets(pCsr, &p); - snippetOffsetText(p); - sqlite3_result_text(pCtx, p->zOffset, p->nOffset, SQLITE_TRANSIENT); + if( rc==SQLITE_OK ){ + snippetOffsetText(p); + if( p->zOffset ){ + sqlite3_result_text(pCtx, p->zOffset, p->nOffset, SQLITE_TRANSIENT); + }else{ + sqlite3_result_error_nomem(pCtx); + } + }else{ + sqlite3_result_error_nomem(pCtx); + } fts3SnippetFree(p); } @@ -745,8 +716,16 @@ void sqlite3Fts3Snippet( ){ Snippet *p; /* Snippet structure */ int rc = snippetAllOffsets(pCsr, &p); - snippetText(pCsr, p, zStart, zEnd, zEllipsis); - sqlite3_result_text(pCtx, p->zSnippet, p->nSnippet, SQLITE_TRANSIENT); + if( rc==SQLITE_OK ){ + snippetText(pCsr, p, zStart, zEnd, zEllipsis); + if( p->zSnippet ){ + sqlite3_result_text(pCtx, p->zSnippet, p->nSnippet, SQLITE_TRANSIENT); + }else{ + sqlite3_result_error_nomem(pCtx); + } + }else{ + sqlite3_result_error_nomem(pCtx); + } fts3SnippetFree(p); } diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index 3007982c73..1e108d41a2 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -646,9 +646,11 @@ static int fts3DeleteTerms(Fts3Table *p, sqlite3_value **apVal){ } } } + rc = sqlite3_reset(pSelect); + }else{ + sqlite3_reset(pSelect); } - - return sqlite3_reset(pSelect); + return rc; } /* diff --git a/manifest b/manifest index 80cd2eb3d3..909d7cf25f 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Change\sFTS3\sto\sdetect\swhen\sthe\sRHS\sof\sthe\sMATCH\sopertor\sencounters\san\sOOM\nduring\sstring\sformat\sconversion\sand\sreport\sback\san\sSQLITE_NOMEM\serror. -D 2009-11-28T17:23:48 +C Updates\sto\ssnippet()\sand\soffsets()\sfunctions\sof\sFTS3\sso\sthat\sthey\swork\nsanely\sfollowing\san\sOOM\sfault. 
+D 2009-11-28T21:33:21 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in c5827ead754ab32b9585487177c93bb00b9497b3 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -67,11 +67,11 @@ F ext/fts3/fts3_hash.c 29fba5a01e51c53e37040e53821e6b2cec18c8fb F ext/fts3/fts3_hash.h 39524725425078bf9e814e9569c74a8e5a21b9fb F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_porter.c 3063da945fb0a935781c135f7575f39166173eca -F ext/fts3/fts3_snippet.c 082f2906deaaa2656f19b88834e89d099352af6e +F ext/fts3/fts3_snippet.c b62144ea85f413b1226e6d8182320606d96e65ac F ext/fts3/fts3_tokenizer.c 36f78d1a43a29b0feaec1ced6da9e56b9c653d1f F ext/fts3/fts3_tokenizer.h 7ff73caa3327589bf6550f60d93ebdd1f6a0fb5c F ext/fts3/fts3_tokenizer1.c 0a5bcc579f35de5d24a9345d7908dc25ae403ee7 -F ext/fts3/fts3_write.c f1bffadc003b243bb804732c336c9ea55b85de09 +F ext/fts3/fts3_write.c 9b35ff9666b4867b406e63ca2277de6a81b53103 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33 F ext/icu/icu.c 12e763d288d23b5a49de37caa30737b971a2f1e2 @@ -778,14 +778,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 80754d383a0e890ea3f315dab941b9f166481ddd -R c701b62bb7bcebeb3cce64991b77dc95 +P 31eed4f8f95f0799d634eccbd9e09cb58172d250 +R 63b6d1158b26a83c6b254a0d208f841c U drh -Z 79bd1c9ce18f056385942b4e2103a96e +Z 8559238aab4fefe4430f422ac07d6956 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD8DBQFLEVynoxKgR168RlERAmNeAKCFnXvJIGrlx8Xq9FMVpghKHSc6IwCfWSxQ -fDg8QqRtiYK559S5hJk0sTo= -=pBpa +iD8DBQFLEZckoxKgR168RlERAilJAJ9fOhH9A66qH4Y5YOtTa1Ji3y2FSACcC046 +fNgY4pufsjXCsUyJbj/lIyo= +=K2mf -----END PGP SIGNATURE----- diff --git a/manifest.uuid 
b/manifest.uuid index 6aaa5c1867..689f72bae2 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -31eed4f8f95f0799d634eccbd9e09cb58172d250 \ No newline at end of file +b939a37a8ce296785a300e79ab9d3d87ad91343f \ No newline at end of file -- 2.47.2