From 015020cd1a1b25ccf750590cc4443d05929c906d Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 12 Apr 2023 17:40:44 +0000 Subject: [PATCH] Add the 'secure-delete' option to fts5. Used to configure fts5 to aggressively remove old full-text-index entries belonging to deleted or updated rows. FossilOrigin-Name: 4240fd09b717dbc69dffe3b88ec9149777ca4c3efa12f282af65be3af6fa5bb0 --- ext/fts5/fts5Int.h | 10 +- ext/fts5/fts5_config.c | 25 +- ext/fts5/fts5_index.c | 557 ++++++++++++++++++++++++++++----- ext/fts5/fts5_main.c | 24 ++ ext/fts5/test/fts5_common.tcl | 4 + ext/fts5/test/fts5secure.test | 278 ++++++++++++++++ ext/fts5/test/fts5secure2.test | 87 +++++ ext/fts5/test/fts5secure3.test | 162 ++++++++++ ext/fts5/test/fts5version.test | 76 ++++- manifest | 30 +- manifest.uuid | 2 +- 11 files changed, 1152 insertions(+), 103 deletions(-) create mode 100644 ext/fts5/test/fts5secure.test create mode 100644 ext/fts5/test/fts5secure2.test create mode 100644 ext/fts5/test/fts5secure3.test diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index e7e7043c60..79a227cb44 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -199,6 +199,7 @@ struct Fts5Config { int ePattern; /* FTS_PATTERN_XXX constant */ /* Values loaded from the %_config table */ + int iVersion; /* fts5 file format 'version' */ int iCookie; /* Incremented when %_config is modified */ int pgsz; /* Approximate page size used in %_data */ int nAutomerge; /* 'automerge' setting */ @@ -207,6 +208,7 @@ struct Fts5Config { int nHashSize; /* Bytes of memory for in-memory hash */ char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ + int bSecureDelete; /* 'secure-delete' */ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; @@ -216,8 +218,11 @@ struct Fts5Config { #endif }; -/* Current expected value of %_config table 'version' field */ -#define FTS5_CURRENT_VERSION 4 +/* Current expected value of %_config table 'version' field. 
And +** the expected version if the 'secure-delete' option has ever been +** set on the table. */ +#define FTS5_CURRENT_VERSION 4 +#define FTS5_CURRENT_VERSION_SECUREDELETE 5 #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 @@ -383,6 +388,7 @@ struct Fts5IndexIter { ** above. */ #define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 #define FTS5INDEX_QUERY_NOOUTPUT 0x0020 +#define FTS5INDEX_QUERY_SKIPHASH 0x0040 /* ** Create/destroy an Fts5Index object. diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index ab1a846b12..df79605ca0 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -903,6 +903,18 @@ int sqlite3Fts5ConfigSetValue( rc = SQLITE_OK; *pbBadkey = 1; } + } + + else if( 0==sqlite3_stricmp(zKey, "secure-delete") ){ + int bVal = -1; + if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ + bVal = sqlite3_value_int(pVal); + } + if( bVal<0 ){ + *pbBadkey = 1; + }else{ + pConfig->bSecureDelete = (bVal ? 1 : 0); + } }else{ *pbBadkey = 1; } @@ -947,15 +959,20 @@ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ rc = sqlite3_finalize(p); } - if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ + if( rc==SQLITE_OK + && iVersion!=FTS5_CURRENT_VERSION + && iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE + ){ rc = SQLITE_ERROR; if( pConfig->pzErrmsg ){ assert( 0==*pConfig->pzErrmsg ); - *pConfig->pzErrmsg = sqlite3_mprintf( - "invalid fts5 file format (found %d, expected %d) - run 'rebuild'", - iVersion, FTS5_CURRENT_VERSION + *pConfig->pzErrmsg = sqlite3_mprintf("invalid fts5 file format " + "(found %d, expected %d or %d) - run 'rebuild'", + iVersion, FTS5_CURRENT_VERSION, FTS5_CURRENT_VERSION_SECUREDELETE ); } + }else{ + pConfig->iVersion = iVersion; } if( rc==SQLITE_OK ){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 694cc16e45..705be21846 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -302,6 +302,8 @@ struct Fts5Index { sqlite3_stmt *pIdxSelect; int nRead; /* Total number of blocks read */ + 
sqlite3_stmt *pDeleteFromIdx; + sqlite3_stmt *pDataVersion; i64 iStructVersion; /* data_version when pStruct read */ Fts5Structure *pStruct; /* Current db structure (or NULL) */ @@ -394,9 +396,6 @@ struct Fts5CResult { ** iLeafOffset: ** Byte offset within the current leaf that is the first byte of the ** position list data (one byte passed the position-list size field). -** rowid field of the current entry. Usually this is the size field of the -** position list data. The exception is if the rowid for the current entry -** is the last thing on the leaf page. ** ** pLeaf: ** Buffer containing current leaf page data. Set to NULL at EOF. @@ -1443,42 +1442,25 @@ static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ pLvl->bEof = 1; }else{ u8 *a = pLvl->pData->p; - i64 iVal; - int iLimit; - int ii; - int nZero = 0; - - /* Currently iOff points to the first byte of a varint. This block - ** decrements iOff until it points to the first byte of the previous - ** varint. Taking care not to read any memory locations that occur - ** before the buffer in memory. */ - iLimit = (iOff>9 ? iOff-9 : 0); - for(iOff--; iOff>iLimit; iOff--){ - if( (a[iOff-1] & 0x80)==0 ) break; - } - - fts5GetVarint(&a[iOff], (u64*)&iVal); - pLvl->iRowid -= iVal; - pLvl->iLeafPgno--; - - /* Skip backwards past any 0x00 varints. */ - for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ - nZero++; - } - if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ - /* The byte immediately before the last 0x00 byte has the 0x80 bit - ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 - ** bytes before a[ii]. 
*/ - int bZero = 0; /* True if last 0x00 counts */ - if( (ii-8)>=pLvl->iFirstOff ){ - int j; - for(j=1; j<=8 && (a[ii-j] & 0x80); j++); - bZero = (j>8); + + pLvl->iOff = 0; + fts5DlidxLvlNext(pLvl); + while( 1 ){ + int nZero = 0; + int ii = pLvl->iOff; + u64 delta = 0; + + while( a[ii]==0 ){ + nZero++; + ii++; } - if( bZero==0 ) nZero--; + ii += sqlite3Fts5GetVarint(&a[ii], &delta); + + if( ii>=iOff ) break; + pLvl->iLeafPgno += nZero+1; + pLvl->iRowid += delta; + pLvl->iOff = ii; } - pLvl->iLeafPgno -= nZero; - pLvl->iOff = iOff - nZero; } return pLvl->bEof; @@ -1674,7 +1656,7 @@ static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ i64 iOff = pIter->iLeafOffset; ASSERT_SZLEAF_OK(pIter->pLeaf); - if( iOff>=pIter->pLeaf->szLeaf ){ + while( iOff>=pIter->pLeaf->szLeaf ){ fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ){ if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; @@ -1773,10 +1755,12 @@ static void fts5SegIterInit( fts5SegIterSetNext(p, pIter); pIter->pSeg = pSeg; pIter->iLeafPgno = pSeg->pgnoFirst-1; - fts5SegIterNextPage(p, pIter); + do { + fts5SegIterNextPage(p, pIter); + }while( p->rc==SQLITE_OK && pIter->pLeaf && pIter->pLeaf->nn==4 ); } - if( p->rc==SQLITE_OK ){ + if( p->rc==SQLITE_OK && pIter->pLeaf ){ pIter->iLeafOffset = 4; assert( pIter->pLeaf!=0 ); assert_nc( pIter->pLeaf->nn>4 ); @@ -2163,7 +2147,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ Fts5Data *pLast = 0; int pgnoLast = 0; - if( pDlidx ){ + if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = fts5DlidxIterPgno(pDlidx); pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); @@ -2724,7 +2708,8 @@ static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ /* ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. -** It is an error if leaf iLeafPgno does not exist or contains no rowids. +** It is an error if leaf iLeafPgno does not exist. 
Unless the db is +** a 'secure-delete' db, if it contains no rowids then this is also an error. */ static void fts5SegIterGotoPage( Fts5Index *p, /* FTS5 backend object */ @@ -2739,21 +2724,23 @@ static void fts5SegIterGotoPage( fts5DataRelease(pIter->pNextLeaf); pIter->pNextLeaf = 0; pIter->iLeafPgno = iLeafPgno-1; - fts5SegIterNextPage(p, pIter); - assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); - if( p->rc==SQLITE_OK && ALWAYS(pIter->pLeaf!=0) ){ + while( p->rc==SQLITE_OK ){ int iOff; - u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->szLeaf; - + fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf==0 ) break; iOff = fts5LeafFirstRowidOff(pIter->pLeaf); - if( iOff<4 || iOff>=n ){ - p->rc = FTS5_CORRUPT; - }else{ - iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); - pIter->iLeafOffset = iOff; - fts5SegIterLoadNPos(p, pIter); + if( iOff>0 ){ + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->szLeaf; + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + }else{ + iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + fts5SegIterLoadNPos(p, pIter); + } + break; } } } @@ -3468,7 +3455,7 @@ static void fts5MultiIterNew( if( iLevel<0 ){ assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); nSeg = pStruct->nSegment; - nSeg += (p->pHash ? 1 : 0); + nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH)); }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } @@ -3489,7 +3476,7 @@ static void fts5MultiIterNew( if( p->rc==SQLITE_OK ){ if( iLevel<0 ){ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; - if( p->pHash ){ + if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH) ){ /* Add a segment iterator for the current contents of the hash table. 
*/ Fts5SegIter *pIter = &pNew->aSeg[iIter++]; fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); @@ -4244,7 +4231,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); - fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff,&pData->p[iOff]); + fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff]); if( p->rc==SQLITE_OK ){ /* Set the szLeaf field */ fts5PutU16(&buf.p[2], (u16)buf.n); @@ -4565,6 +4552,368 @@ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ return ret; } +/* +** Execute the SQL statement: +** +** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno); +** +** This is used when a secure-delete operation removes the last term +** from a segment leaf page. In that case the %_idx entry is removed +** too. This is done to ensure that if all instances of a token are +** removed from an fts5 database in secure-delete mode, no trace of +** the token itself remains in the database. +*/ +static void fts5SecureDeleteIdxEntry( + Fts5Index *p, /* FTS5 backend object */ + int iSegid, /* Id of segment to delete entry for */ + int iPgno /* Page number within segment */ +){ + if( iPgno!=1 ){ + assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE ); + if( p->pDeleteFromIdx==0 ){ + fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintf( + "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)", + p->pConfig->zDb, p->pConfig->zName + )); + } + if( p->rc==SQLITE_OK ){ + sqlite3_bind_int(p->pDeleteFromIdx, 1, iSegid); + sqlite3_bind_int(p->pDeleteFromIdx, 2, iPgno); + sqlite3_step(p->pDeleteFromIdx); + p->rc = sqlite3_reset(p->pDeleteFromIdx); + } + } +} + +/* +** This is called when a secure-delete operation removes a position-list +** that overflows onto segment page iPgno of segment pSeg. 
This function +** rewrites node iPgno, and possibly one or more of its right-hand peers, +** to remove this portion of the position list. +** +** Output variable (*pbLastInDoclist) is set to true if the position-list +** removed is followed by a new term or the end-of-segment, or false if +** it is followed by another rowid/position list. +*/ +static void fts5SecureDeleteOverflow( + Fts5Index *p, + Fts5StructureSegment *pSeg, + int iPgno, + int *pbLastInDoclist +){ + int pgno; + Fts5Data *pLeaf = 0; + assert( iPgno!=1 ); + + *pbLastInDoclist = 1; + for(pgno=iPgno; p->rc==SQLITE_OK && pgno<=pSeg->pgnoLast; pgno++){ + i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno); + int iNext = 0; + u8 *aPg = 0; + + pLeaf = fts5DataRead(p, iRowid); + if( pLeaf==0 ) break; + aPg = pLeaf->p; + + iNext = fts5GetU16(&aPg[0]); + if( iNext!=0 ){ + *pbLastInDoclist = 0; + } + if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){ + fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext); + } + + if( iNext==0 ){ + /* The page contains no terms or rowids. Replace it with an empty + ** page and move on to the right-hand peer. */ + const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04}; + fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty)); + fts5DataRelease(pLeaf); + pLeaf = 0; + }else{ + int nShift = iNext - 4; + int nPg; + + int nIdx = 0; + u8 *aIdx = 0; + + /* Unless the current page footer is 0 bytes in size (in which case + ** the new page footer will be as well), allocate and populate a + ** buffer containing the new page footer. Set stack variables aIdx + ** and nIdx accordingly. */ + if( pLeaf->nn>pLeaf->szLeaf ){ + int iFirst = 0; + int i1 = pLeaf->szLeaf; + int i2 = 0; + + aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2); + if( aIdx==0 ) break; + i1 += fts5GetVarint32(&aPg[i1], iFirst); + i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift); + if( i1nn ){ + memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1); + i2 += (pLeaf->nn-i1); + } + nIdx = i2; + } + + /* Modify the contents of buffer aPg[]. 
Set nPg to the new size + ** in bytes. The new page is always smaller than the old. */ + nPg = pLeaf->szLeaf - nShift; + memmove(&aPg[4], &aPg[4+nShift], nPg-4); + fts5PutU16(&aPg[2], nPg); + if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4); + if( nIdx>0 ){ + memcpy(&aPg[nPg], aIdx, nIdx); + nPg += nIdx; + } + sqlite3_free(aIdx); + + /* Write the new page to disk and exit the loop */ + assert( nPg>4 || fts5GetU16(aPg)==0 ); + fts5DataWrite(p, iRowid, aPg, nPg); + break; + } + } + fts5DataRelease(pLeaf); +} + + +/* +** This is called as part of flushing a delete to disk in 'secure-delete' +** mode. It edits the segments within the database described by argument +** pStruct to remove the entries for term zTerm, rowid iRowid. +*/ +static void fts5FlushSecureDelete( + Fts5Index *p, + Fts5Structure *pStruct, + const char *zTerm, + i64 iRowid +){ + const int f = FTS5INDEX_QUERY_SKIPHASH; + int nTerm = strlen(zTerm); + Fts5Iter *pIter = 0; /* Used to find term instance */ + + fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter); + if( fts5MultiIterEof(p, pIter)==0 ){ + i64 iThis = fts5MultiIterRowid(pIter); + if( iThisrc==SQLITE_OK + && fts5MultiIterEof(p, pIter)==0 + && iRowid==fts5MultiIterRowid(pIter) + ){ + Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; + int iSegid = pSeg->pSeg->iSegid; + u8 *aPg = pSeg->pLeaf->p; + int nPg = pSeg->pLeaf->nn; + int iPgIdx = pSeg->pLeaf->szLeaf; + + u64 iDelta = 0; + u64 iNextDelta = 0; + int iNextOff = 0; + int iOff = 0; + int nMove = 0; + int nIdx = 0; + + u8 *aIdx = 0; + + nIdx = nPg-iPgIdx; + aIdx = sqlite3Fts5MallocZero(&p->rc, nIdx+16); + if( aIdx ){ + int bLastInDoclist = 0; + int iIdx = 0; + int iStart = 0; + int iKeyOff = 0; + int iPrevKeyOff = 0; + int nShift = 0; + int iDelKeyOff = 0; /* Offset of deleted key, if any */ + memcpy(aIdx, &aPg[iPgIdx], nIdx); + + /* At this point segment iterator pSeg points to the entry + ** this function should remove from the b-tree segment. 
+ ** + ** More specifically, pSeg->iLeafOffset is the offset of the + ** first byte in the position-list for the entry to remove. + ** Immediately before this comes two varints that will also + ** need to be removed: + ** + ** + the rowid or delta rowid value for the entry, and + ** + the size of the position list in bytes. + */ + { + int iSOP; + int nPos = 0; + if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){ + iStart = pSeg->iTermLeafOffset; + }else{ + iStart = fts5GetU16(&aPg[0]); + } + + iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta); + iSOP += fts5GetVarint32(&aPg[iSOP], nPos); + assert_nc( iSOP<=pSeg->iLeafOffset ); + while( iSOPiLeafOffset ){ + iStart = iSOP + (nPos/2); + iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta); + iSOP += fts5GetVarint32(&aPg[iSOP], nPos); + } + assert_nc( iSOP==pSeg->iLeafOffset ); + } + + iOff = iStart; + iNextOff = pSeg->iLeafOffset + pSeg->nPos; + if( iNextOff>=iPgIdx ){ + int pgno = pSeg->iLeafPgno+1; + fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist); + iNextOff = iPgIdx; + }else{ + /* Set bLastInDoclist to true if the entry being removed is the last + ** in its doclist. */ + for(iIdx=0, iKeyOff=0; iIdxiTermLeafOffset && pSeg->iLeafPgno==pSeg->iTermLeafPgno + ){ + /* The entry being removed was the only position list in its + ** doclist. Therefore the term needs to be removed as well. 
*/ + int iKey = 0; + for(iIdx=0, iKeyOff=0; iIdxiStart ) break; + iKeyOff += iVal; + } + + iDelKeyOff = iOff = iKeyOff; + if( iNextOff!=iPgIdx ){ + int nPrefix = 0; + int nSuffix = 0; + int nPrefix2 = 0; + int nSuffix2 = 0; + + iDelKeyOff = iNextOff; + iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2); + iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2); + + if( iKey!=1 ){ + iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix); + } + iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix); + + nPrefix = MIN(nPrefix, nPrefix2); + nSuffix = (nPrefix2 + nSuffix2) - nPrefix; + + if( iKey!=1 ){ + iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix); + } + iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix); + if( nPrefix2>nPrefix ){ + memcpy(&aPg[iOff], &zTerm[nPrefix], nPrefix2-nPrefix); + iOff += (nPrefix2-nPrefix); + } + memcpy(&aPg[iOff], &aPg[iNextOff], nSuffix2); + iOff += nSuffix2; + iNextOff += nSuffix2; + } + }else if( iStart==4 ){ + assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno ); + /* The entry being removed may be the only position list in + ** its doclist. 
*/ + int iPgno = pSeg->iLeafPgno-1; + + for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){ + Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno)); + int bEmpty = (pPg && pPg->nn==4); + fts5DataRelease(pPg); + if( bEmpty==0 ) break; + } + + if( iPgno==pSeg->iTermLeafPgno ){ + i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno); + Fts5Data *pTerm = fts5DataRead(p, iId); + if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){ + u8 *aTermIdx = &pTerm->p[pTerm->szLeaf]; + int nTermIdx = pTerm->nn - pTerm->szLeaf; + int iTermIdx = 0; + int iTermOff = 0; + + while( 1 ){ + u32 iVal = 0; + int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal); + iTermOff += iVal; + if( (iTermIdx+nByte)>=nTermIdx ) break; + iTermIdx += nByte; + } + nTermIdx = iTermIdx; + + memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx); + fts5PutU16(&pTerm->p[2], iTermOff); + + fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx); + if( nTermIdx==0 ){ + fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno); + } + } + fts5DataRelease(pTerm); + } + } + + nMove = nPg - iNextOff; + memmove(&aPg[iOff], &aPg[iNextOff], nMove); + iPgIdx -= (iNextOff - iOff); + nPg = iPgIdx; + fts5PutU16(&aPg[2], iPgIdx); + + nShift = iNextOff - iOff; + for(iIdx=0, iKeyOff=0, iPrevKeyOff=0; iIdxiOff ){ + iKeyOff -= nShift; + nShift = 0; + } + nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOff - iPrevKeyOff); + iPrevKeyOff = iKeyOff; + } + } + + if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){ + fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno); + } + + assert( nPg>4 || fts5GetU16(aPg)==0 ); + fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid, pSeg->iLeafPgno), aPg, nPg); + sqlite3_free(aIdx); + } + } + } + + fts5MultiIterFree(pIter); +} + + /* ** Flush the contents of in-memory hash table iHash to a new level-0 ** segment on disk. Also update the corresponding structure record. 
@@ -4587,6 +4936,7 @@ static void fts5FlushOneHash(Fts5Index *p){ if( iSegid ){ const int pgsz = p->pConfig->pgsz; int eDetail = p->pConfig->eDetail; + int bSecureDelete = p->pConfig->bSecureDelete; Fts5StructureSegment *pSeg; /* New segment within pStruct */ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ @@ -4609,29 +4959,53 @@ static void fts5FlushOneHash(Fts5Index *p){ } while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ const char *zTerm; /* Buffer containing term */ + int nTerm; /* Size of zTerm in bytes */ const u8 *pDoclist; /* Pointer to doclist for this term */ int nDoclist; /* Size of doclist in bytes */ - /* Write the term for this entry to disk. */ + /* Get the term and doclist for this entry. */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); - fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm); - if( p->rc!=SQLITE_OK ) break; + nTerm = (int)strlen(zTerm); + if( bSecureDelete==0 ){ + fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); + if( p->rc!=SQLITE_OK ) break; + assert( writer.bFirstRowidInPage==0 ); + } - assert( writer.bFirstRowidInPage==0 ); - if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ + if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ + int bTermWritten = !bSecureDelete; i64 iRowid = 0; - u64 iDelta = 0; + i64 iPrev = 0; int iOff = 0; /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current ** doclist. 
*/ while( p->rc==SQLITE_OK && iOffrc!=SQLITE_OK ) break; + bTermWritten = 1; + assert( writer.bFirstRowidInPage==0 ); + } if( writer.bFirstRowidInPage ){ fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */ @@ -4640,9 +5014,10 @@ static void fts5FlushOneHash(Fts5Index *p){ fts5WriteDlidxAppend(p, &writer, iRowid); if( p->rc!=SQLITE_OK ) break; }else{ - pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); + pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid-iPrev); } assert( pBuf->n<=pBuf->nSpace ); + iPrev = iRowid; if( eDetail==FTS5_DETAIL_NONE ){ if( iOffnLevel==0 ){ - fts5StructureAddLevel(&p->rc, &pStruct); - } - fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); - if( p->rc==SQLITE_OK ){ - pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; - pSeg->iSegid = iSegid; - pSeg->pgnoFirst = 1; - pSeg->pgnoLast = pgnoLast; - pStruct->nSegment++; + assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 ); + if( pgnoLast>0 ){ + /* Update the Fts5Structure. It is written back to the database by the + ** fts5StructureRelease() call below. 
*/ + if( pStruct->nLevel==0 ){ + fts5StructureAddLevel(&p->rc, &pStruct); + } + fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); + if( p->rc==SQLITE_OK ){ + pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; + pSeg->iSegid = iSegid; + pSeg->pgnoFirst = 1; + pSeg->pgnoLast = pgnoLast; + pStruct->nSegment++; + } + fts5StructurePromote(p, 0, pStruct); } - fts5StructurePromote(p, 0, pStruct); } fts5IndexAutomerge(p, &pStruct, pgnoLast); @@ -5455,6 +5833,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p){ sqlite3_finalize(p->pIdxDeleter); sqlite3_finalize(p->pIdxSelect); sqlite3_finalize(p->pDataVersion); + sqlite3_finalize(p->pDeleteFromIdx); sqlite3Fts5HashFree(p->pHash); sqlite3_free(p->zDataTbl); sqlite3_free(p); @@ -5834,6 +6213,7 @@ static void fts5TestDlidxReverse( int pgno = fts5DlidxIterPgno(pDlidx); assert( pgno>iLeaf ); cksum1 += iRowid + ((i64)pgno<<32); + // printf("1: rowid=%lld pgno=%d\n", iRowid, pgno); } fts5DlidxIterFree(pDlidx); pDlidx = 0; @@ -5846,6 +6226,7 @@ static void fts5TestDlidxReverse( int pgno = fts5DlidxIterPgno(pDlidx); assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); cksum2 += iRowid + ((i64)pgno<<32); + // printf("2: rowid=%lld pgno=%d\n", iRowid, pgno); } fts5DlidxIterFree(pDlidx); pDlidx = 0; @@ -6085,6 +6466,7 @@ static void fts5IndexIntegrityCheckSegment( Fts5StructureSegment *pSeg /* Segment to check internal consistency */ ){ Fts5Config *pConfig = p->pConfig; + int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE); sqlite3_stmt *pStmt = 0; int rc2; int iIdxPrevLeaf = pSeg->pgnoFirst-1; @@ -6120,7 +6502,19 @@ static void fts5IndexIntegrityCheckSegment( ** is also a rowid pointer within the leaf page header, it points to a ** location before the term. 
*/ if( pLeaf->nn<=pLeaf->szLeaf ){ - p->rc = FTS5_CORRUPT; + + if( nIdxTerm==0 + && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE + && pLeaf->nn==pLeaf->szLeaf + && pLeaf->nn==4 + ){ + /* special case - the very first page in a segment keeps its %_idx + ** entry even if all the terms are removed from it by secure-delete + ** operations. */ + }else{ + p->rc = FTS5_CORRUPT; + } + }else{ int iOff; /* Offset of first term on leaf */ int iRowidOff; /* Offset of first rowid on leaf */ @@ -6184,9 +6578,12 @@ static void fts5IndexIntegrityCheckSegment( ASSERT_SZLEAF_OK(pLeaf); if( iRowidOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; - }else{ + }else if( bSecureDelete==0 || iRowidOff>0 ){ + i64 iDlRowid = fts5DlidxIterRowid(pDlidx); fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; + if( iRowidrc = FTS5_CORRUPT; + } } fts5DataRelease(pLeaf); } diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 5392b3ba0f..13921ce49e 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -1623,6 +1623,8 @@ static int fts5UpdateMethod( Fts5Config *pConfig = pTab->p.pConfig; int eType0; /* value_type() of apVal[0] */ int rc = SQLITE_OK; /* Return code */ + int bUpdateOrDelete = 0; + /* A transaction must be open when this is called. */ assert( pTab->ts.eState==1 || pTab->ts.eState==2 ); @@ -1633,6 +1635,11 @@ static int fts5UpdateMethod( || sqlite3_value_type(apVal[0])==SQLITE_NULL ); assert( pTab->p.pConfig->pzErrmsg==0 ); + if( pConfig->pgsz==0 ){ + rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); + if( rc!=SQLITE_OK ) return rc; + } + pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; /* Put any active cursors into REQUIRE_SEEK state. 
*/ @@ -1685,6 +1692,7 @@ static int fts5UpdateMethod( else if( nArg==1 ){ i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0); + bUpdateOrDelete = 1; } /* INSERT or UPDATE */ @@ -1700,6 +1708,7 @@ static int fts5UpdateMethod( if( eConflict==SQLITE_REPLACE && eType1==SQLITE_INTEGER ){ i64 iNew = sqlite3_value_int64(apVal[1]); /* Rowid to delete */ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0); + bUpdateOrDelete = 1; } fts5StorageInsert(&rc, pTab, apVal, pRowid); } @@ -1728,10 +1737,24 @@ static int fts5UpdateMethod( rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0); fts5StorageInsert(&rc, pTab, apVal, pRowid); } + bUpdateOrDelete = 1; } } } + if( rc==SQLITE_OK + && bUpdateOrDelete + && pConfig->bSecureDelete + && pConfig->iVersion==FTS5_CURRENT_VERSION + ){ + rc = sqlite3Fts5StorageConfigValue( + pTab->pStorage, "version", 0, FTS5_CURRENT_VERSION_SECUREDELETE + ); + if( rc==SQLITE_OK ){ + pConfig->iVersion = FTS5_CURRENT_VERSION_SECUREDELETE; + } + } + pTab->p.pConfig->pzErrmsg = 0; return rc; } @@ -2591,6 +2614,7 @@ static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); fts5TripCursors(pTab); + pTab->p.pConfig->pgsz = 0; return sqlite3Fts5StorageRollback(pTab->pStorage); } diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 0f371dcfd9..9c012932da 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -594,6 +594,10 @@ proc nearset_rc {aCol args} { list } +proc dump {tname} { + execsql_pp "SELECT * FROM ${tname}_idx" + execsql_pp "SELECT id, quote(block), fts5_decode(id,block) FROM ${tname}_data" +} #------------------------------------------------------------------------- # Code for a simple Tcl tokenizer that supports synonyms at query time. 
diff --git a/ext/fts5/test/fts5secure.test b/ext/fts5/test/fts5secure.test new file mode 100644 index 0000000000..50d84cef79 --- /dev/null +++ b/ext/fts5/test/fts5secure.test @@ -0,0 +1,278 @@ +# 2023 Feb 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +source [file join [file dirname [info script]] fts5_common.tcl] +ifcapable !fts5 { finish_test ; return } +set ::testprefix fts5secure + +proc dump {tname} { + execsql_pp "SELECT * FROM ${tname}_idx" + execsql_pp "SELECT id, quote(block), fts5_decode(id,block) FROM ${tname}_data" +} + + +do_execsql_test 0.0 { + CREATE VIRTUAL TABLE t1 USING fts5(ab); + CREATE VIRTUAL TABLE v1 USING fts5vocab('t1', 'instance'); + INSERT INTO t1(rowid, ab) VALUES + (0,'abc'), (1,'abc'), (2,'abc'), (3,'abc'), (4,'def'); +} + +do_execsql_test 0.1 { + INSERT INTO t1(t1, rank) VALUES('secure-delete', 1); +} + +do_execsql_test 0.2 { + DELETE FROM t1 WHERE rowid=2; +} + +do_execsql_test 0.3 { + SELECT count(*) FROM t1_data +} 3 + +do_execsql_test 0.4 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +do_execsql_test 0.5 { + DELETE FROM t1 WHERE rowid=3; +} + +do_execsql_test 0.6 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +do_execsql_test 0.7 { + DELETE FROM t1 WHERE rowid=0; +} + +do_execsql_test 0.8 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +#---------------------------------- + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t2 USING fts5(ab); + INSERT INTO t2(rowid, ab) VALUES (5, 'key'), (6, 'value'); + INSERT INTO t2(t2, rank) VALUES('secure-delete', 1); +} + +#execsql_pp { SELECT id, quote(block) FROM t1_data } +#execsql_pp { SELECT segid, quote(term), pgno FROM t1_idx } + +do_execsql_test 1.1 { + DELETE FROM 
t2 WHERE rowid = 5; +} + +do_execsql_test 1.2 { + INSERT INTO t2(t2) VALUES('integrity-check'); +} + +do_execsql_test 1.3 { + DELETE FROM t2 WHERE rowid = 6; +} + +do_execsql_test 1.4 { + INSERT INTO t2(t2) VALUES('integrity-check'); +} + +do_execsql_test 1.5 { + SELECT * FROM t2('value'); + SELECT * FROM t2('v*'); +} + +do_execsql_test 1.6 { + SELECT * FROM t2('value') ORDER BY rowid DESC; + SELECT * FROM t2('v*') ORDER BY rowid DESC; +} +execsql_pp { + SELECT id, quote(block) FROM t2_data; +} + +#---------------------------------- + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft USING fts5(ab); + CREATE VIRTUAL TABLE vocab USING fts5vocab('ft', 'instance'); + INSERT INTO ft(rowid, ab) VALUES + (1, 'one'), + (2, 'two'), + (3, 'three'), + (4, 'four'), + (5, 'one one'), + (6, 'one two'), + (7, 'one three'), + (8, 'one four'), + (9, 'two one'), + (10, 'two two'), + (11, 'two three'), + (12, 'two four'), + (13, 'three one'), + (14, 'three two'), + (15, 'three three'), + (16, 'three four'); +} + +do_execsql_test 2.1 { + SELECT count(*) FROM ft_data; +} {3} + +do_execsql_test 2.2 { + INSERT INTO ft(ft, rank) VALUES('secure-delete', 1); +} + +do_execsql_test 2.3 { + DELETE FROM ft WHERE rowid=9; +} + +do_execsql_test 2.4 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +do_execsql_test 2.5 { + DELETE FROM ft WHERE ab LIKE '%two%' +} + +do_execsql_test 2.6 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +do_execsql_test 2.7 { + SELECT count(*) FROM ft_data; +} {3} + +#---------------------------------- +reset_db + +set ::vocab { + one two three four five six seven eight nine ten + eleven twelve thirteen fourteen fifteen sixteen + seventeen eighteen nineteen twenty +} +proc rnddoc {} { + set nVocab [llength $::vocab] + set ret [list] + for {set ii 0} {$ii < 8} {incr ii} { + lappend ret [lindex $::vocab [expr int(abs(rand()) * $nVocab)]] + } + set ret +} + +proc contains {list val} { + expr {[lsearch $list $val]>=0} +} + +foreach {tn pgsz} { + 2 64 + 1 1000 
+} { + reset_db + db function rnddoc rnddoc + db function contains contains + + expr srand(1) + + do_execsql_test 3.$tn.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz); + WITH s(i) AS ( + VALUES(1) UNION SELECT i+1 FROM s WHERE i<20 + ) + INSERT INTO t1 SELECT rnddoc() FROM s; + } + + do_execsql_test 3.$tn.1 { + INSERT INTO t1(t1, rank) VALUES('secure-delete', 1); + } + + foreach {rowid} { + 6 16 3 4 9 14 13 7 20 15 19 10 11 2 5 18 17 1 12 8 + } { + + do_execsql_test 3.$tn.2.$rowid { + DELETE FROM t1 WHERE rowid=$rowid; + } + do_execsql_test 3.$tn.2.$rowid.ic { + INSERT INTO t1(t1) VALUES('integrity-check'); + } + + foreach v $::vocab { + do_execsql_test 3.$tn.2.$rowid.q.$v { + SELECT rowid FROM t1($v) + } [db eval {SELECT rowid FROM t1 WHERE contains(x, $v)}] + + do_execsql_test 3.$tn.2.$rowid.q.$v.DESC { + SELECT rowid FROM t1($v) ORDER BY 1 DESC + } [db eval {SELECT rowid FROM t1 WHERE contains(x, $v) ORDER BY 1 DESC}] + } + } +} + +do_execsql_test 3.3 { + INSERT INTO t1(x) VALUES('optimize'); + INSERT INTO t1(t1) VALUES('optimize'); + SELECT count(*) FROM t1_data; +} {3} + +#---------------------------------- +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('secure-delete', 1); +} + +set L1 [string repeat abcdefghij 10] +set L2 [string repeat 1234567890 10] + +do_execsql_test 4.1 { + INSERT INTO t1 VALUES('aa' || $L1 || ' ' || $L2); +} +do_execsql_test 4.2 { + DELETE FROM t1 WHERE rowid=1 +} +do_execsql_test 4.3 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +#---------------------------------- +reset_db +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + INSERT INTO t1(t1, rank) VALUES('secure-delete', 1); +} + +set doc "aa [string repeat {abc } 60]" + +do_execsql_test 5.1 { + BEGIN; + INSERT INTO t1 VALUES($doc); + INSERT INTO t1 VALUES('aa 
abc'); + COMMIT; +} + +do_execsql_test 5.2 { + DELETE FROM t1 WHERE rowid = 1; +} + +do_execsql_test 5.3 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +do_execsql_test 5.4 { SELECT rowid FROM t1('abc'); } 2 +do_execsql_test 5.5 { SELECT rowid FROM t1('aa'); } 2 + + +finish_test + diff --git a/ext/fts5/test/fts5secure2.test b/ext/fts5/test/fts5secure2.test new file mode 100644 index 0000000000..04ff66219c --- /dev/null +++ b/ext/fts5/test/fts5secure2.test @@ -0,0 +1,87 @@ +# 2023 Feb 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +source [file join [file dirname [info script]] fts5_common.tcl] +ifcapable !fts5 { finish_test ; return } +set ::testprefix fts5secure2 + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5(col); + INSERT INTO ft VALUES('data for the table'); + INSERT INTO ft VALUES('more of the same'); + INSERT INTO ft VALUES('and extra data'); +} + +do_execsql_test 1.1 { + SELECT * FROM ft_config +} {version 4} + +do_execsql_test 1.2 { + INSERT INTO ft(ft, rank) VALUES('secure-delete', 1); + SELECT * FROM ft_config; +} {secure-delete 1 version 4} + +do_execsql_test 1.3 { + INSERT INTO ft(ft, rank) VALUES('secure-delete', 1); + SELECT * FROM ft_config; +} {secure-delete 1 version 4} + +do_execsql_test 1.4 { + DELETE FROM ft WHERE rowid=2; + SELECT * FROM ft_config; +} {secure-delete 1 version 5} + +do_execsql_test 1.5 { + SELECT rowid, col FROM ft('data'); +} {1 {data for the table} 3 {and extra data}} + +db close +sqlite3 db test.db + +do_execsql_test 1.6 { + SELECT rowid, col FROM ft('data'); +} {1 {data for the table} 3 {and extra data}} + +#------------------------------------------------------------------------ + +reset_db 
+do_execsql_test 2.0 { + CREATE VIRTUAL TABLE ft USING fts5(col); + INSERT INTO ft VALUES('one zero one one zero'); + INSERT INTO ft(ft, rank) VALUES('secure-delete', 1); +} + +do_execsql_test 2.1 { + SELECT count(*) FROM ft_data WHERE block=X'00000004'; +} {0} + +do_execsql_test 2.2 { + UPDATE ft SET col = 'zero one zero zero one' WHERE rowid=1; +} + +do_execsql_test 2.3 { + SELECT count(*) FROM ft_data WHERE block=X'00000004'; +} {1} + +do_execsql_test 2.4 { + INSERT INTO ft VALUES('one zero zero one'); + DELETE FROM ft WHERE rowid=1; +} + +do_execsql_test 2.5 { + SELECT count(*) FROM ft_data WHERE block=X'00000004'; +} {2} + + +finish_test + + diff --git a/ext/fts5/test/fts5secure3.test b/ext/fts5/test/fts5secure3.test new file mode 100644 index 0000000000..70c2028df6 --- /dev/null +++ b/ext/fts5/test/fts5secure3.test @@ -0,0 +1,162 @@ +# 2023 Feb 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#************************************************************************* +# + +source [file join [file dirname [info script]] fts5_common.tcl] +ifcapable !fts5 { finish_test ; return } +set ::testprefix fts5secure3 + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5(col); + INSERT INTO ft VALUES('data for the table'); + INSERT INTO ft VALUES('more of the same'); + INSERT INTO ft VALUES('and extra data'); + + INSERT INTO ft(ft, rank) VALUES('secure-delete', 1); +} + +do_execsql_test 1.1 { + BEGIN; + INSERT INTO ft(rowid, col) VALUES(0, 'the next data'); + DELETE FROM ft WHERE rowid=1; + DELETE FROM ft WHERE rowid=2; + INSERT INTO ft(rowid, col) VALUES(6, 'with some more of the same data'); + COMMIT; +} + +do_execsql_test 1.2 { + INSERT INTO ft(ft) VALUES('integrity-check'); +} + +#------------------------------------------------------------------------- + +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 64); + INSERT INTO t1(t1, rank) VALUES('secure-delete', 1); + BEGIN; + INSERT INTO t1 VALUES('the start'); +} +do_test 2.1 { + for {set i 0} {$i < 1000} {incr i} { + execsql { INSERT INTO t1 VALUES('the ' || hex(randomblob(3))) } + } + execsql { + INSERT INTO t1 VALUES('the end'); + COMMIT; + } +} {} + +do_execsql_test 2.2 { + DELETE FROM t1 WHERE rowid BETWEEN 2 AND 1000; +} + +do_execsql_test 2.3 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +do_execsql_test 2.6 { + INSERT INTO t1(rowid, x) VALUES(500, 'middle'); + INSERT INTO t1(rowid, x) VALUES(501, 'value'); + SELECT * FROM t1('the middle'); +} + +do_execsql_test 2.7 { + INSERT INTO t1(t1) VALUES('optimize'); +} + +do_execsql_test 2.8 { + SELECT count(*) FROM t1_data +} 4 + +#execsql_pp { SELECT id, quote(block), fts5_decode(id, block) FROM t1_data; } + +#------------------------------------------------------------------------- +# Tests with large/small rowid values. 
+# + +reset_db + +expr srand(0) + +set vocab { + Popper Poppins Popsicle Porfirio Porrima Porsche + Porter Portia Portland Portsmouth Portugal Portuguese + Poseidon Post PostgreSQL Potemkin Potomac Potsdam + Pottawatomie Potter Potts Pound Poussin Powell + PowerPC PowerPoint Powers Powhatan Poznan Prada + Prado Praetorian Prague Praia Prakrit Pratchett + Pratt Pravda Praxiteles Preakness Precambrian Preminger + Premyslid Prensa Prentice Pres Presbyterian Presbyterianism +} +proc newdoc {} { + for {set i 0} {$i<8} {incr i} { + lappend ret [lindex $::vocab [expr int(abs(rand()) * [llength $::vocab])]] + } + set ret +} +db func newdoc newdoc + +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE fff USING fts5(y); + INSERT INTO fff(fff, rank) VALUES('pgsz', 64); + + WITH s(x) AS ( VALUES(1) UNION ALL SELECT x+1 FROM s WHERE x<1000 ) + INSERT INTO fff(rowid, y) SELECT random() , newdoc() FROM s; + + WITH s(x) AS ( VALUES(1) UNION ALL SELECT x+1 FROM s WHERE x<1000 ) + INSERT INTO fff(rowid, y) SELECT random() , newdoc() FROM s; + + WITH s(x) AS ( VALUES(1) UNION ALL SELECT x+1 FROM s WHERE x<1000 ) + INSERT INTO fff(rowid, y) SELECT random() , newdoc() FROM s; + + INSERT INTO fff(fff, rank) VALUES('secure-delete', 1); +} + +proc lshuffle {in} { + set out [list] + while {[llength $in]>0} { + set idx [expr int(abs(rand()) * [llength $in])] + lappend out [lindex $in $idx] + set in [lreplace $in $idx $idx] + } + set out +} + +#dump fff + +set iTest 1 +foreach ii [lshuffle [db eval {SELECT rowid FROM fff}]] { + #if {$iTest==1} { dump fff } + #if {$iTest==1} { breakpoint } + do_execsql_test 3.1.$iTest.$ii { + DELETE FROM fff WHERE rowid=$ii; + } + #if {$iTest==1} { dump fff } + do_execsql_test 3.1.$iTest.$ii.ic { + INSERT INTO fff(fff) VALUES('integrity-check'); + } + #if {$iTest==1} { break } + incr iTest +} + +#execsql_pp { SELECT rowid FROM fff('post') ORDER BY rowid ASC } +#breakpoint +#execsql_pp { +# SELECT rowid FROM fff('post') ORDER BY rowid DESC +#} +# +#dump fff + + 
+finish_test + diff --git a/ext/fts5/test/fts5version.test b/ext/fts5/test/fts5version.test index 60ec81c03d..79fd94e6bc 100644 --- a/ext/fts5/test/fts5version.test +++ b/ext/fts5/test/fts5version.test @@ -38,20 +38,20 @@ do_execsql_test 1.3 { sqlite3_db_config db DEFENSIVE 0 do_execsql_test 1.4 { - UPDATE t1_config set v=5 WHERE k='version'; + UPDATE t1_config set v=6 WHERE k='version'; } do_test 1.5 { db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 6, expected 4 or 5) - run 'rebuild'}} do_test 1.6 { db close sqlite3 db test.db catchsql { INSERT INTO t1 VALUES('x y z') } -} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 6, expected 4 or 5) - run 'rebuild'}} do_test 1.7 { sqlite3_db_config db DEFENSIVE 0 @@ -59,7 +59,77 @@ do_test 1.7 { db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 0, expected 4 or 5) - run 'rebuild'}} + +do_test 1.8 { + sqlite3_db_config db DEFENSIVE 0 + execsql { INSERT INTO t1_config VALUES('version', 4) } + execsql { INSERT INTO t1(t1, rank) VALUES('secure-delete', 1) } +} {} + +do_execsql_test 1.10 { + SELECT * FROM t1_config +} {secure-delete 1 version 4} + +# With secure-delete on, a DELETE is expected to move the stored version from 4 to 5. +do_execsql_test 1.11 { + INSERT INTO t1(rowid, one) VALUES(123, 'one two three'); + DELETE FROM t1 WHERE rowid=123; + SELECT * FROM t1_config +} {secure-delete 1 version 5} + +# A 'rebuild' is expected to put the stored version back to 4. +do_execsql_test 1.12 { + INSERT INTO t1(t1) VALUES('rebuild'); + SELECT * FROM t1_config +} {secure-delete 1 version 4} + +do_execsql_test 1.13 { + SELECT * FROM t1_config +} {secure-delete 1 version 4} + +#------------------------------------------------------------------------- +reset_db + +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE xyz USING fts5(x); + INSERT INTO xyz(rowid, 
x) VALUES + (1, 'one document'), + (2, 'two document'), + (3, 'three document'), + (4, 'four document'), + (5, 'five document'), + (6, 'six document'); + + INSERT INTO xyz(xyz, rank) VALUES('secure-delete', 1); + SELECT v FROM xyz_config WHERE k='version'; +} {4} + +do_execsql_test 2.1 { + BEGIN; + INSERT INTO xyz(rowid, x) VALUES(7, 'seven document'); + SAVEPOINT one; + DELETE FROM xyz WHERE rowid = 4; +} + +do_execsql_test 2.2 { + SELECT v FROM xyz_config WHERE k='version'; +} {5} + +do_execsql_test 2.3 { + ROLLBACK TO one; + SELECT v FROM xyz_config WHERE k='version'; +} {4} + + +do_execsql_test 2.4 { + DELETE FROM xyz WHERE rowid = 3; + COMMIT; + SELECT v FROM xyz_config WHERE k='version'; +} {5} + + finish_test + diff --git a/manifest b/manifest index bc517744b1..ebb9c13a4c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C New\s#ifdef\sto\senable\sbuilding\swith\s-DSQLITE_OMIT_WINDOWFUNC. -D 2023-04-11T19:38:47.332 +C Add\sthe\s'secure-delete'\soption\sto\sfts5.\sUsed\sto\sconfigure\sfts5\sto\saggressively\sremove\sold\sfull-text-index\sentries\sbelonging\sto\sdeleted\sor\supdated\srows. 
+D 2023-04-12T17:40:44.865 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -86,14 +86,14 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h c132a9323f22a972c4c93a8d5a3d901113a6e612faf30ca8e695788438c5ca2a -F ext/fts5/fts5Int.h c0d46e399e345e35985b72a1c1af025973bfaa5b1e3563b0ce3bb0ce144a7ca3 +F ext/fts5/fts5Int.h f473de2bdae0977af0d6c8cce96e3666821b85efba5f6006c7732662c3aabcb3 F ext/fts5/fts5_aux.c 572d5ec92ba7301df2fea3258576332f2f4d2dfd66d8263afd157d9deceac480 F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 -F ext/fts5/fts5_config.c 501e7d3566bc92766b0e11c0109a7c5a6146bc41144195459af5422f6c2078aa +F ext/fts5/fts5_config.c 46af0b3c3c3f00bfc5bdd307434d7c5f0fa0678a034dd48345cd83b20068efbd F ext/fts5/fts5_expr.c 48e8e45261c6030cf5c77f606217a22722b1a4d0b34e2ba6cbfc386581627989 F ext/fts5/fts5_hash.c d4fb70940359f2120ccd1de7ffe64cc3efe65de9e8995b822cd536ff64c96982 -F ext/fts5/fts5_index.c df5b29576a409f673e54b470723d817df9d5167cff208c48ab9a3773cba6fa89 -F ext/fts5/fts5_main.c fe67b6fb2ef134d9dbfa3941c63f777d755b075449be1863cb913a7f8754cb69 +F ext/fts5/fts5_index.c 111da0b3226461111bf014a4e0c57dc61bcd7947b90584699ca1090f409fef45 +F ext/fts5/fts5_main.c b4dba04a36aaf9b8e8cef0100b6dbb422cc74753eacc11d6401cac7a87c0f38d F ext/fts5/fts5_storage.c 76c6085239eb44424004c022e9da17a5ecd5aaec859fba90ad47d3b08f4c8082 F ext/fts5/fts5_tcl.c b1445cbe69908c411df8084a10b2485500ac70a9c747cdc8cda175a3da59d8ae F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -104,7 
+104,7 @@ F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d0988 F ext/fts5/fts5_vocab.c 12138e84616b56218532e3e8feb1d3e0e7ae845e33408dbe911df520424dc9d6 F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl b01c584144b5064f30e6c648145a2dd6bc440841 +F ext/fts5/test/fts5_common.tcl a9de9c2209cc4e7ae3c753e783504e67206c6c1467d08f209cd0c5923d3e8d8b F ext/fts5/test/fts5aa.test 5bd43427b7d08ce2e19c488a26534be450538b9232d4d5305049e8de236e9aa9 F ext/fts5/test/fts5ab.test 9205c839332c908aaad2b01ab8670ece8b161e8f2ec8a9fabf18ca9385880bb7 F ext/fts5/test/fts5ac.test a7aa7e1fefc6e1918aa4d3111d5c44a09177168e962c5fd2cca9620de8a7ed6d @@ -187,6 +187,9 @@ F ext/fts5/test/fts5rebuild.test 55d6f17715cddbf825680dd6551efbc72ed916d8cf1cde4 F ext/fts5/test/fts5restart.test 835ecc8f449e3919f72509ab58056d0cedca40d1fe04108ccf8ac4c2ba41f415 F ext/fts5/test/fts5rowid.test b8790ec170a8dc1942a15aef3db926a5f3061b1ff171013003d8297203a20ad6 F ext/fts5/test/fts5savepoint.test fc02929f238d02a22df4172625704e029f7c1e0e92e332d654375690f8e6e43f +F ext/fts5/test/fts5secure.test 214a561519d1b1817f146efd1057e2a97cc896e75c2accc77157d874154bda64 +F ext/fts5/test/fts5secure2.test 2e961d7eef939f294c56b5d895cac7f1c3a60b934ee2cfd5e5e620bdf1ba6bbc +F ext/fts5/test/fts5secure3.test fd73b98a6e11038960b84109fed42f9e0a098a31338c94d07c163244d4bd7254 F ext/fts5/test/fts5simple.test a298670508c1458b88ce6030440f26a30673931884eb5f4094ac1773b3ba217b F ext/fts5/test/fts5simple2.test 258a1b0c590409bfa5271e872c79572b319d2a56554d0585f68f146a0da603f0 F ext/fts5/test/fts5simple3.test d5c74a9d3ca71bd5dd5cacb7c55b86ea12cdddfc8b1910e3de2995206898380f @@ -204,7 +207,7 @@ F ext/fts5/test/fts5unicode3.test 590c72e18195bda2446133f9d82d04a4e89d094bba58c7 F ext/fts5/test/fts5unicode4.test 6463301d669f963c83988017aa354108be0b947d325aef58d3abddf27147b687 F 
ext/fts5/test/fts5unindexed.test 9021af86a0fb9fc616f7a69a996db0116e7936d0db63892db6bafabbec21af4d F ext/fts5/test/fts5update.test b8affd796e45c94a4d19ad5c26606ea06065a0f162a9562d9f005b5a80ccf0bc -F ext/fts5/test/fts5version.test c8f2cc105f0abf0224965f93e584633dee3e06c91478bc67e468f7cfdf97fd6a +F ext/fts5/test/fts5version.test d6e5a5897550afeccc2f8531d87404dc1c289ee89385dd4318dbdd75e71d7a67 F ext/fts5/test/fts5vocab.test 7ed80d9af1ddaaa1637da05e406327b5aac250848bc604c1c1cc667908b87760 F ext/fts5/test/fts5vocab2.test 681980e92e031c9f3fe8d9c149189e876c108da2fb0fb3a25bd8a9b94bff8f68 F ext/fts5/tool/fts5speed.tcl b0056f91a55b2d1a3684ec05729de92b042e2f85 @@ -2052,8 +2055,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c8fb143d64d8e823684cd26799080da4b42bef121ca3c6315b1803a593490926 -R 23a3c5b92c8f88b7b4f263831382c52f -U drh -Z 51e0cafc6ddabd2f402c0ebee1449b15 +P e1ff83fa2565334b28bd0d6582088c4ae0d2d9a590d973615a4a598683fe419c +R 77520373799013e82dff45638b253c47 +T *branch * fts5-secure-delete +T *sym-fts5-secure-delete * +T -sym-trunk * +U dan +Z 0c15d52b2e44a1574709121e2f6a7b6b # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 497bd4caa0..5f639c5e40 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e1ff83fa2565334b28bd0d6582088c4ae0d2d9a590d973615a4a598683fe419c \ No newline at end of file +4240fd09b717dbc69dffe3b88ec9149777ca4c3efa12f282af65be3af6fa5bb0 \ No newline at end of file -- 2.47.2