From: drh Date: Wed, 30 Mar 2011 01:43:00 +0000 (+0000) Subject: Move to an O(NlogN) algorithm for the priority queue. An insertion sort X-Git-Tag: version-3.7.6~60^2~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2a0e00b436f94594307cb989cc1235be3011864e;p=thirdparty%2Fsqlite.git Move to an O(NlogN) algorithm for the priority queue. An insertion sort was way too slow. FossilOrigin-Name: 7958cbba736a599c1293b06602eec43dfe4fd7d1 --- diff --git a/manifest b/manifest index 10d2ec544b..7aec386c02 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\ssupport\sfor\srowid. -D 2011-03-29T23:41:31.447 +C Move\sto\san\sO(NlogN)\salgorithm\sfor\sthe\spriority\squeue.\s\sAn\sinsertion\ssort\nwas\sway\stoo\sslow. +D 2011-03-30T01:43:00.780 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 6c96e694f446500449f683070b906de9fce17b88 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -202,7 +202,7 @@ F src/test_config.c 62f0f8f934b1d5c7e4cd4f506ae453a1117b47d7 F src/test_demovfs.c 0aed671636735116fc872c5b03706fd5612488b5 F src/test_devsym.c e7498904e72ba7491d142d5c83b476c4e76993bc F src/test_func.c cbdec5cededa0761daedde5baf06004a9bf416b5 -F src/test_fuzzer.c dcb1e78badcf6f469ae386ecbed0e287920699c6 +F src/test_fuzzer.c edc2aaa0f75ce49efef39bcd2df45138479b0992 F src/test_hexio.c 1237f000ec7a491009b1233f5c626ea71bce1ea2 F src/test_init.c 5d624ffd0409d424cf9adbfe1f056b200270077c F src/test_intarray.c d879bbf8e4ce085ab966d1f3c896a7c8b4f5fc99 @@ -918,7 +918,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P dd41155bc7459cafc1a2d5c75233193abfbac05d -R 580275be924edc1c171a4ef2518a45c4 +P 2cf4158ff051916717fc2c0f4b6332d5f6ea6e3d +R 9250685bf3b3b21491f862d4fc8952d0 U drh -Z c835c92c0dd7f7913ad85caf0a4ef7ac +Z 
0b9911f371ac67382b47460caefb0a30 diff --git a/manifest.uuid b/manifest.uuid index 398d9aafa8..dfc8e61028 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2cf4158ff051916717fc2c0f4b6332d5f6ea6e3d \ No newline at end of file +7958cbba736a599c1293b06602eec43dfe4fd7d1 \ No newline at end of file diff --git a/src/test_fuzzer.c b/src/test_fuzzer.c index 5287b1f457..ffdd61c9f6 100644 --- a/src/test_fuzzer.c +++ b/src/test_fuzzer.c @@ -66,6 +66,7 @@ struct fuzzer_stem { const fuzzer_rule *pRule; /* Current rule to apply */ int n; /* Apply pRule at this character offset */ fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */ + fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */ fuzzer_stem *pNext; /* Next stem in rCost order */ fuzzer_stem *pHash; /* Next stem with same hash on zBasis */ }; @@ -82,6 +83,7 @@ struct fuzzer_vtab { }; #define FUZZER_HASH 4001 /* Hash table size */ +#define FUZZER_NQUEUE 20 /* Number of slots on the stem queue */ /* A fuzzer cursor object */ struct fuzzer_cursor { @@ -89,10 +91,13 @@ struct fuzzer_cursor { sqlite3_int64 iRowid; /* The rowid of the current word */ fuzzer_vtab *pVtab; /* The virtual table this cursor belongs to */ fuzzer_cost rLimit; /* Maximum cost of any term */ - fuzzer_stem *pStem; /* Sorted list of stems for generating new terms */ + fuzzer_stem *pStem; /* Stem with smallest rCostX */ fuzzer_stem *pDone; /* Stems already processed to completion */ + fuzzer_stem *aQueue[FUZZER_NQUEUE]; /* Queue of stems with higher rCostX */ + int mxQueue; /* Largest used index in aQueue[] */ char *zBuf; /* Temporary use buffer */ int nBuf; /* Bytes allocated for zBuf */ + int nStem; /* Number of stems allocated */ fuzzer_rule nullRule; /* Null rule used first */ fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */ }; @@ -205,23 +210,35 @@ static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ return SQLITE_OK; } +/* +** Free all stems in a list. 
+*/ +static void fuzzerClearStemList(fuzzer_stem *pStem){ + while( pStem ){ + fuzzer_stem *pNext = pStem->pNext; + sqlite3_free(pStem); + pStem = pNext; + } +} + /* ** Free up all the memory allocated by a cursor. Set it rLimit to 0 ** to indicate that it is at EOF. */ static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){ - if( pCur->pStem==0 && pCur->pDone==0 ) clearHash = 0; - do{ - while( pCur->pStem ){ - fuzzer_stem *pStem = pCur->pStem; - pCur->pStem = pStem->pNext; - sqlite3_free(pStem); - } - pCur->pStem = pCur->pDone; - pCur->pDone = 0; - }while( pCur->pStem ); + int i; + fuzzerClearStemList(pCur->pStem); + fuzzerClearStemList(pCur->pDone); + for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]); pCur->rLimit = (fuzzer_cost)0; - if( clearHash ) memset(pCur->apHash, 0, sizeof(pCur->apHash)); + if( clearHash && pCur->nStem ){ + pCur->mxQueue = 0; + pCur->pStem = 0; + pCur->pDone = 0; + memset(pCur->aQueue, 0, sizeof(pCur->aQueue)); + memset(pCur->apHash, 0, sizeof(pCur->apHash)); + } + pCur->nStem = 0; } /* @@ -280,7 +297,7 @@ static unsigned int fuzzerHash(const char *z){ ** Current cost of a stem */ static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){ - return pStem->rBaseCost + pStem->pRule->rCost; + return pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost; } #if 0 @@ -304,7 +321,7 @@ static void fuzzerStemPrint( if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return; fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s", zPrefix, - pStem->zBasis, pStem->rBaseCost, zBuf, fuzzerCost(pStem), + pStem->zBasis, pStem->rBaseCost, zBuf, pStem->, zSuffix ); sqlite3_free(zBuf); @@ -349,6 +366,7 @@ static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){ int rc = fuzzerSeen(pCur, pStem); if( rc<0 ) return -1; if( rc==0 ){ + fuzzerCost(pStem); return 1; } } @@ -361,31 +379,106 @@ static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){ } /* -** Insert pNew into the list at pList. 
Return a pointer to the new +** The two input stem lists are both sorted in order of increasing +** rCostX. Merge them together into a single list, sorted by rCostX, and +** return a pointer to the head of that new list. +*/ +static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){ + fuzzer_stem head; + fuzzer_stem *pTail; + + pTail = &head; + while( pA && pB ){ + if( pA->rCostX<=pB->rCostX ){ + pTail->pNext = pA; + pTail = pA; + pA = pA->pNext; + }else{ + pTail->pNext = pB; + pTail = pB; + pB = pB->pNext; + } + } + if( pA==0 ){ + pTail->pNext = pB; + }else{ + pTail->pNext = pA; + } + return head.pNext; +} + +/* +** Load pCur->pStem with the lowest-cost stem. Return a pointer +** to the lowest-cost stem. +*/ +static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){ + fuzzer_stem *pBest, *pX; + int iBest; + int i; + + if( pCur->pStem==0 ){ + iBest = -1; + pBest = 0; + for(i=0; i<=pCur->mxQueue; i++){ + pX = pCur->aQueue[i]; + if( pX==0 ) continue; + if( pBest==0 || pBest->rCostX>pX->rCostX ){ + pBest = pX; + iBest = i; + } + } + if( pBest ){ + pCur->aQueue[iBest] = pBest->pNext; + pBest->pNext = 0; + pCur->pStem = pBest; + } + } + return pCur->pStem; +} + +/* +** Insert pNew into queue of pending stems. Then find the stem +** with the lowest rCostX and move it into pCur->pStem. ** list. The insert is done such the pNew is in the correct order ** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost. */ -static fuzzer_stem *fuzzerInsert(fuzzer_stem *pList, fuzzer_stem *pNew){ - fuzzer_cost c1; +static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){ + fuzzer_stem *pX; + int i; - if( pList==0 ){ + /* If pCur->pStem exists and is greater than pNew, then make pNew + ** the new pCur->pStem and insert the old pCur->pStem instead. 
+ */ + if( (pX = pCur->pStem)!=0 && pX->rCostX>pNew->rCostX ){ pNew->pNext = 0; - return pNew; + pCur->pStem = pNew; + pNew = pX; } - c1 = fuzzerCost(pNew); - if( c1 <= fuzzerCost(pList) ){ - pNew->pNext = pList; - return pNew; - }else{ - fuzzer_stem *pPrev; - pPrev = pList; - while( pPrev->pNext && fuzzerCost(pPrev->pNext)<c1 ){ - pPrev = pPrev->pNext; + + /* Insert the new value */ + pNew->pNext = 0; + pX = pNew; + for(i=0; i<=pCur->mxQueue; i++){ + if( pCur->aQueue[i] ){ + pX = fuzzerMergeStems(pX, pCur->aQueue[i]); + pCur->aQueue[i] = 0; + }else{ + pCur->aQueue[i] = pX; + break; + } + } + if( i>pCur->mxQueue ){ + if( i<FUZZER_NQUEUE ){ + pCur->mxQueue = i; + pCur->aQueue[i] = pX; + }else{ + assert( pCur->mxQueue==FUZZER_NQUEUE-1 ); + pX = fuzzerMergeStems(pX, pCur->aQueue[FUZZER_NQUEUE-1]); + pCur->aQueue[FUZZER_NQUEUE-1] = pX; } - pNew->pNext = pPrev->pNext; - pPrev->pNext = pNew; - return pList; } + + return fuzzerLowestCostStem(pCur); } /* @@ -408,10 +501,11 @@ static fuzzer_stem *fuzzerNewStem( memcpy(pNew->zBasis, zWord, pNew->nBasis+1); pNew->pRule = pCur->pVtab->pRule; pNew->n = -1; - pNew->rBaseCost = rBaseCost; + pNew->rBaseCost = pNew->rCostX = rBaseCost; h = fuzzerHash(pNew->zBasis); pNew->pHash = pCur->apHash[h]; pCur->apHash[h] = pNew; + pCur->nStem++; return pNew; } @@ -430,17 +524,16 @@ static int fuzzerNext(sqlite3_vtab_cursor *cur){ ** a new stem and insert the new stem into the priority queue. 
*/ pStem = pCur->pStem; - if( fuzzerCost(pStem)>0 ){ + if( pStem->rCostX>0 ){ rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf); if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM; - pNew = fuzzerNewStem(pCur, pCur->zBuf, fuzzerCost(pStem)); + pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX); if( pNew ){ if( fuzzerAdvance(pCur, pNew)==0 ){ pNew->pNext = pCur->pDone; pCur->pDone = pNew; }else{ - pCur->pStem = fuzzerInsert(pStem, pNew); - if( pCur->pStem==pNew ){ + if( fuzzerInsert(pCur, pNew)==pNew ){ return SQLITE_OK; } } @@ -454,17 +547,18 @@ static int fuzzerNext(sqlite3_vtab_cursor *cur){ */ while( (pStem = pCur->pStem)!=0 ){ if( fuzzerAdvance(pCur, pStem) ){ - pCur->pStem = pStem = fuzzerInsert(pStem->pNext, pStem); + pCur->pStem = 0; + pStem = fuzzerInsert(pCur, pStem); if( (rc = fuzzerSeen(pCur, pStem))!=0 ){ if( rc<0 ) return SQLITE_NOMEM; continue; } return SQLITE_OK; /* New word found */ } - pCur->pStem = pStem->pNext; + pCur->pStem = 0; pStem->pNext = pCur->pDone; pCur->pDone = pStem; - if( pCur->pStem ){ + if( fuzzerLowestCostStem(pCur) ){ rc = fuzzerSeen(pCur, pCur->pStem); if( rc<0 ) return SQLITE_NOMEM; if( rc==0 ){ @@ -531,7 +625,7 @@ static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT); }else if( i==1 ){ /* the "distance" column */ - sqlite3_result_int(ctx, fuzzerCost(pCur->pStem)); + sqlite3_result_int(ctx, pCur->pStem->rCostX); }else{ /* All other columns are NULL */ sqlite3_result_null(ctx);