Move to an O(NlogN) algorithm for the priority queue. An insertion sort was way too slow.

author drh <drh@noemail.net>

Wed, 30 Mar 2011 01:43:00 +0000 (01:43 +0000)

committer drh <drh@noemail.net>

Wed, 30 Mar 2011 01:43:00 +0000 (01:43 +0000)
author drh <drh@noemail.net>
Wed, 30 Mar 2011 01:43:00 +0000 (01:43 +0000)
committer drh <drh@noemail.net>
Wed, 30 Mar 2011 01:43:00 +0000 (01:43 +0000)
diff --git a/manifest b/manifest

index 10d2ec544b4db9533658841fcdbd58b7c962786f..7aec386c02a1a2e481dbe7372f71c1657fd88f1b 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\ssupport\sfor\srowid.
-D 2011-03-29T23:41:31.447
+C Move\sto\san\sO(NlogN)\salgorithm\sfor\sthe\spriority\squeue.\s\sAn\sinsertion\ssort\nwas\sway\stoo\sslow.
+D 2011-03-30T01:43:00.780
  F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
  F Makefile.in 6c96e694f446500449f683070b906de9fce17b88
  F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -202,7 +202,7 @@ F src/test_config.c 62f0f8f934b1d5c7e4cd4f506ae453a1117b47d7
  F src/test_demovfs.c 0aed671636735116fc872c5b03706fd5612488b5
  F src/test_devsym.c e7498904e72ba7491d142d5c83b476c4e76993bc
  F src/test_func.c cbdec5cededa0761daedde5baf06004a9bf416b5
-F src/test_fuzzer.c dcb1e78badcf6f469ae386ecbed0e287920699c6
+F src/test_fuzzer.c edc2aaa0f75ce49efef39bcd2df45138479b0992
  F src/test_hexio.c 1237f000ec7a491009b1233f5c626ea71bce1ea2
  F src/test_init.c 5d624ffd0409d424cf9adbfe1f056b200270077c
  F src/test_intarray.c d879bbf8e4ce085ab966d1f3c896a7c8b4f5fc99
@@ -918,7 +918,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
  F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
  F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
  F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P dd41155bc7459cafc1a2d5c75233193abfbac05d
-R 580275be924edc1c171a4ef2518a45c4
+P 2cf4158ff051916717fc2c0f4b6332d5f6ea6e3d
+R 9250685bf3b3b21491f862d4fc8952d0
  U drh
-Z c835c92c0dd7f7913ad85caf0a4ef7ac
+Z 0b9911f371ac67382b47460caefb0a30
diff --git a/manifest.uuid b/manifest.uuid

index 398d9aafa88b966ca4112fccfdcf68303d2828d9..dfc8e61028dc49baea3303b9c548c8c61c92e2cf 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-2cf4158ff051916717fc2c0f4b6332d5f6ea6e3d
-\ No newline at end of file
+7958cbba736a599c1293b06602eec43dfe4fd7d1
+\ No newline at end of file
diff --git a/src/test_fuzzer.c b/src/test_fuzzer.c

index 5287b1f457d09cf51c6711fa90d741f46f8bb1d2..ffdd61c9f63b5cbb82a122a444da851933e8c7a5 100644 (file)
--- a/src/test_fuzzer.c
+++ b/src/test_fuzzer.c
@@ -66,6 +66,7 @@ struct fuzzer_stem {
    const fuzzer_rule *pRule;  /* Current rule to apply */
    int n;                     /* Apply pRule at this character offset */
    fuzzer_cost rBaseCost;     /* Base cost of getting to zBasis */
+  fuzzer_cost rCostX;        /* Precomputed rBaseCost + pRule->rCost */
    fuzzer_stem *pNext;        /* Next stem in rCost order */
    fuzzer_stem *pHash;        /* Next stem with same hash on zBasis */
  };
@@ -82,6 +83,7 @@ struct fuzzer_vtab {
  };
  
  #define FUZZER_HASH  4001    /* Hash table size */
+#define FUZZER_NQUEUE  20    /* Number of slots on the stem queue */
  
  /* A fuzzer cursor object */
  struct fuzzer_cursor {
@@ -89,10 +91,13 @@ struct fuzzer_cursor {
    sqlite3_int64 iRowid;      /* The rowid of the current word */
    fuzzer_vtab *pVtab;        /* The virtual table this cursor belongs to */
    fuzzer_cost rLimit;        /* Maximum cost of any term */
-  fuzzer_stem *pStem;        /* Sorted list of stems for generating new terms */
+  fuzzer_stem *pStem;        /* Stem with smallest rCostX */
    fuzzer_stem *pDone;        /* Stems already processed to completion */
+  fuzzer_stem *aQueue[FUZZER_NQUEUE];  /* Queue of stems with higher rCostX */
+  int mxQueue;               /* Largest used index in aQueue[] */
    char *zBuf;                /* Temporary use buffer */
    int nBuf;                  /* Bytes allocated for zBuf */
+  int nStem;                 /* Number of stems allocated */
    fuzzer_rule nullRule;      /* Null rule used first */
    fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */
  };
@@ -205,23 +210,35 @@ static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
    return SQLITE_OK;
  }
  
+/*
+** Free all stems in a list.
+*/
+static void fuzzerClearStemList(fuzzer_stem *pStem){
+  while( pStem ){
+    fuzzer_stem *pNext = pStem->pNext;
+    sqlite3_free(pStem);
+    pStem = pNext;
+  }
+}
+
  /*
  ** Free up all the memory allocated by a cursor.  Set its rLimit to 0
  ** to indicate that it is at EOF.
  */
  static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){
-  if( pCur->pStem==0 && pCur->pDone==0 ) clearHash = 0;
-  do{
-    while( pCur->pStem ){
-      fuzzer_stem *pStem = pCur->pStem;
-      pCur->pStem = pStem->pNext;
-      sqlite3_free(pStem);
-    }
-    pCur->pStem = pCur->pDone;
-    pCur->pDone = 0;
-  }while( pCur->pStem );
+  int i;
+  fuzzerClearStemList(pCur->pStem);
+  fuzzerClearStemList(pCur->pDone);
+  for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]);
    pCur->rLimit = (fuzzer_cost)0;
-  if( clearHash ) memset(pCur->apHash, 0, sizeof(pCur->apHash));
+  if( clearHash && pCur->nStem ){
+    pCur->mxQueue = 0;
+    pCur->pStem = 0;
+    pCur->pDone = 0;
+    memset(pCur->aQueue, 0, sizeof(pCur->aQueue));
+    memset(pCur->apHash, 0, sizeof(pCur->apHash));
+  }
+  pCur->nStem = 0;
  }
  
  /*
@@ -280,7 +297,7 @@ static unsigned int fuzzerHash(const char *z){
  ** Current cost of a stem
  */
  static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){
-  return pStem->rBaseCost + pStem->pRule->rCost;
+  return pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost;
  }
  
  #if 0
@@ -304,7 +321,7 @@ static void fuzzerStemPrint(
      if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return;
      fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s",
        zPrefix,
-      pStem->zBasis, pStem->rBaseCost, zBuf, fuzzerCost(pStem),
+      pStem->zBasis, pStem->rBaseCost, zBuf, pStem->rCostX,
        zSuffix
      );
      sqlite3_free(zBuf);
@@ -349,6 +366,7 @@ static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
          int rc = fuzzerSeen(pCur, pStem);
          if( rc<0 ) return -1;
          if( rc==0 ){
+          fuzzerCost(pStem);
            return 1;
          }
        }
@@ -361,31 +379,106 @@ static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
  }
  
  /*
-** Insert pNew into the list at pList.  Return a pointer to the new
+** The two input stem lists are both sorted in order of increasing
+** rCostX.  Merge them together into a single list, sorted by rCostX, and
+** return a pointer to the head of that new list.
+*/
+static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){
+  fuzzer_stem head;
+  fuzzer_stem *pTail;
+
+  pTail =  &head;
+  while( pA && pB ){
+    if( pA->rCostX<=pB->rCostX ){
+      pTail->pNext = pA;
+      pTail = pA;
+      pA = pA->pNext;
+    }else{
+      pTail->pNext = pB;
+      pTail = pB;
+      pB = pB->pNext;
+    }
+  }
+  if( pA==0 ){
+    pTail->pNext = pB;
+  }else{
+    pTail->pNext = pA;
+  }
+  return head.pNext;
+}
+
+/*
+** Load pCur->pStem with the lowest-cost stem.  Return a pointer
+** to the lowest-cost stem.
+*/
+static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){
+  fuzzer_stem *pBest, *pX;
+  int iBest;
+  int i;
+
+  if( pCur->pStem==0 ){
+    iBest = -1;
+    pBest = 0;
+    for(i=0; i<=pCur->mxQueue; i++){
+      pX = pCur->aQueue[i];
+      if( pX==0 ) continue;
+      if( pBest==0 || pBest->rCostX>pX->rCostX ){
+        pBest = pX;
+        iBest = i;
+      }
+    } 
+    if( pBest ){
+      pCur->aQueue[iBest] = pBest->pNext;
+      pBest->pNext = 0;
+      pCur->pStem = pBest;
+    }
+  }
+  return pCur->pStem;
+}
+
+/*
+** Insert pNew into queue of pending stems.  Then find the stem
+** with the lowest rCostX and move it into pCur->pStem.
  ** Stems are ordered by rCostX, which is the precomputed value of
  ** fuzzer_stem.rBaseCost+fuzzer_stem.pRule->rCost.
  */
-static fuzzer_stem *fuzzerInsert(fuzzer_stem *pList, fuzzer_stem *pNew){
-  fuzzer_cost c1;
+static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){
+  fuzzer_stem *pX;
+  int i;
  
-  if( pList==0 ){
+  /* If pCur->pStem exists and is greater than pNew, then make pNew
+  ** the new pCur->pStem and insert the old pCur->pStem instead.
+  */
+  if( (pX = pCur->pStem)!=0 && pX->rCostX>pNew->rCostX ){
      pNew->pNext = 0;
-    return pNew;
+    pCur->pStem = pNew;
+    pNew = pX;
    }
-  c1 = fuzzerCost(pNew);
-  if( c1 <= fuzzerCost(pList) ){
-    pNew->pNext = pList;
-    return pNew;
-  }else{
-    fuzzer_stem *pPrev;
-    pPrev = pList;
-    while( pPrev->pNext && fuzzerCost(pPrev->pNext)<c1 ){
-      pPrev = pPrev->pNext;
+
+  /* Insert the new value */
+  pNew->pNext = 0;
+  pX = pNew;
+  for(i=0; i<=pCur->mxQueue; i++){
+    if( pCur->aQueue[i] ){
+      pX = fuzzerMergeStems(pX, pCur->aQueue[i]);
+      pCur->aQueue[i] = 0;
+    }else{
+      pCur->aQueue[i] = pX;
+      break;
+    }
+  }
+  if( i>pCur->mxQueue ){
+    if( i<FUZZER_NQUEUE ){
+      pCur->mxQueue = i;
+      pCur->aQueue[i] = pX;
+    }else{
+      assert( pCur->mxQueue==FUZZER_NQUEUE-1 );
+      pX = fuzzerMergeStems(pX, pCur->aQueue[FUZZER_NQUEUE-1]);
+      pCur->aQueue[FUZZER_NQUEUE-1] = pX;
      }
-    pNew->pNext = pPrev->pNext;
-    pPrev->pNext = pNew;
-    return pList;
    }
+
+  return fuzzerLowestCostStem(pCur);
  }
  
  /*
@@ -408,10 +501,11 @@ static fuzzer_stem *fuzzerNewStem(
    memcpy(pNew->zBasis, zWord, pNew->nBasis+1);
    pNew->pRule = pCur->pVtab->pRule;
    pNew->n = -1;
-  pNew->rBaseCost = rBaseCost;
+  pNew->rBaseCost = pNew->rCostX = rBaseCost;
    h = fuzzerHash(pNew->zBasis);
    pNew->pHash = pCur->apHash[h];
    pCur->apHash[h] = pNew;
+  pCur->nStem++;
    return pNew;
  }
  
@@ -430,17 +524,16 @@ static int fuzzerNext(sqlite3_vtab_cursor *cur){
    ** a new stem and insert the new stem into the priority queue.
    */
    pStem = pCur->pStem;
-  if( fuzzerCost(pStem)>0 ){
+  if( pStem->rCostX>0 ){
      rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf);
      if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM;
-    pNew = fuzzerNewStem(pCur, pCur->zBuf, fuzzerCost(pStem));
+    pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX);
      if( pNew ){
        if( fuzzerAdvance(pCur, pNew)==0 ){
          pNew->pNext = pCur->pDone;
          pCur->pDone = pNew;
        }else{
-        pCur->pStem = fuzzerInsert(pStem, pNew);
-        if( pCur->pStem==pNew ){
+        if( fuzzerInsert(pCur, pNew)==pNew ){
            return SQLITE_OK;
          }
        }
@@ -454,17 +547,18 @@ static int fuzzerNext(sqlite3_vtab_cursor *cur){
    */
    while( (pStem = pCur->pStem)!=0 ){
      if( fuzzerAdvance(pCur, pStem) ){
-      pCur->pStem = pStem = fuzzerInsert(pStem->pNext, pStem);
+      pCur->pStem = 0;
+      pStem = fuzzerInsert(pCur, pStem);
        if( (rc = fuzzerSeen(pCur, pStem))!=0 ){
          if( rc<0 ) return SQLITE_NOMEM;
          continue;
        }
        return SQLITE_OK;  /* New word found */
      }
-    pCur->pStem = pStem->pNext;
+    pCur->pStem = 0;
      pStem->pNext = pCur->pDone;
      pCur->pDone = pStem;
-    if( pCur->pStem ){
+    if( fuzzerLowestCostStem(pCur) ){
        rc = fuzzerSeen(pCur, pCur->pStem);
        if( rc<0 ) return SQLITE_NOMEM;
        if( rc==0 ){
@@ -531,7 +625,7 @@ static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){
      sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT);
    }else if( i==1 ){
      /* the "distance" column */
-    sqlite3_result_int(ctx, fuzzerCost(pCur->pStem));
+    sqlite3_result_int(ctx, pCur->pStem->rCostX);
    }else{
      /* All other columns are NULL */
      sqlite3_result_null(ctx);
author	drh <drh@noemail.net>
	Wed, 30 Mar 2011 01:43:00 +0000 (01:43 +0000)
committer	drh <drh@noemail.net>
	Wed, 30 Mar 2011 01:43:00 +0000 (01:43 +0000)
manifest		patch \| blob \| blame \| history
manifest.uuid		patch \| blob \| blame \| history
src/test_fuzzer.c		patch \| blob \| blame \| history