]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Split Lookups in Hash Cache and Chain Table into Two Loops
authorW. Felix Handte <w@felixhandte.com>
Thu, 20 Aug 2020 16:31:25 +0000 (12:31 -0400)
committerW. Felix Handte <w@felixhandte.com>
Thu, 10 Sep 2020 22:51:52 +0000 (18:51 -0400)
Sliiiight speedup.

lib/compress/zstd_lazy.c

index d8ed30a937462f69291ab2327f5b4bd282f1e5f5..d393984014e2a0f17d39e7fcb4f59e6445da9f30 100644 (file)
@@ -570,30 +570,32 @@ size_t ZSTD_HcFindBestMatch_generic (
     }
 
     if (dictMode == ZSTD_dedicatedDictSearch) {
-        const U32 ddsChainSize         = (1 << dms->cParams.chainLog);
-        const U32 ddsChainMask         = ddsChainSize - 1;
-        const U32 ddsLowestIndex       = dms->window.dictLimit;
-        const BYTE* const ddsBase      = dms->window.base;
-        const BYTE* const ddsEnd       = dms->window.nextSrc;
-        const U32 ddsSize              = (U32)(ddsEnd - ddsBase);
-        const U32 ddsIndexDelta        = dictLimit - ddsSize;
-        const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0;
-        const U32 bucketSize           = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
-        U32 attemptNb;
-
-        matchIndex = dms->hashTable[ddsIdx];
-
-        /* Empty chain */
-        if (!matchIndex)
-            return ml;
-
-        for (attemptNb = 0; attemptNb < bucketSize; attemptNb++) {
-            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + attemptNb]);
+        const U32 ddsChainSize    = (1 << dms->cParams.chainLog);
+        const U32 ddsChainMask    = ddsChainSize - 1;
+        const U32 ddsLowestIndex  = dms->window.dictLimit;
+        const BYTE* const ddsBase = dms->window.base;
+        const BYTE* const ddsEnd  = dms->window.nextSrc;
+        const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
+        const U32 ddsIndexDelta   = dictLimit - ddsSize;
+        const U32 ddsMinChain     = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0;
+        const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
+        const U32 bucketLimit     = nbAttempts < bucketSize ? nbAttempts : bucketSize;
+        U32 ddsAttempt;
+
+        for (ddsAttempt = 0; ddsAttempt < bucketSize; ddsAttempt++) {
+            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
         }
 
-        for (attemptNb = 1; (matchIndex>ddsLowestIndex) & (nbAttempts>0) ; nbAttempts--, attemptNb++) {
+        for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
             size_t currentMl=0;
-            const BYTE* const match = ddsBase + matchIndex;
+            const BYTE* match;
+            matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
+            match = ddsBase + matchIndex;
+
+            if (matchIndex < ddsLowestIndex) {
+                return ml;
+            }
+
             assert(match+4 <= ddsEnd);
             if (MEM_read32(match) == MEM_read32(ip)) {
                 /* assumption : matchIndex <= dictLimit-4 (by table construction) */
@@ -604,17 +606,38 @@ size_t ZSTD_HcFindBestMatch_generic (
             if (currentMl > ml) {
                 ml = currentMl;
                 *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
-                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+                if (ip+currentMl == iLimit) {
+                    /* best possible, avoids read overflow on next attempt */
+                    return ml;
+                }
             }
 
             if (matchIndex <= ddsMinChain) {
+                return ml;
+            }
+        }
+
+        for ( ; (ddsAttempt < nbAttempts) & (matchIndex >= ddsMinChain); ddsAttempt++) {
+            size_t currentMl=0;
+            const BYTE* match;
+            matchIndex = dms->chainTable[matchIndex & ddsChainMask];
+            match = ddsBase + matchIndex;
+
+            if (matchIndex < ddsLowestIndex) {
                 break;
             }
 
-            if (attemptNb < bucketSize) {
-                matchIndex = dms->hashTable[ddsIdx + attemptNb];
-            } else {
-                matchIndex = dms->chainTable[matchIndex & ddsChainMask];
+            assert(match+4 <= ddsEnd);
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+            }
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
             }
         }
     } else if (dictMode == ZSTD_dictMatchState) {