From: W. Felix Handte Date: Thu, 20 Aug 2020 16:31:25 +0000 (-0400) Subject: Split Lookups in Hash Cache and Chain Table into Two Loops X-Git-Tag: v1.4.7~81^2~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e8b4011b52c1e4ea4db46f52b77842530025841d;p=thirdparty%2Fzstd.git Split Lookups in Hash Cache and Chain Table into Two Loops Sliiiight speedup. --- diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index d8ed30a93..d39398401 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -570,30 +570,32 @@ size_t ZSTD_HcFindBestMatch_generic ( } if (dictMode == ZSTD_dedicatedDictSearch) { - const U32 ddsChainSize = (1 << dms->cParams.chainLog); - const U32 ddsChainMask = ddsChainSize - 1; - const U32 ddsLowestIndex = dms->window.dictLimit; - const BYTE* const ddsBase = dms->window.base; - const BYTE* const ddsEnd = dms->window.nextSrc; - const U32 ddsSize = (U32)(ddsEnd - ddsBase); - const U32 ddsIndexDelta = dictLimit - ddsSize; - const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0; - const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); - U32 attemptNb; - - matchIndex = dms->hashTable[ddsIdx]; - - /* Empty chain */ - if (!matchIndex) - return ml; - - for (attemptNb = 0; attemptNb < bucketSize; attemptNb++) { - PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + attemptNb]); + const U32 ddsChainSize = (1 << dms->cParams.chainLog); + const U32 ddsChainMask = ddsChainSize - 1; + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize ? nbAttempts : bucketSize; + U32 ddsAttempt; + + for (ddsAttempt = 0; ddsAttempt < bucketSize; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); } - for (attemptNb = 1; (matchIndex>ddsLowestIndex) & (nbAttempts>0) ; nbAttempts--, attemptNb++) { + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { size_t currentMl=0; - const BYTE* const match = ddsBase + matchIndex; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (matchIndex < ddsLowestIndex) { + return ml; + } + assert(match+4 <= ddsEnd); if (MEM_read32(match) == MEM_read32(ip)) { /* assumption : matchIndex <= dictLimit-4 (by table construction) */ @@ -604,17 +606,38 @@ size_t ZSTD_HcFindBestMatch_generic ( if (currentMl > ml) { ml = currentMl; *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } } if (matchIndex <= ddsMinChain) { + return ml; + } + } + + for ( ; (ddsAttempt < nbAttempts) & (matchIndex >= ddsMinChain); ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[matchIndex & ddsChainMask]; + match = ddsBase + matchIndex; + + if (matchIndex < ddsLowestIndex) { break; } - if (attemptNb < bucketSize) { - matchIndex = dms->hashTable[ddsIdx + attemptNb]; - } else { - matchIndex = dms->chainTable[matchIndex & ddsChainMask]; + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } } } else if (dictMode == ZSTD_dictMatchState) {