From: W. Felix Handte Date: Mon, 13 Dec 2021 19:48:26 +0000 (-0500) Subject: Rewrite `step` to Track Increment Between Pairs of Positions X-Git-Tag: v1.5.1~1^2~16^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6ca5f424025d0f34cf1234cde0ba13b20469da46;p=thirdparty%2Fzstd.git Rewrite `step` to Track Increment Between Pairs of Positions The position updates are rewritten from `ip[N] = ip[N-1] + step` to be `ip[N] = ip[N-2] + step`. This lets us only deal with the asymmetric spacing of gaps at setup and then we only have to keep a single `step` variable. This seems to work quite well on GCC and Clang! --- diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index b8e578bc7..852f4286f 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -99,7 +99,7 @@ ZSTD_compressBlock_fast_noDict_generic( U32* const hashTable = ms->hashTable; U32 const hlog = cParams->hashLog; /* support stepSize of 0 */ - size_t const stepSize = cParams->targetLength + !(cParams->targetLength); + size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2; const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); @@ -128,8 +128,11 @@ ZSTD_compressBlock_fast_noDict_generic( const BYTE* match0; size_t mLength; - size_t step0; - size_t step1; + /* ip0 and ip1 are always adjacent. The targetLength skipping and + * uncompressibility acceleration is applied to every other position, + * matching the behavior of #1562. step therefore represents the gap + * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */ + size_t step; const BYTE* nextStep; const size_t kStepIncr = (1 << (kSearchStrength - 1)); @@ -145,14 +148,13 @@ ZSTD_compressBlock_fast_noDict_generic( /* start each op */ _start: /* Requires: ip0 */ - step0 = 1; - step1 = stepSize; + step = stepSize; nextStep = ip0 + kStepIncr; /* calculate positions, ip0 - anchor == 0, so we skip step calc */ - ip1 = ip0 + step0; - ip2 = ip1 + step1; - ip3 = ip2 + step0; + ip1 = ip0 + 1; + ip2 = ip0 + step; + ip3 = ip2 + 1; if (ip3 >= ilimit) { goto _cleanup; @@ -236,16 +238,15 @@ _start: /* Requires: ip0 */ if (ip2 >= nextStep) { PREFETCH_L1(ip1 + 64); PREFETCH_L1(ip1 + 128); - step0++; - step1++; + step += 2; nextStep += kStepIncr; } /* advance to next positions */ ip0 = ip1; ip1 = ip2; - ip2 = ip2 + step1; - ip3 = ip2 + step0; + ip2 = ip0 + step; + ip3 = ip1 + step; } while (ip3 < ilimit); _cleanup: