From db4e1b5479c1df569732aa8f486633ed4098bd8c Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 2 Sep 2021 12:25:08 -0400 Subject: [PATCH] Hash Long One Position Ahead (+2.5% Speed) Aside from maybe a latency win in the loop, this means that when we find a short match, we've already done the hash we need to check the next long match. --- lib/compress/zstd_double_fast.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 0cf2d2159..4e38e4662 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -118,10 +118,11 @@ _start: goto _cleanup; } + hl0 = ZSTD_hashPtr(ip, hBitsL, 8); + /* Main Search Loop */ do { curr = (U32)(ip-base); - hl0 = ZSTD_hashPtr(ip, hBitsL, 8); hs0 = ZSTD_hashPtr(ip, hBitsS, mls); idxl0 = hashLong[hl0]; idxs0 = hashSmall[hs0]; @@ -148,6 +149,8 @@ _start: } } + hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); + if (idxs0 > prefixLowestIndex) { /* check prefix short match */ if (MEM_read32(matchs0) == MEM_read32(ip)) { @@ -155,18 +158,20 @@ _start: } } - if (ip >= nextStep) { - PREFETCH_L1(ip + 64); - PREFETCH_L1(ip + 128); + if (ip1 >= nextStep) { + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); step++; nextStep += kStepIncr; } - ip += step; + ip = ip1; + ip1 += step; + hl0 = hl1; #if defined(__aarch64__) PREFETCH_L1(ip+256); #endif - } while (ip < ilimit); + } while (ip1 < ilimit); _cleanup: /* save reps for next block */ @@ -177,8 +182,7 @@ _cleanup: return (size_t)(iend - anchor); _search_next_long: - { hl1 = ZSTD_hashPtr(ip+1, hBitsL, 8); - idxl1 = hashLong[hl1]; + { idxl1 = hashLong[hl1]; matchl1 = base + idxl1; hashLong[hl1] = curr + 1; -- 2.47.2