From: Yann Collet
Date: Mon, 7 Oct 2024 18:22:40 +0000 (-0700)
Subject: minor refactor zstd_fast
X-Git-Tag: v1.5.7^2~76^2~8
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1e7fa242f4aa71c3aec5e1e39ec69ad54e117051;p=thirdparty%2Fzstd.git

minor refactor zstd_fast

make hot variables more local
---

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index cbcded28d..53c3bdee8 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -560,7 +560,9 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
 /* ZSTD_selectAddr:
  * @return a >= b ? trueAddr : falseAddr,
  * tries to force branchless codegen. */
-MEM_STATIC const BYTE* ZSTD_selectAddr(U32 a, U32 b, const BYTE* trueAddr, const BYTE* falseAddr) {
+MEM_STATIC const BYTE*
+ZSTD_selectAddr(U32 a, U32 b, const BYTE* trueAddr, const BYTE* falseAddr)
+{
 #if defined(__GNUC__) && defined(__x86_64__)
     __asm__ (
         "cmp %1, %2\n"
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index 838a18ee5..0b6230150 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -166,7 +166,6 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
      * we load from here instead of from tables, if the index is invalid.
      * Used to avoid unpredictable branches. */
     const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4};
-    const BYTE *mvalAddr;

     const BYTE* anchor = istart;
     const BYTE* ip0 = istart;
@@ -182,7 +181,6 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
     size_t hash0; /* hash for ip0 */
     size_t hash1; /* hash for ip1 */
     U32 idx; /* match idx for ip0 */
-    U32 mval; /* src value at match idx */

     U32 offcode;
     const BYTE* match0;
@@ -255,22 +253,21 @@ _start: /* Requires: ip0 */
          * However expression below compiles into conditional move. Since
          * match is unlikely and we only *branch* on idxl0 > prefixLowestIndex
          * if there is a match, all branches become predictable. */
-        mvalAddr = base + idx;
-        mvalAddr = ZSTD_selectAddr(idx, prefixStartIndex, mvalAddr, &dummy[0]);
+        {   const BYTE* mvalAddr = ZSTD_selectAddr(idx, prefixStartIndex, base + idx, &dummy[0]);
+            /* load match for ip[0] */
+            U32 const mval = MEM_read32(mvalAddr);

-        /* load match for ip[0] */
-        mval = MEM_read32(mvalAddr);
-
-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval && idx >= prefixStartIndex) {
-            /* found a match! */
+            /* check match at ip[0] */
+            if (MEM_read32(ip0) == mval && idx >= prefixStartIndex) {
+                /* found a match! */

-            /* First write next hash table entry; we've already calculated it.
-             * This write is known to be safe because the ip1 == ip0 + 1, so
-             * we know we will resume searching after ip1 */
-            hashTable[hash1] = (U32)(ip1 - base);
+                /* Write next hash table entry (it's already calculated).
+                 * This write is known to be safe because ip1 == ip0 + 1,
+                 * so searching will resume after ip1 */
+                hashTable[hash1] = (U32)(ip1 - base);

-            goto _offset;
+                goto _offset;
+            }
         }

         /* lookup ip[1] */
@@ -289,32 +286,30 @@ _start: /* Requires: ip0 */
         current0 = (U32)(ip0 - base);
         hashTable[hash0] = current0;

-        mvalAddr = base + idx;
-        mvalAddr = ZSTD_selectAddr(idx, prefixStartIndex, mvalAddr, &dummy[0]);
-
-        /* load match for ip[0] */
-        mval = MEM_read32(mvalAddr);
+        {   const BYTE* mvalAddr = ZSTD_selectAddr(idx, prefixStartIndex, base + idx, &dummy[0]);
+            /* load match for ip[0] */
+            U32 const mval = MEM_read32(mvalAddr);

+            /* check match at ip[0] */
+            if (MEM_read32(ip0) == mval && idx >= prefixStartIndex) {
+                /* found a match! */

-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval && idx >= prefixStartIndex) {
-            /* found a match! */
+                /* first write next hash table entry; we've already calculated it */
+                if (step <= 4) {
+                    /* We need to avoid writing an index into the hash table >= the
+                     * position at which we will pick up our searching after we've
+                     * taken this match.
+                     *
+                     * The minimum possible match has length 4, so the earliest ip0
+                     * can be after we take this match will be the current ip0 + 4.
+                     * ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
+                     * write this position.
+                     */
+                    hashTable[hash1] = (U32)(ip1 - base);
+                }

-            /* first write next hash table entry; we've already calculated it */
-            if (step <= 4) {
-                /* We need to avoid writing an index into the hash table >= the
-                 * position at which we will pick up our searching after we've
-                 * taken this match.
-                 *
-                 * The minimum possible match has length 4, so the earliest ip0
-                 * can be after we take this match will be the current ip0 + 4.
-                 * ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
-                 * write this position.
-                 */
-                hashTable[hash1] = (U32)(ip1 - base);
+                goto _offset;
             }
-
-            goto _offset;
         }

         /* lookup ip[1] */
@@ -554,7 +549,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
         hashTable[hash0] = curr;   /* update hash table */

-        if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
+        if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
             && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
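
The pattern this patch relies on is worth seeing in isolation: compute the candidate load address branchlessly, load from it unconditionally (falling back to the always-valid dummy[] bytes when the index is out of range), and fold the index-validity check into the single, rarely-taken "found a match" branch. Below is a minimal standalone C sketch of that idea, not zstd's actual code: select_addr and read32 are hypothetical stand-ins for ZSTD_selectAddr and MEM_read32, and the plain ternary stands in for the select (the patch additionally forces branchless codegen via inline asm on GCC/x86-64).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same contract as ZSTD_selectAddr: returns a >= b ? trueAddr : falseAddr.
 * A plain ternary like this is typically compiled to cmov/csel at -O2. */
static const uint8_t* select_addr(uint32_t a, uint32_t b,
                                  const uint8_t* trueAddr,
                                  const uint8_t* falseAddr)
{
    return (a >= b) ? trueAddr : falseAddr;
}

/* Unaligned 4-byte read, in the spirit of MEM_read32. */
static uint32_t read32(const void* p)
{
    uint32_t v;
    memcpy(&v, p, sizeof(v));
    return v;
}

int main(void)
{
    /* Always-valid bytes to load from when the table index is invalid,
     * mirroring the dummy[] buffer in the patch. */
    static const uint8_t dummy[] = {0x12,0x34,0x56,0x78,0x9a};

    const uint8_t base[] = "abcdefghijkl"; /* stand-in for the window base */
    uint32_t const prefixStartIndex = 4;   /* indices below this are invalid */
    uint32_t const idx = 2;                /* pretend hash-table lookup result */
    const uint8_t* ip0 = base + 8;         /* current input position */

    /* Branchless address select, then an unconditional load. */
    const uint8_t* mvalAddr = select_addr(idx, prefixStartIndex,
                                          base + idx, &dummy[0]);
    uint32_t const mval = read32(mvalAddr);

    /* The only branch: rarely taken, hence well predicted. The validity
     * check rides along to reject accidental matches against dummy[]. */
    if (read32(ip0) == mval && idx >= prefixStartIndex) {
        printf("match\n");
    } else {
        printf("no match\n"); /* taken here, since idx < prefixStartIndex */
    }
    return 0;
}

Because dummy[] is ordinary readable memory, the speculative load is always safe: an invalid table index costs only a possibly-useless load instead of a hard-to-predict branch before the load, which is exactly the trade-off the comments in zstd_fast.c describe.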