git.ipfire.org Git - thirdparty/zstd.git/commitdiff
disable prefetch-decode for 32-bit targets
author Yann Collet <cyan@fb.com>
Fri, 3 Mar 2017 01:09:21 +0000 (17:09 -0800)
committer Yann Collet <cyan@fb.com>
Fri, 3 Mar 2017 01:09:21 +0000 (17:09 -0800)
This decoder variant is detrimental on the 32-bit x86 architecture,
likely due to register pressure.

Note that the variant is disabled for all 32-bit targets, not only x86.
It's unclear whether it would help on other 32-bit architectures,
such as ARM, MIPS or PowerPC.

lib/decompress/zstd_decompress.c

index 4dfdf20fb50c67abcb3549e6d18f90b9dae7a78a..482c334ffbcacdc236acd7d90f2535ac2face05d 100644 (file)
@@ -1410,13 +1410,18 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 
     if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong);
 
-    /* Decode literals sub-block */
+    /* Decode literals section */
     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
         if (ZSTD_isError(litCSize)) return litCSize;
         ip += litCSize;
         srcSize -= litCSize;
     }
-    if (dctx->fParams.windowSize > (1<<23)) return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
+    if (sizeof(size_t) > 4)  /* do not enable prefetching on 32-bits x86, as it's performance detrimental */
+                             /* likely because of register pressure */
+                             /* if that's the correct cause, then 32-bits ARM should be affected differently */
+                             /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */
+        if (dctx->fParams.windowSize > (1<<23))
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
     return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
 }