From: Yann Collet Date: Fri, 3 Mar 2017 01:09:21 +0000 (-0800) Subject: disable prefetch-decode for 32-bits target X-Git-Tag: v1.1.4~1^2~22 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fe5d27062e83f6f55fdee3c80f31f51fbf10f7bb;p=thirdparty%2Fzstd.git disable prefetch-decode for 32-bits target This decoder variant is detrimental to x86 architecture likely due to register pressure. Note that the variant is disabled for all 32-bits targets. It's unclear if it would help for different architectures, such as ARM, MIPS or PowerPC. --- diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 4dfdf20fb..482c334ff 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1410,13 +1410,18 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong); - /* Decode literals sub-block */ + /* Decode literals section */ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); if (ZSTD_isError(litCSize)) return litCSize; ip += litCSize; srcSize -= litCSize; } - if (dctx->fParams.windowSize > (1<<23)) return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); + if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */ + /* likely because of register pressure */ + /* if that's the correct cause, then 32-bits ARM should be affected differently */ + /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */ + if (dctx->fParams.windowSize > (1<<23)) + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); }