if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong);
- /* Decode literals sub-block */
+ /* Decode literals section */
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
if (ZSTD_isError(litCSize)) return litCSize;
ip += litCSize;
srcSize -= litCSize;
}
- if (dctx->fParams.windowSize > (1<<23)) return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
+ if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */
+ /* likely because of register pressure */
+ /* if that's the correct cause, then 32-bits ARM should be affected differently */
+ /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */
+ if (dctx->fParams.windowSize > (1<<23))
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
}