#else
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
+# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T1)
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
-# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 0 /* locality */)
+# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 2 /* locality */)
# else
# define PREFETCH(ptr) /* disabled */
# endif
#endif /* NO_PREFETCH */
-#define PREFETCH_AREA(ptr, size) { \
- size_t pos; \
- for (pos=0; pos<size; pos++) { \
- PREFETCH( (const char*)(const void*)ptr + pos); \
- } \
+#define CACHELINE_SIZE 64   /* prefetch stride, in bytes (assumes 64-byte cache lines) */
+
+/* PREFETCH_AREA() :
+ * Issues one PREFETCH() every CACHELINE_SIZE bytes, starting at `ptr`,
+ * for every offset strictly below `size`.
+ * Each argument is evaluated exactly once and captured into a local,
+ * so expression arguments (e.g. `base + offset`, `a ? b : c`) are handled
+ * correctly, and a caller variable named `pos` cannot be shadowed.
+ * The expansion stays a plain brace block, so existing call sites
+ * (with a trailing `;`) compile unchanged. */
+#define PREFETCH_AREA(ptr, size) { \
+ const char* const prefetchStart_ = (const char*)(ptr); \
+ size_t const prefetchSize_ = (size_t)(size); \
+ size_t prefetchPos_; \
+ (void)prefetchStart_; /* silence unused-variable warning when PREFETCH() is disabled */ \
+ for (prefetchPos_=0; prefetchPos_<prefetchSize_; prefetchPos_+=CACHELINE_SIZE) { \
+ PREFETCH(prefetchStart_ + prefetchPos_); \
+ } \
}
/* disable warnings */
{
case set_repeat:
if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
-
- /* prefetch huffman table if cold */
- if (dctx->ddictIsCold) {
- PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
- }
-
/* fall-through */
+
case set_compressed:
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
{ size_t lhSize, litSize, litCSize;
if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+ /* prefetch huffman table if cold */
+ if (dctx->ddictIsCold && (litSize > 256 /* heuristic */)) {
+ PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
+ }
+
if (HUF_isError((litEncType==set_repeat) ?
( singleStream ?
HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
const void* src, size_t srcSize,
const U32* baseValue, const U32* nbAdditionalBits,
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
- int ddictIsCold)
+ int ddictIsCold, int nbSeq)
{
switch(type)
{
case set_repeat:
if (!flagRepeatTable) return ERROR(corruption_detected);
/* prefetch FSE table if used */
- if (ddictIsCold) {
+ if (ddictIsCold && (nbSeq > 16 /* heuristic */)) {
+ //if (ddictIsCold) {
const void* const pStart = *DTablePtr;
size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
PREFETCH_AREA(pStart, pSize);
const BYTE* const istart = (const BYTE* const)src;
const BYTE* const iend = istart + srcSize;
const BYTE* ip = istart;
+ int nbSeq;
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
/* check */
if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
/* SeqHead */
- { int nbSeq = *ip++;
- if (!nbSeq) { *nbSeqPtr=0; return 1; }
- if (nbSeq > 0x7F) {
- if (nbSeq == 0xFF) {
- if (ip+2 > iend) return ERROR(srcSize_wrong);
- nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
- } else {
- if (ip >= iend) return ERROR(srcSize_wrong);
- nbSeq = ((nbSeq-0x80)<<8) + *ip++;
- }
+ nbSeq = *ip++;
+ if (!nbSeq) { *nbSeqPtr=0; return 1; }
+ if (nbSeq > 0x7F) {
+ if (nbSeq == 0xFF) {
+ if (ip+2 > iend) return ERROR(srcSize_wrong);
+ nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
+ } else {
+ if (ip >= iend) return ERROR(srcSize_wrong);
+ nbSeq = ((nbSeq-0x80)<<8) + *ip++;
}
- *nbSeqPtr = nbSeq;
}
+ *nbSeqPtr = nbSeq;
+ DEBUGLOG(2, "nbSeqs=%i", nbSeq);
+
/* FSE table descriptors */
if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
ip, iend-ip,
LL_base, LL_bits,
LL_defaultDTable, dctx->fseEntropy,
- dctx->ddictIsCold);
+ dctx->ddictIsCold, nbSeq);
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
ip += llhSize;
}
ip, iend-ip,
OF_base, OF_bits,
OF_defaultDTable, dctx->fseEntropy,
- dctx->ddictIsCold);
+ dctx->ddictIsCold, nbSeq);
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
ip += ofhSize;
}
ip, iend-ip,
ML_base, ML_bits,
ML_defaultDTable, dctx->fseEntropy,
- dctx->ddictIsCold);
+ dctx->ddictIsCold, nbSeq);
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
ip += mlhSize;
}
/* prefetch dictionary content */
if (dctx->ddictIsCold) {
size_t const dictSize = ddict->dictSize;
- size_t const pSize = MIN(dictSize, 32 KB); /* proposed heuristic : 8 x frameContentSize => need to know frameContentSize */
+ size_t const pSize = MIN(dictSize, 2 KB); /* very conservative; would need to know Nb of Copies in dictionary, or frameContentSize as a proxy */
const void* const pStart = (const char*)ddict->dictContent + dictSize - pSize;
PREFETCH_AREA(pStart, pSize);
}