From: Yann Collet Date: Wed, 12 Sep 2018 17:29:47 +0000 (-0700) Subject: added conditional prefetch X-Git-Tag: v0.0.29~13^2~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4de344d50582caebb652199fcab83f3f823a411e;p=thirdparty%2Fzstd.git added conditional prefetch depending on amount of work to do. --- diff --git a/lib/common/compiler.h b/lib/common/compiler.h index a0687ddd6..e68b81bf6 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -95,19 +95,21 @@ #else # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ # include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) +# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T1) # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) -# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 0 /* locality */) +# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 2 /* locality */) # else # define PREFETCH(ptr) /* disabled */ # endif #endif /* NO_PREFETCH */ -#define PREFETCH_AREA(ptr, size) { \ - size_t pos; \ - for (pos=0; poslitEntropy==0) return ERROR(dictionary_corrupted); - - /* prefetch huffman table if cold */ - if (dctx->ddictIsCold) { - PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); - } - /* fall-through */ + case set_compressed: if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ { size_t lhSize, litSize, litCSize; @@ -616,6 +611,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 256 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + if (HUF_isError((litEncType==set_repeat) ? ( singleStream ? HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) : @@ -897,7 +897,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb const void* src, size_t srcSize, const U32* baseValue, const U32* nbAdditionalBits, const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, - int ddictIsCold) + int ddictIsCold, int nbSeq) { switch(type) { @@ -917,7 +917,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb case set_repeat: if (!flagRepeatTable) return ERROR(corruption_detected); /* prefetch FSE table if used */ - if (ddictIsCold) { + if (ddictIsCold && (nbSeq > 16 /* heuristic */)) { + //if (ddictIsCold) { const void* const pStart = *DTablePtr; size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); PREFETCH_AREA(pStart, pSize); @@ -974,25 +975,27 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const BYTE* const istart = (const BYTE* const)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; + int nbSeq; DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); /* check */ if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); /* SeqHead */ - { int nbSeq = *ip++; - if (!nbSeq) { *nbSeqPtr=0; return 1; } - if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) { - if (ip+2 > iend) return ERROR(srcSize_wrong); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - } else { - if (ip >= iend) return ERROR(srcSize_wrong); - nbSeq = ((nbSeq-0x80)<<8) + *ip++; - } + nbSeq = *ip++; + if (!nbSeq) { *nbSeqPtr=0; return 1; } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + if (ip+2 > iend) return ERROR(srcSize_wrong); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + } else { + if (ip >= iend) return ERROR(srcSize_wrong); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; } - *nbSeqPtr = nbSeq; } + *nbSeqPtr = nbSeq; + DEBUGLOG(2, "nbSeqs=%i", nbSeq); + /* FSE table descriptors */ if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */ @@ -1007,7 +1010,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, LL_base, LL_bits, LL_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold); + dctx->ddictIsCold, nbSeq); if (ZSTD_isError(llhSize)) return ERROR(corruption_detected); ip += llhSize; } @@ -1017,7 +1020,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold); + dctx->ddictIsCold, nbSeq); if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected); ip += ofhSize; } @@ -1027,7 +1030,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, ML_base, ML_bits, ML_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold); + dctx->ddictIsCold, nbSeq); if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected); ip += mlhSize; } @@ -2395,7 +2398,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) /* prefetch dictionary content */ if (dctx->ddictIsCold) { size_t const dictSize = ddict->dictSize; - size_t const pSize = MIN(dictSize, 32 KB); /* proposed heuristic : 8 x frameContentSize => need to know frameContentSize */ + size_t const pSize = MIN(dictSize, 2 KB); /* very conservative; would need to know Nb of Copies in dictionary, or frameContentSize as a proxy */ const void* const pStart = (const char*)ddict->dictContent + dictSize - pSize; PREFETCH_AREA(pStart, pSize); }