From: Yann Collet Date: Mon, 5 Mar 2018 19:59:54 +0000 (-0800) Subject: Merge branch 'dev' into longOffsetMode X-Git-Tag: v1.3.4~1^2~39^2~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b91ddf0ae69a8857829047616343b5718ebf002c;p=thirdparty%2Fzstd.git Merge branch 'dev' into longOffsetMode --- b91ddf0ae69a8857829047616343b5718ebf002c diff --cc lib/decompress/zstd_decompress.c index 17139d8ce,50e34e358..5fa4e3056 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@@ -754,16 -894,41 +893,45 @@@ static size_t ZSTD_buildSeqTable(ZSTD_s size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); if (FSE_isError(headerSize)) return ERROR(corruption_detected); if (tableLog > maxLog) return ERROR(corruption_detected); - FSE_buildDTable(DTableSpace, norm, max, tableLog); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); *DTablePtr = DTableSpace; return headerSize; - } } + } + default : /* impossible */ + assert(0); + return ERROR(GENERIC); + } } + static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + + static const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + + static const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + + static const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize) { @@@ -1320,101 -1226,52 +1229,71 @@@ size_t ZSTD_execSequenceLong(BYTE* op return sequenceLength; } - static size_t ZSTD_decompressSequencesLong( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) - { - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*) (dctx->base); - const BYTE* const dictStart = (const BYTE*) (dctx->vBase); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - - /* Regen sequences */ - if (nbSeq) { - #define STORED_SEQS 4 - #define STOSEQ_MASK (STORED_SEQS-1) - #define ADVANCED_SEQS 4 - seq_t sequences[STORED_SEQS]; - int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); - seqState_t seqState; - int seqNb; - dctx->fseEntropy = 1; - { U32 i; for (i=0; ientropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); - FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - /* prepare in advance */ - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } + #define FUNCTION(fn) fn##_default + #define TARGET + #include "zstd_decompress_impl.h" + #undef TARGET + #undef FUNCTION - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } + #if DYNAMIC_BMI2 - return op-ostart; + #define FUNCTION(fn) fn##_bmi2 + #define TARGET TARGET_ATTRIBUTE("bmi2") + #include "zstd_decompress_impl.h" + #undef TARGET + #undef FUNCTION + + #endif + + typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, + size_t seqSize, const ZSTD_longOffset_e isLongOffset); + + static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, + const ZSTD_longOffset_e isLongOffset) + { + #if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, isLongOffset); + } + #endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, isLongOffset); } + static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, + const ZSTD_longOffset_e isLongOffset) + { + #if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, isLongOffset); + } + #endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, isLongOffset); + } +static unsigned +ZSTD_getLongOffsetsShare(const FSE_DTable* offTable) +{ + const void* ptr = offTable; + U32 const tableLog = ((const FSE_DTableHeader*)ptr)[0].tableLog; + const FSE_decode_t* table = ((const FSE_decode_t*)ptr) + 1; + U32 const max = 1 << tableLog; + U32 u, total = 0; + + assert(tableLog <= OffFSELog); + for (u=0; u 23) total += 1; + + total <<= (OffFSELog - tableLog); + + return total; +} + + static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const int frame) diff --cc programs/bench.c index 939890064,b4cd619c4..10d610eff --- a/programs/bench.c +++ b/programs/bench.c @@@ -461,8 -473,8 +473,8 @@@ static int BMK_benchMem(const void* src } /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */ if (g_displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */ - double const cSpeed = (double)srcSize / fastestC; - double const dSpeed = (double)srcSize / fastestD; - double cSpeed = ((double)srcSize / fastestC) * 1000; - double dSpeed = ((double)srcSize / fastestD) * 1000; ++ double const cSpeed = ((double)srcSize / fastestC) * 1000; ++ double const dSpeed = ((double)srcSize / fastestD) * 1000; if (g_additionalParam) DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam); else