From: inikep Date: Mon, 4 Apr 2016 14:28:40 +0000 (+0200) Subject: Merge remote-tracking branch 'refs/remotes/origin/dev' into repcodes X-Git-Tag: v0.6.0^2~17^2~16^2~8^2~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=78e52044de6c7433754a9c018ba91d2c52250de5;p=thirdparty%2Fzstd.git Merge remote-tracking branch 'refs/remotes/origin/dev' into repcodes # Conflicts: # lib/zstd_compress.c # lib/zstd_decompress.c # lib/zstd_internal.h # lib/zstd_opt.h # programs/bench.c --- 78e52044de6c7433754a9c018ba91d2c52250de5 diff --cc lib/zstd_compress.c index e9b9d4a6e,07c72f971..8edea5e23 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@@ -1123,13 -1171,14 +1172,14 @@@ static void ZSTD_compressBlock_fast_ext const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; mlCode = ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repMatchEnd, lowPrefixPtr); ip++; - offset = 0; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode); } else { if ( (matchIndex < lowLimit) || - (MEM_read32(match) != MEM_read32(ip)) ) - { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; } - { - const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; mlCode = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iend, matchEnd, lowPrefixPtr); while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ @@@ -1585,8 -1632,9 +1633,8 @@@ void ZSTD_compressBlock_lazy_generic(ZS const BYTE* const ilimit = iend - 8; const BYTE* const base = ctx->base + ctx->dictLimit; - const U32 maxSearches = 1 << ctx->params.searchLog; - const U32 mls = ctx->params.searchLength; - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + const U32 maxSearches = 1 << ctx->params.cParams.searchLog; + const U32 mls = ctx->params.cParams.searchLength; typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr, @@@ -1605,31 -1649,20 +1653,30 @@@ while (ip < ilimit) { size_t matchLength=0; size_t offset=0; - const BYTE* start=ip+1; + const BYTE* start=ip; /* check repCode */ - if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1)) { + for (int i=0; i gain1) + matchLength = mlRep, offset = i; + } } - { - /* first search (depth 0) */ - size_t offsetFound = 99999999; - size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; + matchLength = ml2, start = ip, offset=offsetFound + ZSTD_REP_MOVE; } if (matchLength < MINMATCH) { @@@ -1641,42 -1674,38 +1688,40 @@@ if (depth>=1) while (ip= MINMATCH) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offset = i, start = ip; } - { - size_t offset2=999999; - size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offset = offset2 + ZSTD_REP_MOVE, start = ip; continue; /* search a better one */ } } /* let's find an even better one */ if ((depth==2) && (ip= MINMATCH) && (gain2 > gain1)) - matchLength = ml2, offset = 0, start = ip; + matchLength = ml2, offset = i, start = ip; } - { - size_t offset2=999999; - size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offset = offset2 + ZSTD_REP_MOVE, start = ip; continue; } } } break; /* nothing found : store previous solution */ @@@ -1690,47 -1720,30 +1735,48 @@@ /* store sequence */ _storeSequence: - { size_t const litLength = start - anchor; + { +#if ZSTD_REP_NUM == 4 + if (offset >= ZSTD_REP_NUM) { +#if 1 + rep[3] = rep[2]; + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offset - ZSTD_REP_MOVE; +#else + if (kSlotNew < 3) rep[3] = rep[2]; + if (kSlotNew < 2) rep[2] = rep[1]; + if (kSlotNew < 1) rep[1] = rep[0]; + rep[kSlotNew] = offset - ZSTD_REP_MOVE; +#endif + } else { + if (offset != 0) { + size_t temp = rep[offset]; + if (offset > 2) rep[3] = rep[2]; + if (offset > 1) rep[2] = rep[1]; + if (offset > 0) rep[1] = rep[0]; + rep[0] = temp; + } + + if (offset<=1 && start==anchor) offset = 1-offset; + } +#else + if (offset >= ZSTD_REP_NUM) { + rep[1] = rep[0]; rep[0] = offset - ZSTD_REP_MOVE; + } +#endif - size_t litLength = start - anchor; ++ size_t const litLength = start - anchor; ++ ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } - - /* check immediate repcode */ - while ( (ip <= ilimit) - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { - /* store sequence */ - matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - offset = offset_2; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength); - ip += matchLength+MINMATCH; - anchor = ip; - continue; /* faster when present ... (?) */ - } } + } /* Last Literals */ - { - size_t lastLLSize = iend - anchor; + { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; + ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0); } } @@@ -1779,8 -1792,9 +1825,8 @@@ void ZSTD_compressBlock_lazy_extDict_ge const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictStart = dictBase + ctx->lowLimit; - const U32 maxSearches = 1 << ctx->params.searchLog; - const U32 mls = ctx->params.searchLength; - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + const U32 maxSearches = 1 << ctx->params.cParams.searchLog; + const U32 mls = ctx->params.cParams.searchLength; typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr, @@@ -1816,12 -1825,11 +1862,11 @@@ if (depth==0) goto _storeSequence; } } - { - /* first search (depth 0) */ - size_t offsetFound = 99999999; - size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; + matchLength = ml2, start = ip, offset=offsetFound + ZSTD_REP_MOVE; } if (matchLength < MINMATCH) { @@@ -1851,13 -1859,12 +1896,12 @@@ } } /* search match, depth 1 */ - { - size_t offset2=999999; - size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offset = offset2 + ZSTD_REP_MOVE, start = ip; continue; /* search a better one */ } } @@@ -1882,13 -1889,12 +1926,12 @@@ } } /* search match, depth 2 */ - { - size_t offset2=999999; - size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offset = offset2 + ZSTD_REP_MOVE, start = ip; continue; } } } break; /* nothing found : store previous solution */ diff --cc lib/zstd_decompress.c index 651087c4d,436058b52..5fd22ffcf --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@@ -645,129 -621,59 +621,112 @@@ typedef struct FSE_DState_t stateLL; FSE_DState_t stateOffb; FSE_DState_t stateML; - size_t prevOffset; + size_t prevOffset[ZSTD_REP_INIT]; - const BYTE* dumps; - const BYTE* dumpsEnd; } seqState_t; + ++ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) { - const BYTE* dumps = seqState->dumps; - const BYTE* const de = seqState->dumpsEnd; - size_t litLength, offset; - /* Literal length */ - litLength = FSE_peakSymbol(&(seqState->stateLL)); - if (litLength == MaxLL) { - const U32 add = *dumps++; - if (add < 255) litLength += add; - else { - litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */ - if (litLength&1) litLength>>=1, dumps += 3; - else litLength = (U16)(litLength)>>1, dumps += 2; - } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ - } - - /* Offset */ - { - static const U32 offsetPrefix[MaxOff+1] = { - 1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 0x100, - 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, 0x40000, - 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; - const U32 offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ - const U32 nbBits = offsetCode ? offsetCode-1 : 0; - offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits); + U32 const llCode = FSE_peekSymbol(&(seqState->stateLL)); + U32 const mlCode = FSE_peekSymbol(&(seqState->stateML)); + U32 const ofCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ + + U32 const llBits = LL_bits[llCode]; + U32 const mlBits = ML_bits[mlCode]; + U32 const ofBits = ofCode; + U32 const totalBits = llBits+mlBits+ofBits; + + static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + + static const U32 ML_base[MaxML+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, + 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; + + static const U32 OF_base[MaxOff+1] = { + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, + 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, + 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1, 1, 1, 1 }; + + /* sequence */ + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : /* <= 26 bits */ - llCode ? seq->offset : seqState->prevOffset; ++ llCode ? seq->offset : seqState->prevOffset[0]; if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ ++ if (ofCode | !llCode) seqState->prevOffset[0] = seq->offset; /* cmove */ + seq->offset = offset; ++ +#if ZSTD_REP_NUM == 4 - if (offsetCode==0) offset = 0; ++ if (ofCode==0) offset = 0; + + if (offset < ZSTD_REP_NUM) { + if (litLength == 0 && offset <= 1) offset = 1-offset; + + if (offset != 0) { + size_t temp = seqState->prevOffset[offset]; + if (offset != 1) { + if (offset == 3) seqState->prevOffset[3] = seqState->prevOffset[2]; + seqState->prevOffset[2] = seqState->prevOffset[1]; + } + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + + } else { + offset = seqState->prevOffset[0]; + } + + } else { + offset -= ZSTD_REP_MOVE; +#if 1 // faster without kSlotNew + seqState->prevOffset[3] = seqState->prevOffset[2]; + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; +#else + if (kSlotNew < 3) seqState->prevOffset[3] = seqState->prevOffset[2]; + if (kSlotNew < 2) seqState->prevOffset[2] = seqState->prevOffset[1]; + if (kSlotNew < 1) seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[kSlotNew] = offset; +#endif + } +#else // ZSTD_REP_NUM == 1 - #if 0 - if (offsetCode==0) offset = litLength ? seq->offset : seqState->prevOffset[0]; /* repcode, cmove */ ++ #if 1 ++/* if (ofCode==0) offset = litLength ? seq->offset : seqState->prevOffset[0]; + else offset -= ZSTD_REP_MOVE; - if (offsetCode | !litLength) seqState->prevOffset[0] = seq->offset; /* cmove */ ++ if (ofCode | !litLength) seqState->prevOffset[0] = seq->offset; */ + #else - if (offsetCode==0) { ++ if (ofCode==0) { + if (!litLength) { + offset = seqState->prevOffset[0]; /* repcode, cmove */ + seqState->prevOffset[0] = seq->offset; /* cmove */ + } else + offset = seq->offset; /* repcode, cmove */ + } else { + seqState->prevOffset[0] = seq->offset; /* cmove */ + offset -= ZSTD_REP_MOVE; + } + #endif +#endif - FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ - // printf("offsetCode=%d nbBits=%d offset=%d\n", offsetCode, nbBits, (int)offset); fflush(stdout); } - /* Literal length update */ - FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */ - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - - /* MatchLength */ - { - size_t matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); - if (matchLength == MaxML) { - const U32 add = *dumps++; - if (add < 255) matchLength += add; - else { - matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ - if (matchLength&1) matchLength>>=1, dumps += 3; - else matchLength = (U16)(matchLength)>>1, dumps += 2; - } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ - } - matchLength += mls; - seq->matchLength = matchLength; - } + seq->matchLength = ML_base[mlCode] + mls + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream)); - /* save result */ - seq->litLength = litLength; - seq->offset = offset; - seqState->dumps = dumps; + seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); /* <= 16 bits */ + if (MEM_32bits() || + (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream)); - #if 0 /* debug */ - { - static U64 totalDecoded = 0; - printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", - (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset); - totalDecoded += litLength + matchLength; - } - #endif + /* ANS state update */ + FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); /* <= 9 bits */ + FSE_updateState(&(seqState->stateML), &(seqState->DStream)); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); /* <= 18 bits */ + FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <= 8 bits */ } @@@ -883,12 -785,9 +838,10 @@@ static size_t ZSTD_decompressSequences memset(&sequence, 0, sizeof(sequence)); sequence.offset = REPCODE_STARTVALUE; - seqState.dumps = dumps; - seqState.dumpsEnd = dumps + dumpsLength; - seqState.prevOffset = REPCODE_STARTVALUE; + for (int i=0; ibase; const BYTE* const prefixStart = base + ctx->dictLimit; - - const U32 maxSearches = 1U << ctx->params.searchLog; - const U32 sufficient_len = ctx->params.targetLength; - const U32 mls = ctx->params.searchLength; - const U32 minMatch = (ctx->params.searchLength == 3) ? 3 : 4; + - U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; + const U32 maxSearches = 1U << ctx->params.cParams.searchLog; + const U32 sufficient_len = ctx->params.cParams.targetLength; + const U32 mls = ctx->params.cParams.searchLength; + const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; ZSTD_optimal_t* opt = seqStorePtr->priceTable; ZSTD_match_t* matches = seqStorePtr->matchTable; @@@ -713,7 -745,371 +738,378 @@@ void ZSTD_compressBlock_opt_extDict_gen const void* src, size_t srcSize, const U32 depth) { - (void)ctx; (void)src; (void)srcSize; (void)depth; - (void)ZSTD_BtGetAllMatches_selectMLS_extDict; - printf("ZSTD_compressBlock_opt_extDict_generic\n"), exit(0); ++ printf("NOT IMPLEMENTED: ZSTD_compressBlock_opt_extDict_generic\n"), exit(0); ++ (void)ctx; (void)src; (void)srcSize; (void)depth; (void)ZSTD_BtGetAllMatches_selectMLS_extDict; ++#if 0 + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* litstart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 lowLimit = ctx->lowLimit; + - U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; + const U32 maxSearches = 1U << ctx->params.cParams.searchLog; + const U32 sufficient_len = ctx->params.cParams.targetLength; + const U32 mls = ctx->params.cParams.searchLength; + const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; + + ZSTD_optimal_t* opt = seqStorePtr->priceTable; + ZSTD_match_t* matches = seqStorePtr->matchTable; + const BYTE* inr; + U32 cur, match_num, last_pos, litlen, price; + + /* init */ ++ U32 rep[ZSTD_REP_INIT]; ++ for (int i=0; inextToUpdate3 = ctx->nextToUpdate; + ZSTD_resetSeqStore(seqStorePtr); + ZSTD_rescaleFreqs(seqStorePtr); + if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + + ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_EXTDICT srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); + + /* Match Loop */ + while (ip < ilimit) { + U32 u, offset, best_off=0; + U32 mlen=0, best_mlen=0; + U32 current = (U32)(ip-base); + memset(opt, 0, sizeof(ZSTD_optimal_t)); + last_pos = 0; + inr = ip; + litstart = ((U32)(ip - anchor) > 128) ? ip - 128 : anchor; + opt[0].litlen = (U32)(ip - litstart); + + /* check repCode */ + { + const U32 repIndex = (U32)(current+1 - rep_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + && (MEM_readMINMATCH(ip+1, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(ip+1+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + + ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep_1, (int)mlen); + if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + ip+=1; best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1; + goto _storeSequence; + } + + litlen = opt[0].litlen + 1; + do { + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); + if (mlen + 1 > last_pos || price < opt[mlen + 1].price) + SET_PRICE(mlen + 1, mlen, 0, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } + + best_mlen = (last_pos) ? last_pos : minMatch; + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + + ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); + if (!last_pos && !match_num) { ip++; continue; } + + opt[0].rep = rep_1; + opt[0].rep2 = rep_2; + opt[0].mlen = 1; + + if (match_num && matches[match_num-1].len > sufficient_len) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + cur = 0; + last_pos = 1; + goto _storeSequence; + } + + // set prices using matches at position = 0 + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = (matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM; + ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); + litlen = opt[0].litlen; + while (mlen <= best_mlen) { + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, matches[u].off, litlen, price); + mlen++; + } } + + if (last_pos < minMatch) { + // ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + ip++; continue; + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + size_t cur_rep; + inr = ip + cur; + + if (opt[cur-1].mlen == 1) { + litlen = opt[cur-1].litlen + 1; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); + } else + price = ZSTD_getLiteralPrice(seqStorePtr, litlen, litstart); + } else { + litlen = 1; + price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); + } + + if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen))) + SET_PRICE(cur, 1, 0, litlen, price); + + if (cur == last_pos) break; + + if (inr > ilimit) // last match must start at a minimum distance of 8 from oend + continue; + + mlen = opt[cur].mlen; + + if (opt[cur].off) { + opt[cur].rep2 = opt[cur-mlen].rep; + opt[cur].rep = opt[cur].off; + ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + } else { + if (cur!=mlen && opt[cur].litlen == 0) { + opt[cur].rep2 = opt[cur-mlen].rep; + opt[cur].rep = opt[cur-mlen].rep2; + ZSTD_LOG_ENCODE("%d: COPYREP_SWI cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + } else { + opt[cur].rep2 = opt[cur-mlen].rep2; + opt[cur].rep = opt[cur-mlen].rep; + ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2); + } } + + ZSTD_LOG_PARSER("%d: CURRENT_Ext price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); + + best_mlen = 0; + + if (opt[cur].mlen != 1) { + cur_rep = opt[cur].rep2; + ZSTD_LOG_PARSER("%d: tryExt REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); + } else { + cur_rep = opt[cur].rep; + ZSTD_LOG_PARSER("%d: tryExt REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen); + } + + const U32 repIndex = (U32)(current+cur - cur_rep); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off); + + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; + best_off = 0; + ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); + last_pos = cur + 1; + goto _storeSequence; + } + + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - minMatch); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, 0, mlen - minMatch); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - minMatch); + } + + best_mlen = mlen; + + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, 0, price, litlen); + + do { + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) // || ((price == opt[cur + mlen].price) && (opt[cur].mlen == 1) && (cur != litlen))) // at equal price prefer REP instead of MATCH + SET_PRICE(cur + mlen, mlen, 0, litlen, price); + mlen--; + } while (mlen >= minMatch); + } + + best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch; + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches); + ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); + + if (match_num > 0 && matches[match_num-1].len > sufficient_len) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + last_pos = cur + 1; + goto _storeSequence; + } + + // set prices using matches at position = cur + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur; + + // ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); + + while (mlen <= best_mlen) { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - minMatch); + else + price = ZSTD_getPrice(seqStorePtr, litlen, litstart, matches[u].off, mlen - minMatch); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - minMatch); + } + + // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); + + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + + mlen++; + } } } // for (cur = 1; cur <= last_pos; cur++) + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + + /* store sequence */ + _storeSequence: // cur, last_pos, best_mlen, best_off have to be set + for (u = 1; u <= last_pos; u++) + ZSTD_LOG_PARSER("%d: price[%u/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2); + ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep); + + opt[0].mlen = 1; + + while (1) { + mlen = opt[cur].mlen; + offset = opt[cur].off; + opt[cur].mlen = best_mlen; + opt[cur].off = best_off; + best_mlen = mlen; + best_off = offset; + if (mlen > cur) break; + cur -= mlen; + } + + for (u = 0; u <= last_pos; ) { + ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2); + u += opt[u].mlen; + } + + for (cur=0; cur < last_pos; ) { + U32 litLength; + ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2); + mlen = opt[cur].mlen; + if (mlen == 1) { ip++; cur++; continue; } + offset = opt[cur].off; + cur += mlen; + + litLength = (U32)(ip - anchor); + ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2); + + if (offset) { + rep_2 = rep_1; + rep_1 = offset; + } else { + if (litLength == 0) { + best_off = rep_2; + rep_2 = rep_1; + rep_1 = best_off; + } } + + ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2); + + #if ZSTD_OPT_DEBUG >= 5 + U32 ml2; + if (offset) { + if (offset > (size_t)(ip - prefixStart)) { + const BYTE* match = dictEnd - (offset - (ip - prefixStart)); + ml2 = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart); + ZSTD_LOG_PARSER("%d: ZSTD_count_2segments=%d offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)ml2, (int)offset, dictBase, dictEnd, prefixStart, ip, match); + } + else ml2 = (U32)ZSTD_count(ip, ip-offset, iend); + } + else ml2 = (U32)ZSTD_count(ip, ip-rep_1, iend); + if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) { + printf("%d: ERROR_Ext iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } + if (ip < anchor) { + printf("%d: ERROR_Ext ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } + if (ip + mlen > iend) { + printf("%d: ERROR_Ext ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } + #endif + + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-minMatch); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-minMatch); + anchor = ip = ip + mlen; + } + + #if 0 + /* check immediate repcode */ + while ((anchor >= base + lowLimit + rep_2) && (anchor <= ilimit)) { + if ((anchor - rep_2) >= prefixStart) { + if (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(anchor - rep_2, minMatch)) + mlen = (U32)ZSTD_count(anchor+minMatch, anchor - rep_2 + minMatch, iend) + minMatch; + else + break; + } else { + const BYTE* repMatch = dictBase + ((anchor-base) - rep_2); + if ((repMatch + minMatch <= dictEnd) && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch))) + mlen = (U32)ZSTD_count_2segments(anchor+minMatch, repMatch+minMatch, iend, dictEnd, prefixStart) + minMatch; + else + break; + } + + offset = rep_2; rep_2 = rep_1; rep_1 = offset; /* swap offset history */ + ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); + ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-minMatch); + anchor += mlen; + } + #else + /* check immediate repcode */ + /* minimal correctness condition = while ((anchor >= prefixStart + REPCODE_STARTVALUE) && (anchor <= ilimit)) { */ + while ((anchor >= base + lowLimit + rep_2) && (anchor <= ilimit)) { + const U32 repIndex = (U32)((anchor-base) - rep_2); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected, let's take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(anchor+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + offset = rep_2; rep_2 = rep_1; rep_1 = offset; /* swap offset history */ + ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); + ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-minMatch); + anchor += mlen; + continue; /* faster when present ... (?) */ + } + break; + } + #endif + if (anchor > ip) ip = anchor; + } + + { /* Last Literals */ + size_t lastLLSize = iend - anchor; + ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)(lastLLSize)); + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } ++#endif } diff --cc programs/bench.c index a7b4a87a8,cea9634c2..afe685f19 --- a/programs/bench.c +++ b/programs/bench.c @@@ -204,10 -234,8 +234,10 @@@ typedef struc #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) +int kSlotNew = 0; + static int BMK_benchMem(const void* srcBuffer, size_t srcSize, - const char* displayName, int cLevel, int additionalParam, + const char* displayName, int cLevel, const size_t* fileSizes, U32 nbFiles, const void* dictBuffer, size_t dictBufferSize, benchResult_t *result) { @@@ -421,12 -448,9 +450,14 @@@ static void BMK_benchCLevel(void* srcBu { benchResult_t result, total; int l; +#ifdef _WIN32 + SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); +#else + setpriority(PRIO_PROCESS, 0, -20); +#endif + SET_HIGH_PRIORITY; + const char* pch = strrchr(displayName, '\\'); /* Windows */ if (!pch) pch = strrchr(displayName, '/'); /* Linux */ if (pch) displayName = pch+1; @@@ -434,8 -458,8 +465,9 @@@ memset(&result, 0, sizeof(result)); memset(&total, 0, sizeof(total)); - // if (g_displayLevel == 1 && !additionalParam) - // DISPLAY("bench %s: input %u bytes, %i iterations, %u KB blocks\n", ZSTD_VERSION, (U32)benchedSize, g_nbIterations, (U32)(g_blockSize>>10)); ++ kSlotNew = g_additionalParam; + if (g_displayLevel == 1 && !g_additionalParam) + DISPLAY("bench %s: input %u bytes, %i iterations, %u KB blocks\n", ZSTD_VERSION, (U32)benchedSize, g_nbIterations, (U32)(g_blockSize>>10)); if (cLevelLast < cLevel) cLevelLast = cLevel;