From: Bimba Shrestha Date: Mon, 16 Sep 2019 20:29:59 +0000 (-0700) Subject: Merge branch 'dev' into extract_sequences_api X-Git-Tag: v1.4.4~1^2~38^2~12^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a8744354788d6beaa33efbceba65960b01fd8a3a;p=thirdparty%2Fzstd.git Merge branch 'dev' into extract_sequences_api --- a8744354788d6beaa33efbceba65960b01fd8a3a diff --cc lib/compress/zstd_compress.c index 833ae8383,2f0736b24..3fe84b6bc --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@@ -2264,80 -2265,28 +2265,97 @@@ static size_t ZSTD_buildSeqStore(ZSTD_C return ZSTDbss_compress; } +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqs = seqStore->sequencesStart; + size_t seqsSize = seqStore->sequences - seqs; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; size_t position; int repIdx; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + for (i = 0, position = 0; i < seqsSize; ++i) { + outSeqs[i].offset = seqs[i].offset; + outSeqs[i].litLength = seqs[i].litLength; + outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (outSeqs[i].offset <= ZSTD_REP_NUM) { + outSeqs[i].rep = outSeqs[i].offset; + repIdx = i - outSeqs[i].offset; + + if (outSeqs[i].litLength == 0) { + if (outSeqs[i].offset < 3) { + --repIdx; + } else { + repIdx = i - 1; + } + ++outSeqs[i].rep; + } + assert(repIdx >= -3); + outSeqs[i].offset = repIdx >= 0 ? 
outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; + if (outSeqs[i].offset == 4) { + --outSeqs[i].offset; + } + } else { + outSeqs[i].offset -= ZSTD_REP_NUM; + } + + position += outSeqs[i].litLength; + outSeqs[i].matchPos = position; + position += outSeqs[i].matchLength; + } + zc->seqCollector.seqIndex += seqsSize; +} + +size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize * sizeof(void*)); + void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); + + SeqCollector seqCollector; + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_free(dst, ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; +} + + /* Returns true if the given block is a RLE block */ + static int ZSTD_isRLE(const BYTE *ip, size_t length) { + size_t i; + if (length < 2) return 1; + for (i = 1; i < length; ++i) { + if (ip[0] != ip[i]) return 0; + } + return 1; + } + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - const void* src, size_t srcSize) + const void* src, size_t srcSize, U32 frame) { + /* This is a provisional upper bound for the length of an RLE block. + * It isn't the true upper bound; finding the real threshold + * needs further investigation.
+ */ + const U32 rleMaxLength = 25; size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); @@@ -2438,8 -2403,9 +2476,8 @@@ static size_t ZSTD_compress_frameChunk { size_t cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, - ip, blockSize); + ip, blockSize, 1 /* frame */); FORWARD_IF_ERROR(cSize); - if (cSize == 0) { /* block is not compressible */ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cSize); diff --cc lib/compress/zstd_compress_internal.h index e3ed93eb4,ae106e02b..5aca65bfe --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@@ -245,7 -240,7 +247,8 @@@ struct ZSTD_CCtx_s XXH64_state_t xxhState; ZSTD_customMem customMem; size_t staticSize; + SeqCollector seqCollector; + int isFirstBlock; seqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ diff --cc tests/fuzzer.c index fdf6960bc,0ae0b3943..c0c6b7a14 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@@ -1960,11 -1960,19 +1960,23 @@@ static int basicUnitTests(U32 const see DISPLAYLEVEL(3, "OK \n"); } + DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++); + memset(CNBuffer, 0, 1000000); + assert(ZSTD_getSequences(ZSTD_createCCtx(), compressedBuffer, 1000000, + CNBuffer, 1000000) == 1000000 / 131071 + 1); + + /* Multiple blocks of zeros test */ + #define LONGZEROSLENGTH 1000000 /* 1MB of zeros */ + DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH); + memset(CNBuffer, 0, LONGZEROSLENGTH); + CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(LONGZEROSLENGTH), CNBuffer, LONGZEROSLENGTH, 1) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", 
(unsigned)cSize, (double)cSize/LONGZEROSLENGTH*100); + + DISPLAYLEVEL(3, "test%3i : decompress %u zeroes : ", testNb++, LONGZEROSLENGTH); + { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, LONGZEROSLENGTH, compressedBuffer, cSize) ); + if (r != LONGZEROSLENGTH) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + - /* All zeroes test (test bug #137) */ #define ZEROESLENGTH 100 DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);