From: Bimba Shrestha
Date: Tue, 10 Sep 2019 03:04:46 +0000 (-0700)
Subject: Addressing comments
X-Git-Tag: v1.4.4~1^2~38^2~18
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9e7bb55e14b112e514a0e8d5ac7f19fd64f353c1;p=thirdparty%2Fzstd.git

Addressing comments
---

Review notes (free-form text, not part of the commit):

  Reconstruction
    * This copy of the patch had lost its line breaks and indentation.
      Line structure is restored so that every hunk matches its "@@" line
      counts; indentation is reconstructed and may differ from upstream.
    * The header names on the three #include lines of the first hunk were
      missing. <limits.h> and <string.h> are restored from their trailing
      comments (INT_MAX, memset). <stdlib.h> is inferred from the malloc()
      and free() calls this patch removes -- TODO confirm against upstream.

  Issues in the added code
    * ZSTD_copyBlockSequences(): the new assert only checks
      "seqIndex + 1 < maxSequences" although the loop writes seqsSize
      entries into outSeqs. The removed ZSTD_getBlockSequences() asserted
      "maxSequences > (seqCurrent - seqStart) + seqsSize", so the capacity
      check became weaker with this change.
    * ZSTD_getSequences(): the pointer returned by ZSTD_malloc() is handed
      to ZSTD_compress2() without a NULL check, and the return value of
      ZSTD_compress2() is discarded, so the caller cannot tell a failed
      compression from a successful one.
    * tests/fuzzer.c: the call under test is placed inside assert(), so it
      is not executed at all if assert() is compiled out (NDEBUG), and the
      context returned by ZSTD_createCCtx() is never freed.
    * ZSTD_getSequences() changes its parameter order and drops the
      "level" argument; every caller has to be updated together with
      lib/zstd.h (the fuzzer call in this patch is).

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index f8588b348..56da1664e 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -13,7 +13,6 @@
 ***************************************/
 #include <limits.h>         /* INT_MAX */
 #include <string.h>         /* memset */
-#include <stdlib.h>
 #include "cpu.h"
 #include "mem.h"
 #include "hist.h"           /* HIST_countFast_wksp */
@@ -2265,6 +2264,77 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
     return ZSTDbss_compress;
 }
 
+static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+{
+    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
+    const seqDef* seqs = seqStore->sequencesStart;
+    size_t seqsSize = seqStore->sequences - seqs;
+
+    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+    size_t i; size_t position; int repIdx;
+
+    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
+    for (i = 0, position = 0; i < seqsSize; ++i) {
+        outSeqs[i].offset = seqs[i].offset;
+        outSeqs[i].litLength = seqs[i].litLength;
+        outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;
+
+        if (i == seqStore->longLengthPos) {
+            if (seqStore->longLengthID == 1) {
+                outSeqs[i].litLength += 0x10000;
+            } else if (seqStore->longLengthID == 2) {
+                outSeqs[i].matchLength += 0x10000;
+            }
+        }
+
+        if (outSeqs[i].offset <= ZSTD_REP_NUM) {
+            outSeqs[i].rep = 1;
+            repIdx = i - outSeqs[i].offset;
+
+            if (repIdx >= 0) {
+                outSeqs[i].offset = outSeqs[repIdx].offset;
+            }
+
+            if (repIdx == -1) {
+                outSeqs[i].offset = 1;
+            } else if (repIdx == -2) {
+                outSeqs[i].offset = 4;
+            } else if (repIdx == -3) {
+                outSeqs[i].offset = 8;
+            }
+        } else {
+            outSeqs[i].offset -= ZSTD_REP_NUM;
+        }
+
+        position += outSeqs[i].litLength;
+        outSeqs[i].matchPos = position;
+        position += outSeqs[i].matchLength;
+    }
+    zc->seqCollector.seqIndex += seqsSize;
+}
+
+/* We call compress2() and collect sequences after each block
+ * compression. The function stores the ZSTD_Sequences in outSeqs
+ * and returns the number of collected sequences from all blocks.
+ */
+size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+    size_t outSeqsSize, const void* src, size_t srcSize)
+{
+    const size_t dstCapacity = ZSTD_compressBound(srcSize * sizeof(void*));
+    void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem);
+
+    SeqCollector seqCollector;
+    seqCollector.collectSequences = 1;
+    seqCollector.seqStart = outSeqs;
+    seqCollector.seqIndex = 0;
+    seqCollector.maxSequences = outSeqsSize;
+    zc->seqCollector = seqCollector;
+
+    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+    ZSTD_free(dst, ZSTD_defaultCMem);
+    return zc->seqCollector.seqIndex;
+}
+
 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize)
@@ -2288,6 +2358,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
             zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
             zc->bmi2);
 
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+    }
+
 out:
     if (!ZSTD_isError(cSize) && cSize != 0) {
         /* confirm repcodes and entropy tables when emitting a compressed block */
@@ -2324,82 +2398,6 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params
     }
 }
 
-static void ZSTD_copyBlockSequences(const seqStore_t* seqStore, seqDef* seqs,
-                                   ZSTD_Sequence* outSeqs, size_t seqsSize)
-{
-    size_t i; size_t position; int repIdx;
-    for (i = 0, position = 0; i < seqsSize; ++i) {
-        outSeqs[i].offset = seqs[i].offset;
-        outSeqs[i].litLength = seqs[i].litLength;
-        outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */;
-
-        if (i == seqStore->longLengthPos) {
-            if (seqStore->longLengthID == 1) {
-                outSeqs[i].litLength += 0x10000;
-            } else if (seqStore->longLengthID == 2) {
-                outSeqs[i].matchLength += 0x10000;
-            }
-        }
-
-        if (outSeqs[i].offset <= 3 /* num reps */) {
-            outSeqs[i].rep = 1;
-            repIdx = i - outSeqs[i].offset;
-
-            if (repIdx >= 0) {
-                outSeqs[i].offset = outSeqs[repIdx].offset;
-            }
-
-            if (repIdx == -1) {
-                outSeqs[i].offset = 1;
-            } else if (repIdx == -2) {
-                outSeqs[i].offset = 4;
-            } else if (repIdx == -3) {
-                outSeqs[i].offset = 8;
-            }
-        } else {
-            outSeqs[i].offset -= 3 /* num reps */;
-        }
-
-        position += outSeqs[i].litLength;
-        outSeqs[i].matchPos = position;
-        position += outSeqs[i].matchLength;
-    }
-}
-
-static void ZSTD_getBlockSequences(ZSTD_CCtx* cctx, const seqStore_t* seqStore)
-{
-    size_t seqsSize = seqStore->sequences - seqStore->sequencesStart;
-
-    assert(cctx->seqCollector.maxSequences >
-        (cctx->seqCollector.seqCurrent - cctx->seqCollector.seqStart) + seqsSize);
-
-    ZSTD_copyBlockSequences(seqStore, seqStore->sequencesStart,
-                            cctx->seqCollector.seqCurrent, seqsSize);
-    cctx->seqCollector.seqCurrent += seqsSize;
-}
-
-size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src,
-    size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize,
-    int level)
-{
-    size_t dstCapacity = ZSTD_compressBound(srcSize * sizeof(void*));
-    void* dst = malloc(dstCapacity);
-    size_t seqsSize;
-
-    SeqCollector seqCollector;
-    seqCollector.collectSequences = 1;
-    seqCollector.seqStart = outSeqs;
-    seqCollector.seqCurrent = outSeqs;
-    seqCollector.maxSequences = outSeqsSize;
-    zc->seqCollector = seqCollector;
-
-    ZSTD_compressCCtx(zc, dst, dstCapacity, src, srcSize, level);
-    seqsSize = zc->seqCollector.seqCurrent - zc->seqCollector.seqStart;
-
-    free(dst);
-    return seqsSize;
-}
-
 /*! ZSTD_compress_frameChunk() :
  *  Compress a chunk of data into one or multiple blocks.
  *  All blocks will be terminated, all input will be consumed.
@@ -2443,10 +2441,6 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                                 op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                 ip, blockSize);
             FORWARD_IF_ERROR(cSize);
-            if (cctx->seqCollector.collectSequences) {
-                ZSTD_getBlockSequences(cctx, ZSTD_getSeqStore(cctx));
-            }
-
             if (cSize == 0) {  /* block is not compressible */
                 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
                 FORWARD_IF_ERROR(cSize);
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index d40d53404..e3ed93eb4 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -195,7 +195,7 @@ typedef struct {
 typedef struct {
     int collectSequences;
     ZSTD_Sequence* seqStart;
-    ZSTD_Sequence* seqCurrent;
+    size_t seqIndex;
     size_t maxSequences;
 } SeqCollector;
 
diff --git a/lib/zstd.h b/lib/zstd.h
index 782940ef5..b2c66e755 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1218,8 +1218,8 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
  *           or an error code (if srcSize is too small) */
 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 
-ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src,
-    size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize, int level);
+ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+    size_t outSeqsSize, const void* src, size_t srcSize);
 
 
 /***************************************
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 09fe46959..fdf6960bc 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1962,8 +1962,8 @@ static int basicUnitTests(U32 const seed, double compressibility)
 
     DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++);
     memset(CNBuffer, 0, 1000000);
-    assert(ZSTD_getSequences(ZSTD_createCCtx(), CNBuffer, 1000000,
-        compressedBuffer, 1000000, 3) == 1000000 / 131071 + 1);
+    assert(ZSTD_getSequences(ZSTD_createCCtx(), compressedBuffer, 1000000,
+        CNBuffer, 1000000) == 1000000 / 131071 + 1);
 
     /* All zeroes test (test bug #137) */
 #define ZEROESLENGTH 100