From: Bimba Shrestha
Date: Fri, 30 Aug 2019 16:18:44 +0000 (-0700)
Subject: Changing api to get sequences across all blocks
X-Git-Tag: v1.4.4~1^2~38^2~19
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5f8b0f6890e03050a3840ed996f95a5e8263c1ea;p=thirdparty%2Fzstd.git

Changing api to get sequences across all blocks
---

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index f0b6136b7..f8588b348 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -13,6 +13,7 @@
 ***************************************/
 #include <limits.h>         /* INT_MAX */
 #include <string.h>         /* memset */
+#include <stdlib.h>         /* malloc, free */
 #include "cpu.h"
 #include "mem.h"
 #include "hist.h"           /* HIST_countFast_wksp */
@@ -2190,77 +2191,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
     ssPtr->longLengthID = 0;
 }
 
-typedef struct {
-    U32 matchPos;
-    U32 offset;
-    U32 litLength;
-    U32 matchLength;
-    int rep;
-} Sequence;
-
-static size_t ZSTD_getSequencesForOneBlock(ZSTD_CCtx* zc, ZSTD_CDict* cdict,
-                                           void* dst, size_t dstSize,
-                                           const void* src, size_t srcSize,
-                                           Sequence* outSeqs, size_t outSeqsSize)
-{
-    const seqStore_t* seqStore;
-    const seqDef* seqs;
-    size_t seqsSize;
-
-    size_t i; int repIdx; size_t position;
-
-    size_t blockSize = ZSTD_getBlockSize(zc);
-    size_t maxOutput = ZSTD_compressBound(blockSize);
-
-    assert(!ZSTD_isError(ZSTD_compressBegin_usingCDict(zc, cdict)));
-    assert(dstSize >= maxOutput); dstSize = maxOutput;
-    assert(srcSize >= blockSize); srcSize = blockSize;
-    assert(!ZSTD_isError(ZSTD_compressBlock(zc, dst, dstSize, src, srcSize)));
-
-    seqStore = ZSTD_getSeqStore(zc);
-    seqs = seqStore->sequencesStart;
-    seqsSize = seqStore->sequences - seqStore->sequencesStart;
-
-    assert(outSeqsSize >= seqsSize); outSeqsSize = seqsSize;
-
-    for (i = 0, position = 0; i < seqsSize; ++i) {
-        outSeqs[i].offset = seqs[i].offset;
-        outSeqs[i].litLength = seqs[i].litLength;
-        outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */;
-
-        if (i == seqStore->longLengthPos) {
-            if (seqStore->longLengthID == 1) {
-                outSeqs[i].litLength += 0x10000;
-            } else if (seqStore->longLengthID == 2) {
-                outSeqs[i].matchLength += 0x10000;
-            }
-        }
-
-        if (outSeqs[i].offset <= 3 /* num reps */) {
-            outSeqs[i].rep = 1;
-            repIdx = i - outSeqs[i].offset;
-
-            if (repIdx >= 0) {
-                outSeqs[i].offset = outSeqs[repIdx].offset;
-            }
-
-            if (repIdx == -1) {
-                outSeqs[i].offset = 1;
-            } else if (repIdx == -2) {
-                outSeqs[i].offset = 4;
-            } else if (repIdx == -3) {
-                outSeqs[i].offset = 8;
-            }
-        } else {
-            outSeqs[i].offset -= 3 /* num reps */;
-        }
-
-        position += outSeqs[i].litLength;
-        outSeqs[i].matchPos = position;
-        position += outSeqs[i].matchLength;
-    }
-}
-
 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
 
 static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
@@ -2394,6 +2324,112 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params
     }
 }
 
+/* ZSTD_copyBlockSequences() :
+ * Translates `seqsSize` internal sequences from `seqs` into `outSeqs`,
+ * which must have room for at least `seqsSize` entries.
+ * `seqStore` supplies the position and kind of the block's long length, if any.
+ * matchPos values are counted from the first sequence copied by this call.
+ * NOTE(review): repcodes are resolved only against sequences copied by this call,
+ * falling back to 1, 4, 8 before that; earlier blocks are not consulted - confirm. */
+static void ZSTD_copyBlockSequences(const seqStore_t* seqStore, const seqDef* seqs,
+                                    ZSTD_Sequence* outSeqs, size_t seqsSize)
+{
+    size_t i; size_t position; int repIdx;
+    for (i = 0, position = 0; i < seqsSize; ++i) {
+        outSeqs[i].offset = seqs[i].offset;
+        outSeqs[i].litLength = seqs[i].litLength;
+        outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */;
+        outSeqs[i].rep = 0;   /* always written : outSeqs may be uninitialized */
+
+        if (i == seqStore->longLengthPos) {
+            if (seqStore->longLengthID == 1) {
+                outSeqs[i].litLength += 0x10000;
+            } else if (seqStore->longLengthID == 2) {
+                outSeqs[i].matchLength += 0x10000;
+            }
+        }
+
+        if (outSeqs[i].offset <= 3 /* num reps */) {
+            outSeqs[i].rep = 1;
+            /* signed on purpose : a negative index means "before the first sequence" */
+            repIdx = (int)i - (int)outSeqs[i].offset;
+
+            if (repIdx >= 0) {
+                outSeqs[i].offset = outSeqs[repIdx].offset;
+            }
+
+            if (repIdx == -1) {
+                outSeqs[i].offset = 1;
+            } else if (repIdx == -2) {
+                outSeqs[i].offset = 4;
+            } else if (repIdx == -3) {
+                outSeqs[i].offset = 8;
+            }
+        } else {
+            outSeqs[i].offset -= 3 /* num reps */;
+        }
+
+        position += outSeqs[i].litLength;
+        outSeqs[i].matchPos = position;
+        position += outSeqs[i].matchLength;
+    }
+}
+
+/* ZSTD_getBlockSequences() :
+ * Appends the sequences held in `seqStore` to the collector of `cctx`.
+ * If the collector lacks room for them, nothing is copied and collection stops,
+ * so the output stays a valid prefix of the sequences found. */
+static void ZSTD_getBlockSequences(ZSTD_CCtx* cctx, const seqStore_t* seqStore)
+{
+    size_t const seqsSize = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    size_t const seqsUsed = (size_t)(cctx->seqCollector.seqCurrent - cctx->seqCollector.seqStart);
+
+    /* an exactly full output buffer is valid, hence >= and not > */
+    assert(cctx->seqCollector.maxSequences >= seqsUsed + seqsSize);
+    /* assert() is compiled out under NDEBUG : never write past the caller's buffer */
+    if (seqsSize > cctx->seqCollector.maxSequences - seqsUsed) {
+        cctx->seqCollector.collectSequences = 0;
+        return;
+    }
+
+    ZSTD_copyBlockSequences(seqStore, seqStore->sequencesStart,
+                            cctx->seqCollector.seqCurrent, seqsSize);
+    cctx->seqCollector.seqCurrent += seqsSize;
+}
+
+/* ZSTD_getSequences() :
+ * Compresses `src` with `zc` at `level` into a temporary buffer that is discarded,
+ * storing the sequences of every block into `outSeqs` (capacity : `outSeqsSize`).
+ * @return : number of sequences written into `outSeqs`,
+ *           or an error code, which can be tested with ZSTD_isError(). */
+size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src,
+    size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize,
+    int level)
+{
+    size_t const dstCapacity = ZSTD_compressBound(srcSize);
+    void* const dst = malloc(dstCapacity);
+    size_t cSize;
+    size_t seqsSize;
+    SeqCollector seqCollector;
+
+    if (dst == NULL) return ERROR(memory_allocation);
+
+    seqCollector.collectSequences = 1;
+    seqCollector.seqStart = outSeqs;
+    seqCollector.seqCurrent = outSeqs;
+    seqCollector.maxSequences = outSeqsSize;
+    zc->seqCollector = seqCollector;
+
+    cSize = ZSTD_compressCCtx(zc, dst, dstCapacity, src, srcSize, level);
+    seqsSize = (size_t)(zc->seqCollector.seqCurrent - zc->seqCollector.seqStart);
+
+    /* detach the collector : later compressions must not write into outSeqs */
+    zc->seqCollector.collectSequences = 0;
+    free(dst);
+
+    if (ZSTD_isError(cSize)) return cSize;
+    return seqsSize;
+}
 
 /*! ZSTD_compress_frameChunk() :
 *   Compress a chunk of data into one or multiple blocks.
@@ -2438,6 +2474,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                                 op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                 ip, blockSize);
             FORWARD_IF_ERROR(cSize);
+            if (cctx->seqCollector.collectSequences) {
+                ZSTD_getBlockSequences(cctx, ZSTD_getSeqStore(cctx));
+            }
 
             if (cSize == 0) {  /* block is not compressible */
                 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 6d623cc6b..d40d53404 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -192,6 +192,13 @@ typedef struct {
   size_t capacity; /* The capacity starting from `seq` pointer */
 } rawSeqStore_t;
 
+typedef struct {
+    int collectSequences;       /* non-zero : copy each block's sequences into seqCurrent */
+    ZSTD_Sequence* seqStart;    /* first entry of the output array */
+    ZSTD_Sequence* seqCurrent;  /* next entry to be written */
+    size_t maxSequences;        /* capacity of the output array, counted from seqStart */
+} SeqCollector;
+
 struct ZSTD_CCtx_params_s {
     ZSTD_format_e format;
     ZSTD_compressionParameters cParams;
@@ -238,6 +245,7 @@ struct ZSTD_CCtx_s {
     XXH64_state_t xxhState;
     ZSTD_customMem customMem;
     size_t staticSize;
+    SeqCollector seqCollector;
 
     seqStore_t seqStore;      /* sequences storage ptrs */
     ldmState_t ldmState;      /* long distance matching state */
diff --git a/lib/zstd.h b/lib/zstd.h
index f8e95f228..782940ef5 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1072,6 +1072,14 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 
 typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
 
+typedef struct {
+    unsigned int matchPos;      /* position where the match starts, i.e. right after the literals */
+    unsigned int offset;        /* match offset, after repcode resolution */
+    unsigned int litLength;     /* number of literals preceding the match */
+    unsigned int matchLength;   /* match length, minimum match included */
+    int rep;                    /* 1 when the offset came from a repcode, 0 otherwise */
+} ZSTD_Sequence;
+
 typedef struct {
     unsigned windowLog;       /**< largest match distance : larger == more compression, more memory needed during decompression */
     unsigned chainLog;        /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
@@ -1210,6 +1218,13 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
  *           or an error code (if srcSize is too small) */
 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 
+/*! ZSTD_getSequences() :
+ *  Compresses `src` at `level` and writes the sequences found into `outSeqs`,
+ *  which can hold up to `outSeqsSize` entries.
+ * @return : number of sequences written into `outSeqs`,
+ *           or an error code, which can be tested using ZSTD_isError(). */
+ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src,
+    size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize, int level);
+
 
 /***************************************
 *  Memory management
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 2de7c0096..09fe46959 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1960,6 +1960,21 @@ static int basicUnitTests(U32 const seed, double compressibility)
         DISPLAYLEVEL(3, "OK \n");
     }
 
+    DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++);
+    {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        size_t nbSeqs;
+        assert(cctx != NULL);
+        memset(CNBuffer, 0, 1000000);
+        /* NOTE(review): claims room for 1000000 sequences in compressedBuffer - confirm its size */
+        /* the call stays outside assert() so that it still runs when NDEBUG is defined */
+        nbSeqs = ZSTD_getSequences(cctx, CNBuffer, 1000000,
+                                   compressedBuffer, 1000000, 3);
+        ZSTD_freeCCtx(cctx);
+        assert(nbSeqs == 1000000 / 131071 + 1);
+        (void)nbSeqs;   /* unused when assert() is compiled out */
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     /* All zeroes test (test bug #137) */
     #define ZEROESLENGTH 100
     DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);