git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Changing api to get sequences across all blocks
author Bimba Shrestha <bshrestha.msae@gmail.com>
Fri, 30 Aug 2019 16:18:44 +0000 (09:18 -0700)
committer Bimba Shrestha <bshrestha.msae@gmail.com>
Fri, 30 Aug 2019 16:18:44 +0000 (09:18 -0700)
lib/compress/zstd_compress.c
lib/compress/zstd_compress_internal.h
lib/zstd.h
tests/fuzzer.c

index f0b6136b7fa45f583e16df6487216d08a2b98a7a..f8588b3484201d52deba16e2534a3c7f232d579a 100644 (file)
@@ -13,6 +13,7 @@
 ***************************************/
 #include <limits.h>         /* INT_MAX */
 #include <string.h>         /* memset */
+#include <stdlib.h>
 #include "cpu.h"
 #include "mem.h"
 #include "hist.h"           /* HIST_countFast_wksp */
@@ -2190,77 +2191,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
     ssPtr->longLengthID = 0;
 }
 
-typedef struct {
-    U32 matchPos;
-    U32 offset;
-    U32 litLength;
-    U32 matchLength;
-    int rep;
-} Sequence;
-
-static size_t ZSTD_getSequencesForOneBlock(ZSTD_CCtx* zc, ZSTD_CDict* cdict,
-                                void* dst, size_t dstSize,
-                                const void* src, size_t srcSize,
-                                Sequence* outSeqs, size_t outSeqsSize)
-{
-    const seqStore_t* seqStore;
-    const seqDef* seqs;
-    size_t seqsSize;
-
-    size_t i; int repIdx; size_t position;
-
-    size_t blockSize = ZSTD_getBlockSize(zc);
-    size_t maxOutput = ZSTD_compressBound(blockSize);
-
-    assert(!ZSTD_isError(ZSTD_compressBegin_usingCDict(zc, cdict)));
-    assert(dstSize >= maxOutput); dstSize = maxOutput;
-    assert(srcSize >= blockSize); srcSize = blockSize;
-    assert(!ZSTD_isError(ZSTD_compressBlock(zc, dst, dstSize, src, srcSize)));
-
-    seqStore = ZSTD_getSeqStore(zc);
-    seqs = seqStore->sequencesStart;
-    seqsSize = seqStore->sequences - seqStore->sequencesStart;
-
-    assert(outSeqsSize >= seqsSize); outSeqsSize = seqsSize;
-
-    for (i = 0, position = 0; i < seqsSize; ++i) {
-        outSeqs[i].offset = seqs[i].offset;
-        outSeqs[i].litLength = seqs[i].litLength;
-        outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */;
-
-        if (i == seqStore->longLengthPos) {
-            if (seqStore->longLengthID == 1) {
-                outSeqs[i].litLength += 0x10000;
-            } else if (seqStore->longLengthID == 2) {
-                outSeqs[i].matchLength += 0x10000;
-            }
-        }
-
-        if (outSeqs[i].offset <= 3 /* num reps */) {
-            outSeqs[i].rep = 1;
-            repIdx = i - outSeqs[i].offset;
-
-            if (repIdx >= 0) {
-                outSeqs[i].offset = outSeqs[repIdx].offset;
-            }
-
-            if (repIdx == -1) {
-                outSeqs[i].offset = 1;
-            } else if (repIdx == -2) {
-                outSeqs[i].offset = 4;
-            } else if (repIdx == -3) {
-                outSeqs[i].offset = 8;
-            }
-        } else {
-            outSeqs[i].offset -= 3 /* num reps */;
-        }
-
-        position += outSeqs[i].litLength;
-        outSeqs[i].matchPos = position;
-        position += outSeqs[i].matchLength;
-    }
-}
-
 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
 
 static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
@@ -2394,6 +2324,81 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params
     }
 }
 
+/* ZSTD_copyBlockSequences() :
+ * Translates the `seqsSize` internal entries of `seqs` into ZSTD_Sequence
+ * records, written to outSeqs[0 .. seqsSize-1].
+ * `seqStore` is consulted only for its long-length marker
+ * (longLengthPos / longLengthID).
+ * Every field of each output record is written, including `rep`
+ * (1 for a repeat-offset code, 0 otherwise). */
+static void ZSTD_copyBlockSequences(const seqStore_t* seqStore, const seqDef* seqs,
+    ZSTD_Sequence* outSeqs, size_t seqsSize)
+{
+    size_t i; size_t position; int repIdx;
+    for (i = 0, position = 0; i < seqsSize; ++i) {
+        outSeqs[i].offset = seqs[i].offset;
+        outSeqs[i].litLength = seqs[i].litLength;
+        outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */;
+
+        /* The sequence at longLengthPos has one of its lengths stored
+         * 0x10000 short; longLengthID says which one (1: literals, 2: match). */
+        if (i == seqStore->longLengthPos) {
+            if (seqStore->longLengthID == 1) {
+                outSeqs[i].litLength += 0x10000;
+            } else if (seqStore->longLengthID == 2) {
+                outSeqs[i].matchLength += 0x10000;
+            }
+        }
+
+        if (outSeqs[i].offset <= 3 /* num reps */) {
+            /* Repeat-offset code : replace it with the offset of the sequence
+             * it points back to within this output array. */
+            outSeqs[i].rep = 1;
+            repIdx = (int)i - (int)outSeqs[i].offset;
+
+            if (repIdx >= 0) {
+                outSeqs[i].offset = outSeqs[repIdx].offset;
+            }
+
+            /* The code reaches back before the first sequence of this array :
+             * use the fixed values 1, 4, 8.
+             * NOTE(review): these look like the format's starting repeat
+             * offsets - confirm, and confirm this also holds for blocks
+             * after the first one. */
+            if (repIdx == -1) {
+                outSeqs[i].offset = 1;
+            } else if (repIdx == -2) {
+                outSeqs[i].offset = 4;
+            } else if (repIdx == -3) {
+                outSeqs[i].offset = 8;
+            }
+        } else {
+            /* Regular offset : previously `rep` was left unwritten here,
+             * handing an indeterminate value back to the caller. */
+            outSeqs[i].rep = 0;
+            outSeqs[i].offset -= 3 /* num reps */;
+        }
+
+        /* matchPos is the running position just after this sequence's literals */
+        position += outSeqs[i].litLength;
+        outSeqs[i].matchPos = position;
+        position += outSeqs[i].matchLength;
+    }
+}
+
+/* ZSTD_getBlockSequences() :
+ * Appends the sequences currently held in `seqStore` to the output array
+ * tracked by cctx->seqCollector, then advances seqCurrent past them.
+ * NOTE(review): the capacity check is an assert() only; when assertions are
+ * compiled out, nothing here prevents writing past maxSequences entries. */
+static void ZSTD_getBlockSequences(ZSTD_CCtx* cctx, const seqStore_t* seqStore)
+{
+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    size_t const nbStored = (size_t)(cctx->seqCollector.seqCurrent - cctx->seqCollector.seqStart);
+
+    /* Filling the output array exactly to capacity is valid, hence `>=`
+     * (a strict `>` rejected a perfectly sized array). */
+    assert(cctx->seqCollector.maxSequences >= nbStored + nbSeqs);
+
+    ZSTD_copyBlockSequences(seqStore, seqStore->sequencesStart,
+        cctx->seqCollector.seqCurrent, nbSeqs);
+    cctx->seqCollector.seqCurrent += nbSeqs;
+}
+
+/*! ZSTD_getSequences() :
+ *  Compresses `src` at `level` with `zc` into a scratch buffer (discarded
+ *  afterwards) and records into `outSeqs` the sequences of every block.
+ *  `outSeqsSize` is the capacity of `outSeqs`, in number of ZSTD_Sequence.
+ * @return : number of sequences written into `outSeqs`,
+ *           or an error code (testable with ZSTD_isError()) when the scratch
+ *           buffer cannot be allocated or the compression itself fails. */
+size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src,
+    size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize,
+    int level)
+{
+    /* Single-pass compression never needs more than ZSTD_compressBound(srcSize);
+     * scaling srcSize by sizeof(void*) only inflated the allocation and could
+     * overflow size_t. */
+    size_t const dstCapacity = ZSTD_compressBound(srcSize);
+    void* const dst = malloc(dstCapacity);
+    size_t cSize;
+    size_t seqsSize;
+    SeqCollector seqCollector;
+
+    if (dst == NULL) return ERROR(memory_allocation);
+
+    seqCollector.collectSequences = 1;
+    seqCollector.seqStart = outSeqs;
+    seqCollector.seqCurrent = outSeqs;
+    seqCollector.maxSequences = outSeqsSize;
+    zc->seqCollector = seqCollector;
+
+    cSize = ZSTD_compressCCtx(zc, dst, dstCapacity, src, srcSize, level);
+    seqsSize = (size_t)(zc->seqCollector.seqCurrent - zc->seqCollector.seqStart);
+
+    /* Switch collection off again : otherwise the next ordinary compression
+     * with this context would keep writing into the caller's array. */
+    zc->seqCollector.collectSequences = 0;
+    free(dst);
+
+    if (ZSTD_isError(cSize)) return cSize;
+    return seqsSize;
+}
 
 /*! ZSTD_compress_frameChunk() :
 *   Compress a chunk of data into one or multiple blocks.
@@ -2438,6 +2443,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                                 op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                 ip, blockSize);
             FORWARD_IF_ERROR(cSize);
+            if (cctx->seqCollector.collectSequences) {
+                ZSTD_getBlockSequences(cctx, ZSTD_getSeqStore(cctx));
+            }
 
             if (cSize == 0) {  /* block is not compressible */
                 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
index 6d623cc6be8186beab098b430e880a105a6c1b99..d40d53404faa5c9e998cddd74620dfbb967a778d 100644 (file)
@@ -192,6 +192,13 @@ typedef struct {
   size_t capacity; /* The capacity starting from `seq` pointer */
 } rawSeqStore_t;
 
+typedef struct {   /* bookkeeping for copying sequences into a caller-provided array during compression */
+    int collectSequences;        /* non-zero : the sequences of each compressed block get copied out */
+    ZSTD_Sequence* seqStart;     /* first slot of the output array */
+    ZSTD_Sequence* seqCurrent;   /* next slot to write; advanced after each block's sequences are copied */
+    size_t maxSequences;         /* capacity of the output array, in ZSTD_Sequence entries */
+} SeqCollector;
+
 struct ZSTD_CCtx_params_s {
     ZSTD_format_e format;
     ZSTD_compressionParameters cParams;
@@ -238,6 +245,7 @@ struct ZSTD_CCtx_s {
     XXH64_state_t xxhState;
     ZSTD_customMem customMem;
     size_t staticSize;
+    SeqCollector seqCollector;
 
     seqStore_t seqStore;      /* sequences storage ptrs */
     ldmState_t ldmState;      /* long distance matching state */
index f8e95f2283e97abfe2848e742edf8632a9dbe22e..782940ef53812fa3080778a3e8bf8124735f63eb 100644 (file)
@@ -1072,6 +1072,14 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 
 typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
 
+typedef struct {                /**< one sequence, as reported by ZSTD_getSequences() */
+    unsigned int matchPos;      /**< running total of the lengths of the block's earlier sequences, plus this litLength */
+    unsigned int offset;        /**< match offset; repeat-offset codes are replaced by an offset value */
+    unsigned int litLength;     /**< literal length of the sequence */
+    unsigned int matchLength;   /**< match length, with the minimum-match bias of 3 already added */
+    int rep;                    /**< set to 1 when the sequence was encoded with a repeat-offset code */
+} ZSTD_Sequence;
+
 typedef struct {
     unsigned windowLog;       /**< largest match distance : larger == more compression, more memory needed during decompression */
     unsigned chainLog;        /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
@@ -1210,6 +1218,9 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
  *           or an error code (if srcSize is too small) */
 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 
+ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src,
+    size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize, int level);   /*!< compresses `src` at `level` using `zc` and stores the sequences found into `outSeqs` (capacity : `outSeqsSize` entries); @return : number of sequences stored */
+
 
 /***************************************
 *  Memory management
index 2de7c0096ff975513520c646648dcc80c06cb0d9..09fe469599554f0ddb400173ffc2652d57a1189d 100644 (file)
@@ -1960,6 +1960,11 @@ static int basicUnitTests(U32 const seed, double compressibility)
         DISPLAYLEVEL(3, "OK \n");
     }
 
+    DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++);
+    memset(CNBuffer, 0, 1000000);
+    assert(ZSTD_getSequences(ZSTD_createCCtx(), CNBuffer, 1000000,
+        compressedBuffer, 1000000, 3) == 1000000 / 131071 + 1);
+
     /* All zeroes test (test bug #137) */
     #define ZEROESLENGTH 100
     DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);