added a test case for dictBuilder failure

author Yann Collet <cyan@fb.com>

Thu, 11 Jan 2018 17:42:38 +0000 (09:42 -0800)

committer Yann Collet <cyan@fb.com>

Thu, 11 Jan 2018 17:42:38 +0000 (09:42 -0800)
author Yann Collet <cyan@fb.com>
Thu, 11 Jan 2018 17:42:38 +0000 (09:42 -0800)
committer Yann Collet <cyan@fb.com>
Thu, 11 Jan 2018 17:42:38 +0000 (09:42 -0800)
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c

index ef7d3a3437e43819a94de243ac5891df52a9aa69..b9e0ec44ddd5a6c213668e69d73d9144aa995283 100644 (file)
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2869,7 +2869,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
          if (params.nbThreads > 1) {
              if (cctx->mtctx == NULL || (params.nbThreads != ZSTDMT_getNbThreads(cctx->mtctx))) {
                  DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbThreads=%u (previous: %u)",
-                            params.nbThreads, ZSTDMT_getNbThreads(cctx->mtctx));
+                            params.nbThreads, (U32)ZSTDMT_getNbThreads(cctx->mtctx));
                  ZSTDMT_freeCCtx(cctx->mtctx);
                  cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem);
                  if (cctx->mtctx == NULL) return ERROR(memory_allocation);
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c

index efdffddbf930c5f012fb79c588883ae2fa98232e..b683706710d79de1e273d3e7c67138eae4d077a1 100644 (file)
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -537,8 +537,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
    /* Checks */
    if (totalSamplesSize < MAX(d, sizeof(U64)) ||
        totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
-    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
-                 (COVER_MAX_SAMPLES_SIZE >> 20));
+    DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
+                 (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
      return 0;
    }
    /* Zero the context */
@@ -651,12 +651,16 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
  }
  
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
-    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
-    const size_t *samplesSizes, unsigned nbSamples,
-    ZDICT_cover_params_t parameters) {
-  BYTE *const dict = (BYTE *)dictBuffer;
+    void *dictBuffer, size_t dictBufferCapacity,
+    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t parameters)
+{
+  BYTE* const dict = (BYTE*)dictBuffer;
    COVER_ctx_t ctx;
    COVER_map_t activeDmers;
+
+  /* Initialize global data */
+  g_displayLevel = parameters.zParams.notificationLevel;
    /* Checks */
    if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
      DISPLAYLEVEL(1, "Cover parameters incorrect\n");
@@ -671,8 +675,6 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
                   ZDICT_DICTSIZE_MIN);
      return ERROR(dstSize_tooSmall);
    }
-  /* Initialize global data */
-  g_displayLevel = parameters.zParams.notificationLevel;
    /* Initialize context and activeDmers */
    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                        parameters.d)) {
@@ -947,6 +949,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
    unsigned k;
    COVER_best_t best;
    POOL_ctx *pool = NULL;
+
    /* Checks */
    if (kMinK < kMaxD || kMaxK < kMinK) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
@@ -1004,7 +1007,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
        data->parameters.k = k;
        data->parameters.d = d;
        data->parameters.steps = kSteps;
-      data->parameters.zParams.notificationLevel = g_displayLevel;
+      data->parameters.zParams.notificationLevel = displayLevel;
        /* Check the parameters */
        if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
          DISPLAYLEVEL(1, "Cover parameters incorrect\n");
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c

index e8f454045fc0196ef7ceeea708f4045c72b9daa6..3a1649419ef60948fe9834f3e2ec28f6c9ae10d1 100644 (file)
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -207,7 +207,6 @@ static dictItem ZDICT_analyzePos(
      U32 cumulLength[LLIMIT] = {0};
      U32 savings[LLIMIT] = {0};
      const BYTE* b = (const BYTE*)buffer;
-    size_t length;
      size_t maxLength = LLIMIT;
      size_t pos = suffix[start];
      U32 end = start;
@@ -222,26 +221,30 @@ static dictItem ZDICT_analyzePos(
         ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
         ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
          /* skip and mark segment */
-        U16 u16 = MEM_read16(b+pos+4);
-        U32 u, e = 6;
-        while (MEM_read16(b+pos+e) == u16) e+=2 ;
-        if (b[pos+e] == b[pos+e-1]) e++;
-        for (u=1; u<e; u++)
+        U16 const pattern16 = MEM_read16(b+pos+4);
+        U32 u, patternEnd = 6;
+        while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
+        if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
+        for (u=1; u<patternEnd; u++)
              doneMarks[pos+u] = 1;
          return solution;
      }
  
      /* look forward */
-    do {
-        end++;
-        length = ZDICT_count(b + pos, b + suffix[end]);
-    } while (length >=MINMATCHLENGTH);
+    {   size_t length;
+        do {
+            end++;
+            length = ZDICT_count(b + pos, b + suffix[end]);
+        } while (length >= MINMATCHLENGTH);
+    }
  
      /* look backward */
-    do {
-        length = ZDICT_count(b + pos, b + *(suffix+start-1));
-        if (length >=MINMATCHLENGTH) start--;
-    } while(length >= MINMATCHLENGTH);
+    {   size_t length;
+        do {
+            length = ZDICT_count(b + pos, b + *(suffix+start-1));
+            if (length >=MINMATCHLENGTH) start--;
+        } while(length >= MINMATCHLENGTH);
+    }
  
      /* exit if not found a minimum nb of repetitions */
      if (end-start < minRatio) {
@@ -268,7 +271,7 @@ static dictItem ZDICT_analyzePos(
              U32 selectedCount = 0;
              U32 selectedID = currentID;
              for (id =refinedStart; id < refinedEnd; id++) {
-                if (b[ suffix[id] + searchLength] != currentChar) {
+                if (b[suffix[id] + searchLength] != currentChar) {
                      if (currentCount > selectedCount) {
                          selectedCount = currentCount;
                          selectedID = currentID;
@@ -297,20 +300,23 @@ static dictItem ZDICT_analyzePos(
          memset(lengthList, 0, sizeof(lengthList));
  
          /* look forward */
-        do {
-            end++;
-            length = ZDICT_count(b + pos, b + suffix[end]);
-            if (length >= LLIMIT) length = LLIMIT-1;
-            lengthList[length]++;
-        } while (length >=MINMATCHLENGTH);
+        {   size_t length;
+            do {
+                end++;
+                length = ZDICT_count(b + pos, b + suffix[end]);
+                if (length >= LLIMIT) length = LLIMIT-1;
+                lengthList[length]++;
+            } while (length >=MINMATCHLENGTH);
+        }
  
          /* look backward */
-        length = MINMATCHLENGTH;
-        while ((length >= MINMATCHLENGTH) & (start > 0)) {
-            length = ZDICT_count(b + pos, b + suffix[start - 1]);
-            if (length >= LLIMIT) length = LLIMIT - 1;
-            lengthList[length]++;
-            if (length >= MINMATCHLENGTH) start--;
+        {   size_t length = MINMATCHLENGTH;
+            while ((length >= MINMATCHLENGTH) & (start > 0)) {
+                length = ZDICT_count(b + pos, b + suffix[start - 1]);
+                if (length >= LLIMIT) length = LLIMIT - 1;
+                lengthList[length]++;
+                if (length >= MINMATCHLENGTH) start--;
+            }
          }
  
          /* largest useful length */
@@ -345,12 +351,12 @@ static dictItem ZDICT_analyzePos(
          /* mark positions done */
          {   U32 id;
              for (id=start; id<end; id++) {
-                U32 p, pEnd;
+                U32 p, pEnd, length;
                  U32 const testedPos = suffix[id];
                  if (testedPos == pos)
                      length = solution.length;
                  else {
-                    length = ZDICT_count(b+pos, b+testedPos);
+                    length = (U32)ZDICT_count(b+pos, b+testedPos);
                      if (length > solution.length) length = solution.length;
                  }
                  pEnd = (U32)(testedPos + length);
@@ -575,29 +581,30 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
  
  typedef struct
  {
-    ZSTD_CCtx* ref;
-    ZSTD_CCtx* zc;
+    ZSTD_CCtx* ref;    /* contains reference to dictionary */
+    ZSTD_CCtx* zc;     /* working context */
      void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
  } EStats_ress_t;
  
  #define MAXREPOFFSET 1024
  
  static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
-                            U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
-                            const void* src, size_t srcSize, U32 notificationLevel)
+                              U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
+                              const void* src, size_t srcSize,
+                              U32 notificationLevel)
  {
      size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
      size_t cSize;
  
      if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
-    {  size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
-            if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
+    {   size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
+        if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
      }
      cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
      if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
  
      if (cSize) {  /* if == 0; block is not compressible */
-        const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
+        const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
  
          /* literals stats */
          {   const BYTE* bytePtr;
@@ -688,6 +695,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
      BYTE* dstPtr = (BYTE*)dstBuffer;
  
      /* init */
+    DEBUGLOG(4, "ZDICT_analyzeEntropy");
      esr.ref = ZSTD_createCCtx();
      esr.zc = ZSTD_createCCtx();
      esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
@@ -713,7 +721,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
              goto _cleanup;
      }   }
  
-    /* collect stats on all files */
+    /* collect stats on all samples */
      for (u=0; u<nbFiles; u++) {
          ZDICT_countEStats(esr, params,
                            countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
@@ -726,7 +734,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
      errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
      if (HUF_isError(errorCode)) {
          eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "HUF_buildCTable error \n");
+        DISPLAYLEVEL(1, " HUF_buildCTable error \n");
          goto _cleanup;
      }
      huffLog = (U32)errorCode;
@@ -850,6 +858,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
      U32 const notificationLevel = params.notificationLevel;
  
      /* check conditions */
+    DEBUGLOG(4, "ZDICT_finalizeDictionary");
      if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
      if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
      if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
@@ -1054,18 +1063,22 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                               const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
  {
      ZDICT_cover_params_t params;
+    DEBUGLOG(3, "ZDICT_trainFromBuffer");
      memset(&params, 0, sizeof(params));
      params.d = 8;
      params.steps = 4;
-    /* Default to level 6 since no compression level information is avaialble */
+    /* Default to level 6 since no compression level information is available */
      params.zParams.compressionLevel = 6;
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+    params.zParams.notificationLevel = ZSTD_DEBUG;
+#endif
      return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
-                                               samplesBuffer, samplesSizes,
-                                               nbSamples, &params);
+                                               samplesBuffer, samplesSizes, nbSamples,
+                                               &params);
  }
  
  size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
+                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
  {
      ZDICT_params_t params;
      memset(&params, 0, sizeof(params));
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h

index 5f0000b1c7c892b89d37208955a8aa39b197f4a8..92f66415ffcdb375d30d4c0a8d8f313cf8e77bb8 100644 (file)
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -38,21 +38,21 @@ extern "C" {
  
  
  /*! ZDICT_trainFromBuffer():
- * Train a dictionary from an array of samples.
- * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
+ *  Train a dictionary from an array of samples.
+ *  Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
+ *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ *  The resulting dictionary will be saved into `dictBuffer`.
   * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *           or an error code, which can be tested with ZDICT_isError().
- * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
- *        In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+ *          or an error code, which can be tested with ZDICT_isError().
+ *  Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
+ *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
   *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
   */
  ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
-                       const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
+                                    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
  
  
  /*======   Helper functions   ======*/
@@ -72,14 +72,14 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
   * ==================================================================================== */
  
  typedef struct {
-    int      compressionLevel;   /* 0 means default; target a specific zstd compression level */
-    unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
-    unsigned dictID;             /* 0 means auto mode (32-bits random value); other : force dictID value */
+    int      compressionLevel;   /* optimize for a specific zstd compression level; 0 means default */
+    unsigned notificationLevel;  /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
+    unsigned dictID;             /* force dictID value; 0 means auto mode (32-bits random value) */
  } ZDICT_params_t;
  
  /*! ZDICT_cover_params_t:
- *  For all values 0 means default.
   *  k and d are the only required parameters.
+ *  For others, value 0 means default.
   */
  typedef struct {
      unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
@@ -91,28 +91,28 @@ typedef struct {
  
  
  /*! ZDICT_trainFromBuffer_cover():
- * Train a dictionary from an array of samples using the COVER algorithm.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
+ *  Train a dictionary from an array of samples using the COVER algorithm.
+ *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ *  The resulting dictionary will be saved into `dictBuffer`.
   * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *           or an error code, which can be tested with ZDICT_isError().
- * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
- *        In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+ *          or an error code, which can be tested with ZDICT_isError().
+ *  Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
+ *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
   *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
   */
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
-    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
-    const size_t *samplesSizes, unsigned nbSamples,
+    void *dictBuffer, size_t dictBufferCapacity,
+    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
      ZDICT_cover_params_t parameters);
  
  /*! ZDICT_optimizeTrainFromBuffer_cover():
   * The same requirements as above hold for all the parameters except `parameters`.
   * This function tries many parameter combinations and picks the best parameters.
- * `*parameters` is filled with the best parameters found, and the dictionary
- * constructed with those parameters is stored in `dictBuffer`.
+ * `*parameters` is filled with the best parameters found,
+ * dictionary constructed with those parameters is stored in `dictBuffer`.
   *
   * All of the parameters d, k, steps are optional.
   * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
@@ -125,9 +125,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
   * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
   */
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
-    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
-    const size_t *samplesSizes, unsigned nbSamples,
-    ZDICT_cover_params_t *parameters);
+    void* dictBuffer, size_t dictBufferCapacity,
+    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t* parameters);
  
  /*! ZDICT_finalizeDictionary():
   * Given a custom content as a basis for dictionary, and a set of samples,
@@ -157,22 +157,23 @@ typedef struct {
  } ZDICT_legacy_params_t;
  
  /*! ZDICT_trainFromBuffer_legacy():
- * Train a dictionary from an array of samples.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
+ *  Train a dictionary from an array of samples.
+ *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ *  The resulting dictionary will be saved into `dictBuffer`.
   * `parameters` is optional and can be provided with values set to 0 to mean "default".
   * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *           or an error code, which can be tested with ZDICT_isError().
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
- *        In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+ *          or an error code, which can be tested with ZDICT_isError().
+ *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
   *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
+ *  Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
   */
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
-    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
-    const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
+    void *dictBuffer, size_t dictBufferCapacity,
+    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_legacy_params_t parameters);
  
  /* Deprecation warnings */
  /* It is generally possible to disable deprecation warnings from compiler,
diff --git a/tests/fuzzer.c b/tests/fuzzer.c

index 141bf65484c1e7d000f6bf5fcc9f55ce69ef2131..7198329a8629caf65009691ddcc20d748c8ea70d 100644 (file)
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -673,6 +673,15 @@ static int basicUnitTests(U32 seed, double compressibility)
              goto _output_error;
          }
  
+        DISPLAYLEVEL(4, "test%3i : dictBuilder on cyclic data : ", testNb++);
+        assert(compressedBufferSize >= totalSampleSize);
+        { U32 u; for (u=0; u<totalSampleSize; u++) ((BYTE*)compressedBuffer)[u] = (BYTE)u; }
+        { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
+        dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,
+                                         compressedBuffer, samplesSizes, nbSamples);
+        if (ZDICT_isError(dictSize)) goto _output_error;
+        DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
+
          DISPLAYLEVEL(4, "test%3i : dictBuilder : ", testNb++);
          { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
          dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,
author	Yann Collet <cyan@fb.com>
	Thu, 11 Jan 2018 17:42:38 +0000 (09:42 -0800)
committer	Yann Collet <cyan@fb.com>
	Thu, 11 Jan 2018 17:42:38 +0000 (09:42 -0800)
lib/compress/zstd_compress.c		patch \| blob \| blame \| history
lib/dictBuilder/cover.c		patch \| blob \| blame \| history
lib/dictBuilder/zdict.c		patch \| blob \| blame \| history
lib/dictBuilder/zdict.h		patch \| blob \| blame \| history
tests/fuzzer.c		patch \| blob \| blame \| history