Change default splitPoint to 100

author Jennifer Liu <jenniferliu620@fb.com>

Tue, 10 Jul 2018 18:19:33 +0000 (11:19 -0700)

committer Jennifer Liu <jenniferliu620@fb.com>

Tue, 10 Jul 2018 18:19:33 +0000 (11:19 -0700)
author Jennifer Liu <jenniferliu620@fb.com>
Tue, 10 Jul 2018 18:19:33 +0000 (11:19 -0700)
committer Jennifer Liu <jenniferliu620@fb.com>
Tue, 10 Jul 2018 18:19:33 +0000 (11:19 -0700)
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c

index 176c386c4c5ab00eb710147b58ed1349a44f6af3..e32991652a60d332c85f7bd5d63c495fe87a452f 100644 (file)
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -39,7 +39,7 @@
  *  Constants
  ***************************************/
  #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
-#define DEFAULT_SPLITPOINT 0.8
+#define DEFAULT_SPLITPOINT 1.0
  
  /*-*************************************
  *  Console display
@@ -497,7 +497,7 @@ static int COVER_checkParameters(ZDICT_cover_params_t parameters,
    if (parameters.d > parameters.k) {
      return 0;
    }
-  /* 0 < splitPoint < 1 */
+  /* 0 < splitPoint <= 1 */
    if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
      return 0;
    }
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h

index 8244c3bac73e112897aab82106e55a89446719dd..9357e40a603e0c0199e85a5bdef78622b003fa07 100644 (file)
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -86,7 +86,7 @@ typedef struct {
      unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
      unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
      unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
-    double splitPoint;           /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, 0 means default (0.8) */
+    double splitPoint;           /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used for training, 0 means default (1.0) */
      ZDICT_params_t zParams;
  } ZDICT_cover_params_t;
  
diff --git a/programs/zstd.1 b/programs/zstd.1

index b63ef4f2a4a038f987bc5716bddbf2fa94c69b1d..3e9e2942381067a3a8bfb90b2a78ffaabea2650f 100644 (file)
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -218,7 +218,7 @@ A dictionary ID is a locally unique ID that a decoder can use to verify it is us
  .
  .TP
  \fB\-\-train\-cover[=k#,d=#,steps=#,split=#]\fR
-Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or \fIsplit\fR <= 0 or \fIsplit\fR > 100, then the default value of 80 is used\. Requires that \fId\fR <= \fIk\fR\.
+Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or \fIsplit\fR <= 0 or \fIsplit\fR > 100, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\.
  .
  .IP
  Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. Supports multithreading if \fBzstd\fR is compiled with threading support\.
diff --git a/programs/zstd.1.md b/programs/zstd.1.md

index 47035f1c08ae93c91e0a9baa0e3299f6031a0a0a..df6f777df18bca4858d25e4a6bb0492c5103a4f9 100644 (file)
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -228,7 +228,7 @@ Compression of small files similar to the sample set will be greatly improved.
      If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
      If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
      If _steps_ is not specified, then the default value of 40 is used.
-    If _split_ is not specified or split <= 0 or split > 100, then the default value of 80 is used.
+    If _split_ is not specified or _split_ <= 0 or _split_ > 100, then the default value of 100 is used.
      Requires that _d_ <= _k_.
  
      Selects segments of size _k_ with highest score to put in the dictionary.
diff --git a/programs/zstdcli.c b/programs/zstdcli.c

index 5408d2a512b642702bb98a0aed102782974e4d52..a466a7ff37aa633f3c93e932593e6f56b514ad6a 100644 (file)
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -84,7 +84,7 @@ static U32 g_ldmMinMatch = 0;
  static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT;
  static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;
  
-#define DEFAULT_SPLITPOINT 0.8
+#define DEFAULT_SPLITPOINT 1.0
  
  /*-************************************
  *  Display Macros
author	Jennifer Liu <jenniferliu620@fb.com>
	Tue, 10 Jul 2018 18:19:33 +0000 (11:19 -0700)
committer	Jennifer Liu <jenniferliu620@fb.com>
	Tue, 10 Jul 2018 18:19:33 +0000 (11:19 -0700)
lib/dictBuilder/cover.c		patch \| blob \| blame \| history
lib/dictBuilder/zdict.h		patch \| blob \| blame \| history
programs/zstd.1		patch \| blob \| blame \| history
programs/zstd.1.md		patch \| blob \| blame \| history
programs/zstdcli.c		patch \| blob \| blame \| history