changed command --block-size into --split

author Yann Collet <cyan@fb.com>

Tue, 4 Mar 2025 20:51:39 +0000 (12:51 -0800)

committer Yann Collet <cyan@fb.com>

Tue, 4 Mar 2025 20:53:23 +0000 (12:53 -0800)
author Yann Collet <cyan@fb.com>
Tue, 4 Mar 2025 20:51:39 +0000 (12:51 -0800)
committer Yann Collet <cyan@fb.com>
Tue, 4 Mar 2025 20:53:23 +0000 (12:53 -0800)
diff --git a/programs/benchzstd.h b/programs/benchzstd.h

index 4fd0e5a8af433a47b35d9935a2a88a92c0238e37..db4d72f9d743f2a4f7798ad61f6d8743fda571fc 100644 (file)
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@@ -92,9 +92,9 @@ typedef enum {
  } BMK_mode_t;
  
  typedef struct {
-    BMK_mode_t mode;        /* 0: all, 1: compress only 2: decode only */
+    BMK_mode_t mode;        /* 0: both, 1: compress only 2: decode only */
      unsigned nbSeconds;     /* default timing is in nbSeconds */
-    size_t blockSize;       /* Maximum size of each block*/
+    size_t blockSize;       /* Maximum size of each independent chunk */
      size_t targetCBlockSize;/* Approximative size of compressed blocks */
      int nbWorkers;          /* multithreading */
      unsigned realTime;      /* real time priority */
diff --git a/programs/zstd.1.md b/programs/zstd.1.md

index e5c1b7fd215b0de5fb00ff11ec7de51641f1766e..3b7bc342a53ff6df2d9320f66d8329ac354c29e9 100644 (file)
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -113,7 +113,11 @@ the last one takes effect.
      Because the compressor's behavior highly depends on the content to compress, there's no guarantee of a smooth progression from one level to another.
  * `--ultra`:
      unlocks high compression levels 20+ (maximum 22), using a lot more memory.
-    Note that decompression will also require more memory when using these levels.
+    Decompression will also need more memory when using these levels.
+* `--max`:
+    set advanced parameters to reach maximum compression.
+    warning: this setting is very slow and uses a lot of resources.
+    It's inappropriate for 32-bit mode and therefore disabled in this mode.
  * `--fast[=#]`:
      switch to ultra-fast compression levels.
      If `=#` is not present, it defaults to `1`.
@@ -161,10 +165,6 @@ the last one takes effect.
  
      Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
      `--memory=windowSize` needs to be passed to the decompressor.
-* `--max`:
-    set advanced parameters to maximum compression.
-    warning: this setting is very slow and uses a lot of resources.
-    It's inappropriate for 32-bit mode and therefore disabled in this mode.
  * `-D DICT`:
      use `DICT` as Dictionary to compress or decompress FILE(s)
  * `--patch-from FILE`:
@@ -554,8 +554,8 @@ Compression of small files similar to the sample set will be greatly improved.
      Use `#` compression level during training (optional).
      Will generate statistics more tuned for selected compression level,
      resulting in a _small_ compression ratio improvement for this level.
-* `-B#`:
-    Split input files into blocks of size # (default: no split)
+* `--split=#`:
+    Split input files into independent chunks of size # (default: no split)
  * `-M#`, `--memory=#`:
      Limit the amount of sample data loaded for training (default: 2 GB).
      Note that the default (2 GB) is also the maximum.
@@ -683,8 +683,8 @@ Benchmarking will employ `max(1, min(4, nbCores/4))` worker threads by default i
      benchmark decompression speed only (requires providing a zstd-compressed content)
  * `-i#`:
      minimum evaluation time, in seconds (default: 3s), benchmark mode only
-* `-B#`, `--block-size=#`:
-    cut file(s) into independent chunks of size # (default: no chunking)
+* `--split=#`:
+    split input file(s) into independent chunks of size # (default: no chunking)
  * `-S`:
      output one benchmark result per input file (default: consolidated result)
  * `-D dictionary`
diff --git a/programs/zstdcli.c b/programs/zstdcli.c

index 83d9b881e502b12e5e380407cc671cd036f4a9e9..38d00225fe4f27ef4f659ad6ebdd3c9c7c499807 100644 (file)
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -307,7 +307,7 @@ static void usageAdvanced(const char* programName)
      DISPLAYOUT("  -b#                           Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT);
      DISPLAYOUT("  -e#                           Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n");
      DISPLAYOUT("  -i#                           Set the minimum evaluation to time # seconds. [Default: 3]\n");
-    DISPLAYOUT("  -B#                           Cut file into independent chunks of size #. [Default: No chunking]\n");
+    DISPLAYOUT("  --split=#                     Split input into independent chunks of size #. [Default: No chunking]\n");
      DISPLAYOUT("  -S                            Output one benchmark result per input file. [Default: Consolidated result]\n");
      DISPLAYOUT("  -D dictionary                 Benchmark using dictionary \n");
      DISPLAYOUT("  --priority=rt                 Set process priority to real-time.\n");
@@ -773,7 +773,7 @@ static int init_cLevel(void) {
  }
  
  #ifdef ZSTD_MULTITHREAD
-static unsigned default_nbThreads(void) {
+static int default_nbThreads(void) {
      const char* const env = getenv(ENV_NBTHREADS);
      if (env != NULL) {
          const char* ptr = env;
@@ -783,7 +783,7 @@ static unsigned default_nbThreads(void) {
                  DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env);
                  return ZSTDCLI_NBTHREADS_DEFAULT;
              } else if (*ptr == 0) {
-                return nbThreads;
+                return (int)nbThreads;
              }
          }
          DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env);
@@ -810,22 +810,31 @@ static unsigned default_nbThreads(void) {
              CLEAN_RETURN(1);      \
  }   }   }
  
-#define NEXT_UINT32(val32) {        \
-    const char* __nb;               \
-    NEXT_FIELD(__nb);               \
-    val32 = readU32FromChar(&__nb); \
-    if(*__nb != 0) {                \
+#define NEXT_INT32(_vari32) {              \
+    const char* __nb;                      \
+    NEXT_FIELD(__nb);                      \
+    _vari32 = (int)readU32FromChar(&__nb); \
+    if(*__nb != 0) {                       \
          errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
-    }                               \
+    }                                      \
  }
  
-#define NEXT_TSIZE(valTsize) {           \
-    const char* __nb;                    \
-    NEXT_FIELD(__nb);                    \
-    valTsize = readSizeTFromChar(&__nb); \
-    if(*__nb != 0) {                     \
+#define NEXT_UINT32(_varu32) {        \
+    const char* __nb;                 \
+    NEXT_FIELD(__nb);                 \
+    _varu32 = readU32FromChar(&__nb); \
+    if(*__nb != 0) {                  \
          errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
-    }                                    \
+    }                                 \
+}
+
+#define NEXT_TSIZE(_varTsize) {           \
+    const char* __nb;                     \
+    NEXT_FIELD(__nb);                     \
+    _varTsize = readSizeTFromChar(&__nb); \
+    if(*__nb != 0) {                      \
+        errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
+    }                                     \
  }
  
  typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode;
@@ -871,7 +880,7 @@ int main(int argCount, const char* argv[])
      int nbWorkers = -1; /* -1 means unset */
      double compressibility = -1.0;  /* lorem ipsum generator */
      unsigned bench_nbSeconds = 3;   /* would be better if this value was synchronized from bench */
-    size_t blockSize = 0;
+    size_t chunkSize = 0;
  
      FIO_prefs_t* const prefs = FIO_createPreferences();
      FIO_ctx_t* const fCtx = FIO_createContext();
@@ -1069,11 +1078,12 @@ int main(int argCount, const char* argv[])
                    continue;
                  }
  #endif
-                if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; }
+                if (longCommandWArg(&argument, "--threads")) { NEXT_INT32(nbWorkers); continue; }
                  if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; }
                  if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; }
                  if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; }
-                if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(blockSize); continue; }
+                if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(chunkSize); continue; } /* hidden command, prefer --split below */
+                if (longCommandWArg(&argument, "--split")) { NEXT_TSIZE(chunkSize); continue; }
                  if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; }
                  if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; }
                  if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; }
@@ -1256,10 +1266,10 @@ int main(int argCount, const char* argv[])
                      bench_nbSeconds = readU32FromChar(&argument);
                      break;
  
-                    /* cut input into blocks (benchmark only) */
+                    /* cut input into independent chunks (benchmark only) */
                  case 'B':
                      argument++;
-                    blockSize = readU32FromChar(&argument);
+                    chunkSize = readU32FromChar(&argument);
                      break;
  
                      /* benchmark files separately (hidden option) */
@@ -1273,7 +1283,7 @@ int main(int argCount, const char* argv[])
                      /* nb of threads (hidden option) */
                  case 'T':
                      argument++;
-                    nbWorkers = readU32FromChar(&argument);
+                    nbWorkers = (int)readU32FromChar(&argument);
                      break;
  
                      /* Dictionary Selection level */
@@ -1324,10 +1334,10 @@ int main(int argCount, const char* argv[])
      if ((nbWorkers==0) && (!singleThread)) {
          /* automatically set # workers based on # of reported cpus */
          if (defaultLogicalCores) {
-            nbWorkers = (unsigned)UTIL_countLogicalCores();
+            nbWorkers = UTIL_countLogicalCores();
              DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers);
          } else {
-            nbWorkers = (unsigned)UTIL_countPhysicalCores();
+            nbWorkers = UTIL_countPhysicalCores();
              DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
          }
      }
@@ -1404,7 +1414,7 @@ int main(int argCount, const char* argv[])
              DISPLAYLEVEL(1, "benchmark mode is only compatible with zstd format \n");
              CLEAN_RETURN(1);
          }
-        benchParams.blockSize = blockSize;
+        benchParams.blockSize = chunkSize;
          benchParams.targetCBlockSize = targetCBlockSize;
          benchParams.nbWorkers = (int)nbWorkers;
          benchParams.realTime = (unsigned)setRealTimePrio;
@@ -1464,18 +1474,18 @@ int main(int argCount, const char* argv[])
              int const optimize = !coverParams.k || !coverParams.d;
              coverParams.nbThreads = (unsigned)nbWorkers;
              coverParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize, memLimit);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, &coverParams, NULL, optimize, memLimit);
          } else if (dict == fastCover) {
              int const optimize = !fastCoverParams.k || !fastCoverParams.d;
              fastCoverParams.nbThreads = (unsigned)nbWorkers;
              fastCoverParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize, memLimit);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, NULL, &fastCoverParams, optimize, memLimit);
          } else {
              ZDICT_legacy_params_t dictParams;
              memset(&dictParams, 0, sizeof(dictParams));
              dictParams.selectivityLevel = dictSelect;
              dictParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0, memLimit);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, &dictParams, NULL, NULL, 0, memLimit);
          }
  #else
          (void)dictCLevel; (void)dictSelect; (void)dictID;  (void)maxDictSize; /* not used when ZSTD_NODICT set */
@@ -1583,7 +1593,7 @@ int main(int argCount, const char* argv[])
          FIO_setCompressionType(prefs, cType);
          FIO_setContentSize(prefs, contentSize);
          FIO_setNbWorkers(prefs, (int)nbWorkers);
-        FIO_setBlockSize(prefs, (int)blockSize);
+        FIO_setBlockSize(prefs, (int)chunkSize);
          if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog);
          FIO_setLdmFlag(prefs, (unsigned)ldmFlag);
          FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);
author	Yann Collet <cyan@fb.com>
	Tue, 4 Mar 2025 20:51:39 +0000 (12:51 -0800)
committer	Yann Collet <cyan@fb.com>
	Tue, 4 Mar 2025 20:53:23 +0000 (12:53 -0800)
programs/benchzstd.h		patch \| blob \| blame \| history
programs/zstd.1.md		patch \| blob \| blame \| history
programs/zstdcli.c		patch \| blob \| blame \| history