spec update : require minimum nb of literals for 4-streams mode

author Yann Collet <cyan@fb.com>

Thu, 22 Dec 2022 19:30:15 +0000 (11:30 -0800)

committer Yann Collet <cyan@fb.com>

Fri, 23 Dec 2022 00:14:34 +0000 (16:14 -0800)
author Yann Collet <cyan@fb.com>
Thu, 22 Dec 2022 19:30:15 +0000 (11:30 -0800)
committer Yann Collet <cyan@fb.com>
Fri, 23 Dec 2022 00:14:34 +0000 (16:14 -0800)
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md

index 042dbe89a33571c43263b49c22dd250f66c2eee3..e40677a1dc82128bf85b7b42a1886d9bc30b0a0b 100644 (file)
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -435,7 +435,7 @@ They can be decoded first, and then copied during [Sequence Execution],
  or they can be decoded on the flow during [Sequence Execution].
  
  Literals can be stored uncompressed or compressed using Huffman prefix codes.
-When compressed, an optional tree description can be present,
+When compressed, a tree description may optionally be present,
  followed by 1 or 4 streams.
  
  | `Literals_Section_Header` | [`Huffman_Tree_Description`] | [jumpTable] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
@@ -510,7 +510,7 @@ Its value is : `Size_Format = (Literals_Section_Header[0]>>2) & 3`
                 `Regenerated_Size = (Literals_Section_Header[0]>>4) + (Literals_Section_Header[1]<<4) + (Literals_Section_Header[2]<<12)`
  
  Only Stream1 is present for these cases.
-Note : it's allowed to represent a short value (for example `13`)
+Note : it's allowed to represent a short value (for example `27`)
  using a long format, even if it's less efficient.
  
  __`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ :
@@ -521,19 +521,33 @@ __`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__
                 Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
                 `Literals_Section_Header` uses 3 bytes.
  - `Size_Format` == 01 : 4 streams.
-               Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
+               Both `Regenerated_Size` and `Compressed_Size` use 10 bits (6-1023).
                 `Literals_Section_Header` uses 3 bytes.
  - `Size_Format` == 10 : 4 streams.
-               Both `Regenerated_Size` and `Compressed_Size` use 14 bits (0-16383).
+               Both `Regenerated_Size` and `Compressed_Size` use 14 bits (6-16383).
                 `Literals_Section_Header` uses 4 bytes.
  - `Size_Format` == 11 : 4 streams.
-               Both `Regenerated_Size` and `Compressed_Size` use 18 bits (0-262143).
+               Both `Regenerated_Size` and `Compressed_Size` use 18 bits (6-262143).
                 `Literals_Section_Header` uses 5 bytes.
  
  Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention.
  Note: `Compressed_Size` __includes__ the size of the Huffman Tree description
  _when_ it is present.
  
+4 streams is superior to 1 stream in decompression speed,
+by exploiting instruction level parallelism.
+But it's also more expensive,
+costing on average ~7.3 bytes more than the 1 stream mode, mostly from the jump table.
+
+In general, use the 4 streams mode when there are more literals to decode,
+to favor higher decompression speeds.
+Beyond 1KB, the 4 streams mode is compulsory anyway.
+
+Note that a minimum of 6 bytes is required for the 4 streams mode.
+That's a technical minimum, but it's not recommended to employ the 4 streams mode
+for such a small quantity, that would be wasteful.
+A more practical lower bound would be around ~256 bytes.
+
  #### Raw Literals Block
  The data in Stream1 is `Regenerated_Size` bytes long,
  it contains the raw literals data to be used during [Sequence Execution].
diff --git a/lib/common/error_private.c b/lib/common/error_private.c

index baf9b0b0c58e752cabb920418ce3c17f67a79fc6..fe73c5edd4b4a22ddec7e9a43dd327d1aff3e5b2 100644 (file)
--- a/lib/common/error_private.c
+++ b/lib/common/error_private.c
@@ -29,6 +29,7 @@ const char* ERR_getErrorString(ERR_enum code)
      case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
      case PREFIX(corruption_detected): return "Data corruption detected";
      case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
      case PREFIX(parameter_unsupported): return "Unsupported parameter";
      case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
      case PREFIX(init_missing): return "Context should be init first";
diff --git a/lib/common/huf.h b/lib/common/huf.h

index 679493bfaf6703e0f5371055ef5ea06212529dfd..f7316f9b24a171e02fa382e3e1983080d8d291db 100644 (file)
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -88,8 +88,10 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
                                 unsigned maxSymbolValue, unsigned tableLog);
  
  /** HUF_compress4X_wksp() :
- *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
- * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
+ *  Same as HUF_compress2(), but uses externally allocated @workSpace.
+ * @workSpace's size, aka @wkspSize, must be >= HUF_WORKSPACE_SIZE
+ * @srcSize must be >= 6
+ */
  #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
  #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
  HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h

index c9c97b7e4dc392d59af64bb397a8fbfbc521074d..48558873d62f41151c6960734a21562f95dc6eb5 100644 (file)
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -94,6 +94,7 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
  
  #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
  #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */)   /* for a non-null block */
+#define MIN_LITERALS_FOR_4_STREAMS 6
  
  typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
  
diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c

index e97c485ab09d2cdef4e636f2738c89a92faeb6f3..972d40e712d2d2fd609fee44f6c245e1d87edbed 100644 (file)
--- a/lib/compress/zstd_compress_literals.c
+++ b/lib/compress/zstd_compress_literals.c
@@ -164,16 +164,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
      switch(lhSize)
      {
      case 3: /* 2 - 2 - 10 - 10 */
+        if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
          {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
              MEM_writeLE24(ostart, lhc);
              break;
          }
      case 4: /* 2 - 2 - 14 - 14 */
+        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
          {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
              MEM_writeLE32(ostart, lhc);
              break;
          }
      case 5: /* 2 - 2 - 18 - 18 */
+        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
          {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
              MEM_writeLE32(ostart, lhc);
              ostart[4] = (BYTE)(cLitSize >> 10);
diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c

index b3ba5926b29393ed80f96b1af250998c327b5947..e5c35dae9694c038f4eb9aa0f4425ea63e9a742a 100644 (file)
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -289,10 +289,11 @@ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1;   /* single-symbol decodi
  static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
      U64 D4;
      if (MEM_isLittleEndian()) {
-        D4 = (symbol << 8) + nbBits;
+        D4 = (U64)((symbol << 8) + nbBits);
      } else {
-        D4 = symbol + (nbBits << 8);
+        D4 = (U64)(symbol + (nbBits << 8));
      }
+    assert(D4 < (1U << 16));
      D4 *= 0x0001000100010001ULL;
      return D4;
  }
@@ -383,9 +384,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
       * rankStart[0] is not filled because there are no entries in the table for
       * weight 0.
       */
-    {
-        int n;
-        int nextRankStart = 0;
+    {   int n;
+        U32 nextRankStart = 0;
          int const unroll = 4;
          int const nLimit = (int)nbSymbols - unroll + 1;
          for (n=0; n<(int)tableLog+1; n++) {
@@ -412,10 +412,9 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
       * We can switch based on the length to a different inner loop which is
       * optimized for that particular case.
       */
-    {
-        U32 w;
-        int symbol=wksp->rankVal[0];
-        int rankStart=0;
+    {   U32 w;
+        int symbol = wksp->rankVal[0];
+        int rankStart = 0;
          for (w=1; w<tableLog+1; ++w) {
              int const symbolCount = wksp->rankVal[w];
              int const length = (1 << w) >> 1;
@@ -525,7 +524,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
      while (p < pEnd)
          HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
  
-    return pEnd-pStart;
+    return (size_t)(pEnd-pStart);
  }
  
  FORCE_INLINE_TEMPLATE size_t
@@ -551,6 +550,10 @@ HUF_decompress1X1_usingDTable_internal_body(
      return dstSize;
  }
  
+/* HUF_decompress4X1_usingDTable_internal_body():
+ * Conditions :
+ * @dstSize >= 6
+ */
  FORCE_INLINE_TEMPLATE size_t
  HUF_decompress4X1_usingDTable_internal_body(
            void* dst,  size_t dstSize,
@@ -594,6 +597,7 @@ HUF_decompress4X1_usingDTable_internal_body(
  
          if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
          if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
+        if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
          CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
          CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
          CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
@@ -679,8 +683,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
      const BYTE* const iend = (const BYTE*)cSrc + 6;
      BYTE* const oend = (BYTE*)dst + dstSize;
      HUF_DecompressAsmArgs args;
-    {
-        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+    {   size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
          FORWARD_IF_ERROR(ret, "Failed to init asm args");
          if (ret != 0)
              return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
@@ -700,8 +703,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
      (void)iend;
  
      /* finish bit streams one by one. */
-    {
-        size_t const segmentSize = (dstSize+3) / 4;
+    {   size_t const segmentSize = (dstSize+3) / 4;
          BYTE* segmentEnd = (BYTE*)dst;
          int i;
          for (i = 0; i < 4; ++i) {
@@ -1246,6 +1248,11 @@ HUF_decompress1X2_usingDTable_internal_body(
      /* decoded size */
      return dstSize;
  }
+
+/* HUF_decompress4X2_usingDTable_internal_body():
+ * Conditions:
+ * @dstSize >= 6
+ */
  FORCE_INLINE_TEMPLATE size_t
  HUF_decompress4X2_usingDTable_internal_body(
            void* dst,  size_t dstSize,
@@ -1286,8 +1293,9 @@ HUF_decompress4X2_usingDTable_internal_body(
          DTableDesc const dtd = HUF_getDTableDesc(DTable);
          U32 const dtLog = dtd.tableLog;
  
-        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
+        if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
+        if (opStart4 > oend) return ERROR(corruption_detected);     /* overflow */
+        if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
          CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
          CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
          CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c

index 94728a16524bec5d7016aeb83dd05a910f2ee05a..b2351e8f146360c6144aa8762edb817785030f99 100644 (file)
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -166,6 +166,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                  }
                  RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                  RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                if (!singleStream)
+                    RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
+                        "Not enough literals (%zu) for the 4-streams mode (min %u)",
+                        litSize, MIN_LITERALS_FOR_4_STREAMS);
                  RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
                  RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
                  ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
@@ -181,6 +185,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                              dctx->litBuffer, litSize, istart+lhSize, litCSize,
                              dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
                      } else {
+                        assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
                          hufSuccess = HUF_decompress4X_usingDTable_bmi2(
                              dctx->litBuffer, litSize, istart+lhSize, litCSize,
                              dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
@@ -509,7 +514,8 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
                  for (i = 8; i < n; i += 8) {
                      MEM_write64(spread + pos + i, sv);
                  }
-                pos += n;
+                assert(n>=0);
+                pos += (size_t)n;
              }
          }
          /* Now we spread those positions across the table.
diff --git a/lib/zstd_errors.h b/lib/zstd_errors.h

index 57a50d0249bf06f29a6eca0121769626db3c5b83..880e8e4f6da4f6ed9500cb8d5f52118e96331538 100644 (file)
--- a/lib/zstd_errors.h
+++ b/lib/zstd_errors.h
@@ -70,6 +70,7 @@ typedef enum {
    ZSTD_error_frameParameter_windowTooLarge = 16,
    ZSTD_error_corruption_detected = 20,
    ZSTD_error_checksum_wrong      = 22,
+  ZSTD_error_literals_headerWrong = 24,
    ZSTD_error_dictionary_corrupted      = 30,
    ZSTD_error_dictionary_wrong          = 32,
    ZSTD_error_dictionaryCreation_failed = 34,
author	Yann Collet <cyan@fb.com>
	Thu, 22 Dec 2022 19:30:15 +0000 (11:30 -0800)
committer	Yann Collet <cyan@fb.com>
	Fri, 23 Dec 2022 00:14:34 +0000 (16:14 -0800)
doc/zstd_compression_format.md		patch \| blob \| blame \| history
lib/common/error_private.c		patch \| blob \| blame \| history
lib/common/huf.h		patch \| blob \| blame \| history
lib/common/zstd_internal.h		patch \| blob \| blame \| history
lib/compress/zstd_compress_literals.c		patch \| blob \| blame \| history
lib/decompress/huf_decompress.c		patch \| blob \| blame \| history
lib/decompress/zstd_decompress_block.c		patch \| blob \| blame \| history
lib/zstd_errors.h		patch \| blob \| blame \| history