implemented dictID reserved ranges

author Yann Collet <yann.collet.73@gmail.com>

Fri, 15 Jul 2016 15:03:38 +0000 (17:03 +0200)

committer Yann Collet <yann.collet.73@gmail.com>

Fri, 15 Jul 2016 15:03:38 +0000 (17:03 +0200)
author Yann Collet <yann.collet.73@gmail.com>
Fri, 15 Jul 2016 15:03:38 +0000 (17:03 +0200)
committer Yann Collet <yann.collet.73@gmail.com>
Fri, 15 Jul 2016 15:03:38 +0000 (17:03 +0200)
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c

index 0378a313aa0a991dadedd8df93ef7896c5ef25b2..c8c8ae30110964933d8ca1ce3f5729ccb7e06cbe 100644 (file)
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -894,7 +894,8 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
      /* dictionary header */
      MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
      {   U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
-        U32 const dictID = params.dictID ? params.dictID : (U32)(randomID>>11);
+        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
+        U32 const dictID = params.dictID ? params.dictID : compliantID;
          MEM_writeLE32((char*)dictBuffer+4, dictID);
      }
      hSize = 8;
@@ -912,6 +913,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
      return MIN(dictBufferCapacity, hSize+dictContentSize);
  }
  
+
  #define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
  /*! ZDICT_trainFromBuffer_unsafe() :
  *   `samplesBuffer` must be followed by noisy guard band.
diff --git a/zstd_compression_format.md b/zstd_compression_format.md

index d432f1166da03fbff765114c318793fc8d461d38..c6afeab1cebe05d9c09c88ea136828b45b4ff35f 100644 (file)
--- a/zstd_compression_format.md
+++ b/zstd_compression_format.md
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
  
  ### Version
  
-0.1.0 (08/07/16)
+0.1.1 (15/07/16)
  
  
  Introduction
@@ -258,9 +258,9 @@ depending on local limitations.
  
  __Dictionary ID__
  
-This is a variable size field, which contains an ID.
-It checks if the correct dictionary is used for decoding.
-Note that this field is optional. If it's not present,
+This is a variable size field, which contains
+the ID of the dictionary required to properly decode the frame.
+Note that this field is optional. When it's not present,
  it's up to the caller to make sure it uses the correct dictionary.
  
  Field size depends on __Dictionary ID flag__.
@@ -271,6 +271,15 @@ Field size depends on __Dictionary ID flag__.
  It's allowed to represent a small ID (for example `13`)
  with a large 4-byte dictionary ID, losing some compactness in the process.
  
+_Reserved ranges:_
+If the frame is going to be distributed in a private environment,
+any dictionary ID can be used.
+However, for public distribution of compressed frames using a dictionary,
+the following ranges are reserved for future use and shall not be used:
+- low range : 1 - 32767
+- high range : >= (2^31)
+
+
  __Frame Content Size__
  
  This is the original (uncompressed) size.
@@ -1136,6 +1145,13 @@ __Header__ : 4 bytes ID, value 0xEC30A437, Little Endian format
  __Dict_ID__ : 4 bytes, stored in Little Endian format.
                DictID can be any value, except 0 (which means no DictID).
                It's used by decoders to check if they use the correct dictionary.
+              _Reserved ranges:_
+              If the dictionary is going to be distributed in a private environment,
+              any dictionary ID can be used.
+              However, for public distribution of dictionaries,
+              the following ranges are reserved for future use and shall not be used:
+              - low range : 1 - 32767
+              - high range : >= (2^31)
  
  __Stats__ : Entropy tables, following the same format as a [compressed blocks].
              They are stored in following order :
@@ -1152,4 +1168,5 @@ __Content__ : Where the actual dictionary content is.
  
  Version changes
  ---------------
+0.1.1 reserved dictID ranges
  0.1.0 initial release
author	Yann Collet <yann.collet.73@gmail.com>
	Fri, 15 Jul 2016 15:03:38 +0000 (17:03 +0200)
committer	Yann Collet <yann.collet.73@gmail.com>
	Fri, 15 Jul 2016 15:03:38 +0000 (17:03 +0200)
lib/dictBuilder/zdict.c		patch \| blob \| blame \| history
zstd_compression_format.md		patch \| blob \| blame \| history