fix educational decoder

author Yann Collet <cyan@fb.com>

Fri, 6 Sep 2019 21:30:13 +0000 (14:30 -0700)

committer Yann Collet <cyan@fb.com>

Fri, 6 Sep 2019 21:30:13 +0000 (14:30 -0700)
author Yann Collet <cyan@fb.com>
Fri, 6 Sep 2019 21:30:13 +0000 (14:30 -0700)
committer Yann Collet <cyan@fb.com>
Fri, 6 Sep 2019 21:30:13 +0000 (14:30 -0700)
diff --git a/Makefile b/Makefile

index acf50cfa3771c615cfe1374eaf895ff9c9a87b84..da3112e30d39ce81ded4afa9d111334ab4cdd834 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -69,6 +69,7 @@ test: MOREFLAGS += -g -DDEBUGLEVEL=$(DEBUGLEVEL) -Werror
  test:
         MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
         $(MAKE) -C $(TESTDIR) $@
+       $(MAKE) -C doc/educational_decoder test
  
  ## shortest: same as `make check`
  .PHONY: shortest
diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile

index c1d2c4cc42fa1ce99f0699a5b799d1f631f864df..ed3f3fef76dc82db9b9646ec4a61b78a43526064 100644 (file)
--- a/doc/educational_decoder/Makefile
+++ b/doc/educational_decoder/Makefile
@@ -26,7 +26,10 @@ test: harness
         @./harness tmp.zst tmp
         @diff -s tmp README.md
         @$(RM) -f tmp*
-       @zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md
+       # present files for training multiple times, to reach minimum threshold
+       @zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md \
+                  harness.c zstd_decompress.c zstd_decompress.h README.md \
+                  harness.c zstd_decompress.c zstd_decompress.h README.md
         @zstd -D dictionary README.md -o tmp.zst
         @./harness tmp.zst tmp dictionary
         @diff -s tmp README.md
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c

index 8e231bbb5a98fbac625ce4d6a906841c7f39dc69..26143cd1a33de94110998f6874984285fe25f572 100644 (file)
--- a/doc/educational_decoder/zstd_decompress.c
+++ b/doc/educational_decoder/zstd_decompress.c
@@ -395,7 +395,7 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
      /* this decoder assumes decompression of a single frame */
      decode_frame(&out, &in, parsed_dict);
  
-    return out.ptr - (u8 *)dst;
+    return (size_t)(out.ptr - (u8 *)dst);
  }
  
  /******* FRAME DECODING ******************************************************/
@@ -416,7 +416,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
  
  static void decode_frame(ostream_t *const out, istream_t *const in,
                           const dictionary_t *const dict) {
-    const u32 magic_number = IO_read_bits(in, 32);
+    const u32 magic_number = (u32)IO_read_bits(in, 32);
      // Zstandard frame
      //
      // "Magic_Number
@@ -497,7 +497,7 @@ static void parse_frame_header(frame_header_t *const header,
      // 3    Reserved_bit
      // 2    Content_Checksum_flag
      // 1-0  Dictionary_ID_flag"
-    const u8 descriptor = IO_read_bits(in, 8);
+    const u8 descriptor = (u8)IO_read_bits(in, 8);
  
      // decode frame header descriptor into flags
      const u8 frame_content_size_flag = descriptor >> 6;
@@ -521,7 +521,7 @@ static void parse_frame_header(frame_header_t *const header,
          //
          // Bit numbers  7-3         2-0
          // Field name   Exponent    Mantissa"
-        u8 window_descriptor = IO_read_bits(in, 8);
+        u8 window_descriptor = (u8)IO_read_bits(in, 8);
          u8 exponent = window_descriptor >> 3;
          u8 mantissa = window_descriptor & 7;
  
@@ -541,7 +541,7 @@ static void parse_frame_header(frame_header_t *const header,
          const int bytes_array[] = {0, 1, 2, 4};
          const int bytes = bytes_array[dictionary_id_flag];
  
-        header->dictionary_id = IO_read_bits(in, bytes * 8);
+        header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
      } else {
          header->dictionary_id = 0;
      }
@@ -633,8 +633,8 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
          //
          // The next 2 bits represent the Block_Type, while the remaining 21 bits
          // represent the Block_Size. Format is little-endian."
-        last_block = IO_read_bits(in, 1);
-        const int block_type = IO_read_bits(in, 2);
+        last_block = (int)IO_read_bits(in, 1);
+        const int block_type = (int)IO_read_bits(in, 2);
          const size_t block_len = IO_read_bits(in, 21);
  
          switch (block_type) {
@@ -748,8 +748,8 @@ static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
      // types"
      //
      // size_format takes between 1 and 2 bits
-    int block_type = IO_read_bits(in, 2);
-    int size_format = IO_read_bits(in, 2);
+    int block_type = (int)IO_read_bits(in, 2);
+    int size_format = (int)IO_read_bits(in, 2);
  
      if (block_type <= 1) {
          // Raw or RLE literals block
@@ -1005,7 +1005,7 @@ static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = {
  static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
      0,  1,  2,   3,   4,   5,    6,    7,    8,    9,     10,    11,
      12, 13, 14,  15,  16,  18,   20,   22,   24,   28,    32,    40,
-    48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65538};
+    48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};
  static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  1,  1,
      1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
@@ -1021,7 +1021,7 @@ static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = {
      2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  
  /// Offset decoding is simpler so we just need a maximum code value
-static const u8 SEQ_MAX_CODES[3] = {35, -1, 52};
+static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};
  
  static void decompress_sequences(frame_context_t *const ctx,
                                   istream_t *const in,
@@ -1132,7 +1132,7 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
      // a single 1-bit and then fills the byte with 0-7 0 bits of padding."
      const int padding = 8 - highest_set_bit(src[len - 1]);
      // The offset starts at the end because FSE streams are read backwards
-    i64 bit_offset = len * 8 - padding;
+    i64 bit_offset = (i64)(len * 8 - (size_t)padding);
  
      // "The bitstream starts with initial state values, each using the required
      // number of bits in their respective accuracy, decoded previously from
@@ -1409,7 +1409,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
  
      // get decompressed size from ZSTD frame header
      {
-        const u32 magic_number = IO_read_bits(&in, 32);
+        const u32 magic_number = (u32)IO_read_bits(&in, 32);
  
          if (magic_number == 0xFD2FB528U) {
              // ZSTD frame
@@ -1418,7 +1418,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
  
              if (header.frame_content_size == 0 && !header.single_segment_flag) {
                  // Content size not provided, we can't tell
-                return -1;
+                return (size_t)-1;
              }
  
              return header.frame_content_size;
diff --git a/doc/educational_decoder/zstd_decompress.h b/doc/educational_decoder/zstd_decompress.h

index a01fde331fb802d360e16837318879ccb3a77dc8..74b18533850a44bff0a083c965e2ee38f4ecda4b 100644 (file)
--- a/doc/educational_decoder/zstd_decompress.h
+++ b/doc/educational_decoder/zstd_decompress.h
@@ -7,6 +7,8 @@
   * in the COPYING file in the root directory of this source tree).
   */
  
+#include <stddef.h>   /* size_t */
+
  /******* EXPOSED TYPES ********************************************************/
  /*
  * Contains the parsed contents of a dictionary
@@ -39,7 +41,7 @@ size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);
   * Return a valid dictionary_t pointer for use with dictionary initialization
   * or decompression
   */
-dictionary_t* create_dictionary();
+dictionary_t* create_dictionary(void);
  
  /*
   * Parse a provided dictionary blob for use in decompression
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c

index 4721205da2afccc85b9944f24a3b20a8195b878c..2e129dd9183a3a1127e166f6c151c06bcc3f0ff4 100644 (file)
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -638,8 +638,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
                      "compared to the source size %u! "
                      "size(source)/size(dictionary) = %f, but it should be >= "
                      "10! This may lead to a subpar dictionary! We recommend "
-                    "training on sources at least 10x, and up to 100x the "
-                    "size of the dictionary!\n", (U32)maxDictSize,
+                    "training on sources at least 10x, and preferably 100x "
+                    "the size of the dictionary! \n", (U32)maxDictSize,
                      (U32)nbDmers, ratio);
  }
author	Yann Collet <cyan@fb.com>
	Fri, 6 Sep 2019 21:30:13 +0000 (14:30 -0700)
committer	Yann Collet <cyan@fb.com>
	Fri, 6 Sep 2019 21:30:13 +0000 (14:30 -0700)
Makefile		patch \| blob \| blame \| history
doc/educational_decoder/Makefile		patch \| blob \| blame \| history
doc/educational_decoder/zstd_decompress.c		patch \| blob \| blame \| history
doc/educational_decoder/zstd_decompress.h		patch \| blob \| blame \| history
lib/dictBuilder/cover.c		patch \| blob \| blame \| history