git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Don't allow default tables to repeat 890/head
author Nick Terrell <terrelln@fb.com>
Fri, 13 Oct 2017 20:23:57 +0000 (13:23 -0700)
committer Nick Terrell <terrelln@fb.com>
Mon, 16 Oct 2017 18:37:56 +0000 (11:37 -0700)
It isn't useful in any case to repeat default tables.
Saves a few bytes on Silesia, since we don't trigger the dictionary
heuristic.

Before: 211988480 => 73651998 bytes
After:  211988480 => 73651721 bytes

lib/compress/zstd_compress.c

index 188b8e717a7a15314ceff68edb23e0466287158d..43e5f54617e0873388996617aefae4d0d455871b 100644 (file)
@@ -1298,7 +1298,13 @@ symbolEncodingType_e ZSTD_selectEncodingType(
     if ( isDefaultAllowed
       && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) ) {
         DEBUGLOG(5, "Selected set_basic");
-        *repeatMode = FSE_repeat_valid;
+        /* The format allows default tables to be repeated, but it isn't useful.
+         * When using simple heuristics to select encoding type, we don't want
+         * to confuse these tables with dictionaries. When running more careful
+         * analysis, we don't need to waste time checking both repeating tables
+         * and default tables.
+         */
+        *repeatMode = FSE_repeat_none;
         return set_basic;
     }
     DEBUGLOG(5, "Selected set_compressed");