git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Add a dictionary training large corpus test
author: Sen Huang <senhuang96@fb.com>
Thu, 9 Sep 2021 15:55:43 +0000 (08:55 -0700)
committer: senhuang42 <senhuang96@fb.com>
Mon, 13 Sep 2021 16:29:17 +0000 (12:29 -0400)
tests/playTests.sh

index 774655aa9487194702f0fe7877e9cc4656e285df..45d1d3fe8d8804a0f20967ff9ce91afb7616403e 100755 (executable)
@@ -936,8 +936,13 @@ cat tmp | zstd -14 -f --size-hint=5500  | zstd -t  # considerably too low
 
 
 println "\n===>  dictionary tests "
-
-println "- test with raw dict (content only) "
+println "- Test high/low compressibility corpus training"
+datagen -g12M -P90 > tmpCorpusHighCompress
+datagen -g12M -P5 > tmpCorpusLowCompress
+zstd --train -B2K tmpCorpusHighCompress -o tmpDictHighCompress
+zstd --train -B2K tmpCorpusLowCompress -o tmpDictLowCompress
+rm -f tmpCorpusHighCompress tmpCorpusLowCompress tmpDictHighCompress tmpDictLowCompress
+println "- Test with raw dict (content only) "
 datagen > tmpDict
 datagen -g1M | $MD5SUM > tmp1
 datagen -g1M | zstd -D tmpDict | zstd -D tmpDict -dvq | $MD5SUM > tmp2