From 4a498fb9c35fa61a0c07f873fb891e11b92804a1 Mon Sep 17 00:00:00 2001
From: Sen Huang
Date: Thu, 9 Sep 2021 08:55:43 -0700
Subject: [PATCH] Add a dictionary training large corpus test

---
Reviewer notes (this area is ignored when the patch is applied):
  * Inserts a new step at the top of the "dictionary tests" section: two
    corpora are produced with `datagen -g12M` (one with -P90, one with -P5),
    each is passed to `zstd --train -B2K` to write a dictionary file, and the
    four temporary files are then deleted with `rm -f`.
  * The existing raw-dict banner is re-capitalised ("test" -> "Test") and the
    blank line that preceded it is dropped.
  * The trained dictionaries are removed without being used afterwards; as
    written the step only exercises the training commands themselves.

 tests/playTests.sh | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/playTests.sh b/tests/playTests.sh
index 774655aa9..45d1d3fe8 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -936,8 +936,13 @@ cat tmp | zstd -14 -f --size-hint=5500 | zstd -t # considerably too low
 
 
 println "\n===> dictionary tests "
-
-println "- test with raw dict (content only) "
+println "- Test high/low compressibility corpus training"
+datagen -g12M -P90 > tmpCorpusHighCompress
+datagen -g12M -P5 > tmpCorpusLowCompress
+zstd --train -B2K tmpCorpusHighCompress -o tmpDictHighCompress
+zstd --train -B2K tmpCorpusLowCompress -o tmpDictLowCompress
+rm -f tmpCorpusHighCompress tmpCorpusLowCompress tmpDictHighCompress tmpDictLowCompress
+println "- Test with raw dict (content only) "
 datagen > tmpDict
 datagen -g1M | $MD5SUM > tmp1
 datagen -g1M | zstd -D tmpDict | zstd -D tmpDict -dvq | $MD5SUM > tmp2
-- 
2.47.2