From: Nick Terrell Date: Wed, 5 May 2021 00:09:32 +0000 (-0700) Subject: [test] Add large dict/data --patch-from test X-Git-Tag: v1.5.0^2~42^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0b88c2582c4aea7cf2cef4225c7c55ec2302342d;p=thirdparty%2Fzstd.git [test] Add large dict/data --patch-from test Dictionary size must be > `ZSTD_CHUNKSIZE_MAX`. --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 14b284af0..6e18b68e5 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4121,6 +4121,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, /* Assert that we the ms params match the params we're being given */ ZSTD_assertEqualCParams(params->cParams, ms->cParams); + if (srcSize > ZSTD_CHUNKSIZE_MAX) { /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. * Dictionaries right at the edge will immediately trigger overflow @@ -4153,7 +4154,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); if (loadLdmDict) - ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams); + ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams); switch(params->cParams.strategy) { @@ -4167,22 +4168,20 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, case ZSTD_greedy: case ZSTD_lazy: case ZSTD_lazy2: - if (srcSize >= HASH_READ_SIZE) { - if (ms->dedicatedDictSearch) { - assert(ms->chainTable != NULL); - ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE); + assert(srcSize >= HASH_READ_SIZE); + if (ms->dedicatedDictSearch) { + assert(ms->chainTable != NULL); + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE); + } else { + assert(params->useRowMatchFinder != ZSTD_urm_auto); + if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) { + size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); + 
ZSTD_memset(ms->tagTable, 0, tagTableSize); + ZSTD_row_update(ms, iend-HASH_READ_SIZE); + DEBUGLOG(4, "Using row-based hash table for lazy dict"); } else { - assert(params->useRowMatchFinder != ZSTD_urm_auto); - if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) { - size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); - if (ip == src) - ZSTD_memset(ms->tagTable, 0, tagTableSize); - ZSTD_row_update(ms, iend-HASH_READ_SIZE); - DEBUGLOG(4, "Using row-based hash table for lazy dict"); - } else { - ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); - DEBUGLOG(4, "Using chain-based hash table for lazy dict"); - } + ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); + DEBUGLOG(4, "Using chain-based hash table for lazy dict"); } } break; @@ -4191,8 +4190,8 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, case ZSTD_btopt: case ZSTD_btultra: case ZSTD_btultra2: - if (srcSize >= HASH_READ_SIZE) - ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); + assert(srcSize >= HASH_READ_SIZE); + ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); break; default: diff --git a/tests/playTests.sh b/tests/playTests.sh index 04a90e3c2..bfb2e378f 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -1430,6 +1430,14 @@ datagen -g5000000 > tmp_patch zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | grep "long mode automatically triggered" rm -rf tmp* +println "\n===> patch-from very large dictionary and file test" +datagen -g550000000 -P0 > tmp_dict +datagen -g100000000 -P1 > tmp_patch +zstd --long=30 -1f --patch-from tmp_dict tmp_patch +zstd --long=30 -df --patch-from tmp_dict tmp_patch.zst -o tmp_patch_recon +$DIFF -s tmp_patch_recon tmp_patch +rm -rf tmp* + println "\n===> patch-from --stream-size test" datagen -g1000 -P50 > tmp_dict datagen -g1000 -P10 > tmp_patch