From: Nick Terrell Date: Mon, 5 Oct 2020 22:17:44 +0000 (-0700) Subject: [bug] Fix dictContentType when reprocessing cdict X-Git-Tag: v1.4.7~58^2~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7083f790084be3f8b9b58c365e55c3a1df7de236;p=thirdparty%2Fzstd.git [bug] Fix dictContentType when reprocessing cdict Conditions to trigger: * CDict is loaded as raw content. * CDict starts with the zstd dictionary magic number. * The CDict is reprocessed (not attached or copied). * The new API is used (streaming or `ZSTD_compress2()`). Bug: The dictionary is loaded as a zstd dictionary, not a raw content dictionary, because the dict content type is set to `ZSTD_dct_auto`. Fix: Pass in the dictionary content type from cdict creation to the call to `ZSTD_compress_insertDictionary()`. Test: Added a test case that exposes the bug, and fixed the raw content tests so that they no longer modify `dictBuffer` in place; the old in-place modification turned `dictBuffer` into raw content for every subsequent test that used it, which doesn't seem intentional. 
--- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ea4b04aab..9469fcf39 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -64,6 +64,7 @@ size_t ZSTD_compressBound(size_t srcSize) { struct ZSTD_CDict_s { const void* dictContent; size_t dictContentSize; + ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ ZSTD_cwksp workspace; ZSTD_matchState_t matchState; @@ -3188,7 +3189,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, - cdict->dictContentSize, dictContentType, dtlm, + cdict->dictContentSize, cdict->dictContentType, dtlm, cctx->entropyWorkspace) : ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, @@ -3460,6 +3461,7 @@ static size_t ZSTD_initCDict_internal( ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); } cdict->dictContentSize = dictSize; + cdict->dictContentType = dictContentType; cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 8b10078ac..27a7bde12 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1971,34 +1971,53 @@ static int basicUnitTests(U32 const seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); - DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++); - { - size_t ret; - MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY); - /* Either operation is allowed to fail, but one must fail. 
*/ - ret = ZSTD_CCtx_loadDictionary_advanced( - cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_auto); - if (!ZSTD_isError(ret)) { + { char* rawDictBuffer = (char*)malloc(dictSize); + assert(rawDictBuffer); + memcpy(rawDictBuffer, (char*)dictBuffer + 2, dictSize - 2); + memset(rawDictBuffer + dictSize - 2, 0, 2); + MEM_writeLE32((char*)rawDictBuffer, ZSTD_MAGIC_DICTIONARY); + + DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++); + { + size_t ret; + /* Either operation is allowed to fail, but one must fail. */ + ret = ZSTD_CCtx_loadDictionary_advanced( + cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); + if (!ZSTD_isError(ret)) { + ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)); + if (!ZSTD_isError(ret)) goto _output_error; + } + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++); + { + size_t ret; + ret = ZSTD_CCtx_loadDictionary_advanced( + cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent); + if (ZSTD_isError(ret)) goto _output_error; ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)); - if (!ZSTD_isError(ret)) goto _output_error; + if (ZSTD_isError(ret)) goto _output_error; } - } - DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "OK \n"); - DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++); - { - size_t ret; - MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY); - ret = ZSTD_CCtx_loadDictionary_advanced( - cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_rawContent); - if (ZSTD_isError(ret)) goto _output_error; - ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)); - if 
(ZSTD_isError(ret)) goto _output_error; + DISPLAYLEVEL(3, "test%3i : Testing non-attached CDict with ZSTD_dct_rawContent : ", testNb++); + { size_t const srcSize = MIN(CNBuffSize, 100); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + /* Force the dictionary to be reloaded in raw content mode */ + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceLoad)); + CHECK_Z(ZSTD_CCtx_loadDictionary_advanced(cctx, rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent)); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize); + CHECK_Z(cSize); + } + DISPLAYLEVEL(3, "OK \n"); + + free(rawDictBuffer); } - DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "test%3i : ZSTD_CCtx_refCDict() then set parameters : ", testNb++); { ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 1); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 )); CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );