]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
[bug] Fix dictContentType when reprocessing cdict
authorNick Terrell <terrelln@fb.com>
Mon, 5 Oct 2020 22:17:44 +0000 (15:17 -0700)
committerNick Terrell <terrelln@fb.com>
Mon, 12 Oct 2020 19:46:10 +0000 (12:46 -0700)
Conditions to trigger:
* CDict is loaded as raw content.
* CDict starts with the zstd dictionary magic number.
* The CDict is reprocessed (not attached or copied).
* The new API is used (streaming or `ZSTD_compress2()`).

Bug: The dictionary is loaded as a zstd dictionary, not a raw content
dictionary, because the dict content type is set to `ZSTD_dct_auto`.

Fix: Pass in the dictionary content type from cdict creation to the call
to `ZSTD_compress_insertDictionary()`.

Test: Added a test case that exposes the bug, and fixed the raw
content tests to not modify the `dictBuffer`, which makes all future
tests with the `dictBuffer` raw content, which doesn't seem intentional.

lib/compress/zstd_compress.c
tests/fuzzer.c

index ea4b04aabcca1724fb92e4821093cfa54d826042..9469fcf39f2b9b8ce688073d92bfc87fe30ed6e0 100644 (file)
@@ -64,6 +64,7 @@ size_t ZSTD_compressBound(size_t srcSize) {
 struct ZSTD_CDict_s {
     const void* dictContent;
     size_t dictContentSize;
+    ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
     U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
     ZSTD_cwksp workspace;
     ZSTD_matchState_t matchState;
@@ -3188,7 +3189,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                 ZSTD_compress_insertDictionary(
                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                         &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
-                        cdict->dictContentSize, dictContentType, dtlm,
+                        cdict->dictContentSize, cdict->dictContentType, dtlm,
                         cctx->entropyWorkspace)
               : ZSTD_compress_insertDictionary(
                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
@@ -3460,6 +3461,7 @@ static size_t ZSTD_initCDict_internal(
         ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
     }
     cdict->dictContentSize = dictSize;
+    cdict->dictContentType = dictContentType;
 
     cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
 
index 8b10078accb500e9da0629c2f49d87afe45b6366..27a7bde122f23fe8f6c0b84028a6fb6845f9a85f 100644 (file)
@@ -1971,34 +1971,53 @@ static int basicUnitTests(U32 const seed, double compressibility)
         }
         DISPLAYLEVEL(3, "OK \n");
 
-        DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++);
-        {
-            size_t ret;
-            MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
-            /* Either operation is allowed to fail, but one must fail. */
-            ret = ZSTD_CCtx_loadDictionary_advanced(
-                    cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_auto);
-            if (!ZSTD_isError(ret)) {
+        {   char* rawDictBuffer = (char*)malloc(dictSize);
+            assert(rawDictBuffer);
+            memcpy(rawDictBuffer, (char*)dictBuffer + 2, dictSize - 2);
+            memset(rawDictBuffer + dictSize - 2, 0, 2);
+            MEM_writeLE32((char*)rawDictBuffer, ZSTD_MAGIC_DICTIONARY);
+
+            DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++);
+            {
+                size_t ret;
+                /* Either operation is allowed to fail, but one must fail. */
+                ret = ZSTD_CCtx_loadDictionary_advanced(
+                        cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+                if (!ZSTD_isError(ret)) {
+                    ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
+                    if (!ZSTD_isError(ret)) goto _output_error;
+                }
+            }
+            DISPLAYLEVEL(3, "OK \n");
+
+            DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++);
+            {
+                size_t ret;
+                ret = ZSTD_CCtx_loadDictionary_advanced(
+                        cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent);
+                if (ZSTD_isError(ret)) goto _output_error;
                 ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
-                if (!ZSTD_isError(ret)) goto _output_error;
+                if (ZSTD_isError(ret)) goto _output_error;
             }
-        }
-        DISPLAYLEVEL(3, "OK \n");
+            DISPLAYLEVEL(3, "OK \n");
 
-        DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++);
-        {
-            size_t ret;
-            MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
-            ret = ZSTD_CCtx_loadDictionary_advanced(
-                    cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_rawContent);
-            if (ZSTD_isError(ret)) goto _output_error;
-            ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
-            if (ZSTD_isError(ret)) goto _output_error;
+            DISPLAYLEVEL(3, "test%3i : Testing non-attached CDict with ZSTD_dct_rawContent : ", testNb++);
+            {   size_t const srcSize = MIN(CNBuffSize, 100);
+                ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+                /* Force the dictionary to be reloaded in raw content mode */
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceLoad));
+                CHECK_Z(ZSTD_CCtx_loadDictionary_advanced(cctx, rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent));
+                cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize);
+                CHECK_Z(cSize);
+            }
+            DISPLAYLEVEL(3, "OK \n");
+
+            free(rawDictBuffer);
         }
-        DISPLAYLEVEL(3, "OK \n");
 
         DISPLAYLEVEL(3, "test%3i : ZSTD_CCtx_refCDict() then set parameters : ", testNb++);
         {   ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 1);
+            ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
             CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
             CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 ));
             CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );