From: Nick Terrell Date: Thu, 20 Oct 2016 00:22:08 +0000 (-0700) Subject: Reject dictionaries with incomplete entropy tables X-Git-Tag: v1.1.1~21^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f9c9af3c2e84bb61841ff41aa8e5afbd2ca83d7d;p=thirdparty%2Fzstd.git Reject dictionaries with incomplete entropy tables If a dictionary specifies that a symbol has probability zero in its `matchLength`, `literalLength`, or `offset` FSE table, but the symbol appears when compressing input, the compressor fails. Ensure that dictionaries support all `matchLength`, and `literalLength` codes. They must also support all of the `offset` codes required to represent every possible offset that can appear in the first block. --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c44ef3d41..fb12898e8 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2448,6 +2448,20 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t } +/* Dictionaries that assign zero probability to symbols that show up causes problems + when FSE encoding. Refuse dictionaries that assign zero probability to symbols + that we may encounter during compression. + NOTE: This behavior is not standard and could be improved in the future. */ +static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { + U32 s; + if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted); + for (s = 0; s <= maxSymbolValue; ++s) { + if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted); + } + return 0; +} + + /* Dictionary format : Magic == ZSTD_DICT_MAGIC (4 bytes) HUF_writeCTable(256) @@ -2464,17 +2478,19 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_ { const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize); if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); dictPtr += hufHeaderSize; } - { short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff, offcodeLog; + { unsigned offcodeLog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted); dictPtr += offcodeHeaderSize; } @@ -2484,6 +2500,8 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_ size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); + /* Every match length code must have non-zero probability */ + CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted); dictPtr += matchlengthHeaderSize; } @@ -2493,6 +2511,8 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_ size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); + /* Every literal length code must have non-zero probability */ + CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted); dictPtr += litlengthHeaderSize; } @@ -2503,6 +2523,13 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_ cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); dictPtr += 12; + { size_t const maxOffset = (dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */ + /* Calculate minimum offset code required to represent maxOffset */ + unsigned const offcodeMax = ZSTD_highbit32(maxOffset); + /* Every possible supported offset <= dictContentSize + 128 KB must be representable */ + CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); + } + cctx->flagStaticTables = 1; return dictPtr - (const BYTE*)dict; }