}
+/* Dictionaries that assign zero probability to symbols that show up causes problems
+ when FSE encoding. Refuse dictionaries that assign zero probability to symbols
+ that we may encounter during compression.
+ NOTE: This behavior is not standard and could be improved in the future. */
+static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
+ U32 s;
+ if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
+ for (s = 0; s <= maxSymbolValue; ++s) {
+ if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
+ }
+ return 0;
+}
+
+
/* Dictionary format :
Magic == ZSTD_DICT_MAGIC (4 bytes)
HUF_writeCTable(256)
{
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
+ short offcodeNCount[MaxOff+1];
+ unsigned offcodeMaxValue = MaxOff;
{ size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
dictPtr += hufHeaderSize;
}
- { short offcodeNCount[MaxOff+1];
- unsigned offcodeMaxValue = MaxOff, offcodeLog;
+ { unsigned offcodeLog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+ /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
dictPtr += offcodeHeaderSize;
}
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+ /* Every match length code must have non-zero probability */
+ CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
dictPtr += matchlengthHeaderSize;
}
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+ /* Every literal length code must have non-zero probability */
+ CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
dictPtr += litlengthHeaderSize;
}
cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
dictPtr += 12;
+ { size_t const maxOffset = (dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */
+ /* Calculate minimum offset code required to represent maxOffset */
+ unsigned const offcodeMax = ZSTD_highbit32(maxOffset);
+ /* Every possible supported offset <= dictContentSize + 128 KB must be representable */
+ CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
+ }
+
cctx->flagStaticTables = 1;
return dictPtr - (const BYTE*)dict;
}