/* Dictionaries that assign zero probability to symbols that show up causes problems
- when FSE encoding. Refuse dictionaries that assign zero probability to symbols
- that we may encounter during compression.
- NOTE: This behavior is not standard and could be improved in the future. */
-static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
+ * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
+ * and only dictionaries with 100% valid symbols can be assumed valid.
+ */
+static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
U32 s;
- RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols");
+ if (dictMaxSymbolValue < maxSymbolValue) {
+ return FSE_repeat_check;
+ }
for (s = 0; s <= maxSymbolValue; ++s) {
- RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols");
+ if (normalizedCounter[s] == 0) {
+ return FSE_repeat_check;
+ }
}
- return 0;
+ return FSE_repeat_valid;
}
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
- short* offcodeNCount, unsigned* offcodeMaxValue,
const void* const dict, size_t dictSize)
{
+ short offcodeNCount[MaxOff+1];
+ unsigned offcodeMaxValue = MaxOff;
const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */
const BYTE* const dictEnd = dictPtr + dictSize;
dictPtr += 8;
}
{ unsigned offcodeLog;
- size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+ size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
- /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
/* fill all offset symbols to avoid garbage at end of table */
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
bs->entropy.fse.offcodeCTable,
offcodeNCount, MaxOff, offcodeLog,
workspace, HUF_WORKSPACE_SIZE)),
dictionary_corrupted, "");
+ /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
dictPtr += offcodeHeaderSize;
}
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
- /* Every match length code must have non-zero probability */
- FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), "");
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
bs->entropy.fse.matchlengthCTable,
matchlengthNCount, matchlengthMaxValue, matchlengthLog,
workspace, HUF_WORKSPACE_SIZE)),
dictionary_corrupted, "");
+ bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
dictPtr += matchlengthHeaderSize;
}
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
- /* Every literal length code must have non-zero probability */
- FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), "");
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
bs->entropy.fse.litlengthCTable,
litlengthNCount, litlengthMaxValue, litlengthLog,
workspace, HUF_WORKSPACE_SIZE)),
dictionary_corrupted, "");
+ bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
dictPtr += litlengthHeaderSize;
}
bs->rep[2] = MEM_readLE32(dictPtr+8);
dictPtr += 12;
+ { size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+ U32 offcodeMax = MaxOff;
+ if (dictContentSize <= ((U32)-1) - 128 KB) {
+ U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+ offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
+ }
+ /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
+ bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));
+
+ /* All repCodes must be <= dictContentSize and != 0 */
+ { U32 u;
+ for (u=0; u<3; u++) {
+ RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
+ RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
+ } } }
+
return dictPtr - (const BYTE*)dict;
}
{
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
- short offcodeNCount[MaxOff+1];
- unsigned offcodeMaxValue = MaxOff;
size_t dictID;
size_t eSize;
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
- eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize);
+ eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
dictPtr += eSize;
- { size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
- U32 offcodeMax = MaxOff;
- if (dictContentSize <= ((U32)-1) - 128 KB) {
- U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
- offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
- }
- /* All offset values <= dictContentSize + 128 KB must be representable */
- FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), "");
- /* All repCodes must be <= dictContentSize and != 0*/
- { U32 u;
- for (u=0; u<3; u++) {
- RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
- RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
- } }
-
- bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
- bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
- bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
+ {
+ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
- return dictID;
}
+ return dictID;
}
/** ZSTD_compress_insertDictionary() :