Fixes a fuzz issue where dictionary_round_trip failed because the compressor was generating corrupt files thanks to zero weights in the table.
* Only setting loaded dict huf table to valid on non-zero
* Adding hasNoZeroWeights test to fse tables
* Forbidding nbBits != 0 when weight == 0
* Reverting the last commit
* Setting table log to 0 when weight == 0
* Small (invalid) zero weight dict test
* Small (valid) zero weight dict test
* Initializing repeatMode vars to check before zero check
* Removing FSE changes to separate PR
* Reverting accidentally changed file
* Negating bool, using unsigned, optimization nit
/** HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
/** HUF_getNbBits() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
}
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
} }
/* fill nbBits */
+ *hasZeroWeights = 0;
{ U32 n; for (n=0; n<nbSymbols; n++) {
const U32 w = huffWeight[n];
- CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
+ *hasZeroWeights |= (w == 0);
+ CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
} }
/* fill val */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
short* offcodeNCount, unsigned* offcodeMaxValue,
- const void* const dict, size_t dictSize)
+ const void* const dict, size_t dictSize)
{
const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */
const BYTE* const dictEnd = dictPtr + dictSize;
dictPtr += 8;
{ unsigned maxSymbolValue = 255;
- size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
+ unsigned hasZeroWeights = 1; /* pessimistic default: read below before the error check, so it must be defined even if HUF_readCTable() fails before writing it */
+ size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+ dictEnd-dictPtr, &hasZeroWeights);
+
+ /* We only set the loaded table as valid if it contains all non-zero
+ * weights. Otherwise, we set it to check */
+ if (!hasZeroWeights)
+ bs->entropy.huf.repeatMode = HUF_repeat_valid;
+ else bs->entropy.huf.repeatMode = HUF_repeat_check;
+
RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted);
RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted);
dictPtr += hufHeaderSize;
RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted);
} }
- bs->entropy.huf.repeatMode = HUF_repeat_valid;
bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
println "test : incorrect stream size"
cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
+println "\n===> zstd zero weight dict test "
+rm -f tmp*
+cp "$TESTDIR/dict-files/zero-weight-dict" tmp_input
+$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" tmp_input
+$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" -d tmp_input.zst -o tmp_decomp
+$DIFF tmp_decomp tmp_input
+rm -rf tmp*
+
+println "\n===> zstd (valid) zero weight dict test "
+rm -f tmp*
+# 0 has a non-zero weight in the dictionary
+echo "0000000000000000000000000" > tmp_input
+$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" tmp_input
+$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" -d tmp_input.zst -o tmp_decomp
+$DIFF tmp_decomp tmp_input
+rm -rf tmp*
println "\n===> size-hint mode"
test -f dictionary
rm -f tmp* dictionary
-
if [ "$isWindows" = false ] ; then
println "\n===> zstd fifo named pipe test "