From d8a07972687004a5446cd612404de8a1c5584188 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 2 Aug 2021 21:02:31 -0700
Subject: [PATCH] [fuzz] Add Huffman round trip fuzzer

* Add a Huffman round trip fuzzer
* Fix two minor bugs in Huffman that aren't exposed in zstd
  - Incorrect weight comparison (weights are allowed to be equal to
    table log).
  - HUF_compress1X_usingCTable_internal() can return compressed size
    >= source size, so the assert that `cSize <= 65535` isn't correct,
    and it needs to be checked instead.
---
 lib/common/entropy_common.c |   2 +-
 lib/common/huf.h            |   2 +
 lib/compress/huf_compress.c |  25 ++++---
 tests/fuzz/.gitignore       |   1 +
 tests/fuzz/Makefile         |   6 +-
 tests/fuzz/fuzz.py          |   1 +
 tests/fuzz/huf_round_trip.c | 132 ++++++++++++++++++++++++++++++++++++
 7 files changed, 158 insertions(+), 11 deletions(-)
 create mode 100644 tests/fuzz/huf_round_trip.c

diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c
index 41cd69566..6ba1f2209 100644
--- a/lib/common/entropy_common.c
+++ b/lib/common/entropy_common.c
@@ -299,7 +299,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
     weightTotal = 0;
     {   U32 n; for (n=0; n<oSize; n++) {
-            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+            if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
diff --git a/lib/common/huf.h b/lib/common/huf.h
index 39e6c2d04..2f2bf4b7f 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -190,6 +190,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 
@@ -303,6 +304,7 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
 size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);   /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 /** HUF_compress1X_repeat() :
  *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
  *  If it uses hufTable it does not modify hufTable or repeat.
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index 7c23c0c75..8529a4b0b 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -965,7 +965,12 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
 
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
 {
-    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+{
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
 }
 
 static size_t
@@ -986,8 +991,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
 
     assert(op <= oend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize==0) return 0;
-        assert(cSize <= 65535);
+        if (cSize == 0 || cSize > 65535) return 0;
         MEM_writeLE16(ostart, (U16)cSize);
         op += cSize;
     }
@@ -995,8 +999,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     ip += segmentSize;
     assert(op <= oend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize==0) return 0;
-        assert(cSize <= 65535);
+        if (cSize == 0 || cSize > 65535) return 0;
         MEM_writeLE16(ostart+2, (U16)cSize);
         op += cSize;
     }
@@ -1004,8 +1007,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     ip += segmentSize;
     assert(op <= oend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize==0) return 0;
-        assert(cSize <= 65535);
+        if (cSize == 0 || cSize > 65535) return 0;
         MEM_writeLE16(ostart+4, (U16)cSize);
         op += cSize;
     }
@@ -1014,7 +1016,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     assert(op <= oend);
     assert(ip <= iend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
-        if (cSize==0) return 0;
+        if (cSize == 0 || cSize > 65535) return 0;
         op += cSize;
     }
 
@@ -1023,7 +1025,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
 
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
 {
-    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+{
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
 }
 
 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore
index 93d935a85..02c2f10be 100644
--- a/tests/fuzz/.gitignore
+++ b/tests/fuzz/.gitignore
@@ -17,6 +17,7 @@ decompress_dstSize_tooSmall
 fse_read_ncount
 sequence_compression_api
 seekable_roundtrip
+huf_round_trip
 fuzz-*.log
 rt_lib_*
 d_lib_*
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index ccb574b79..5c54ccd77 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -103,7 +103,8 @@ FUZZ_TARGETS := \
 	decompress_dstSize_tooSmall \
 	fse_read_ncount \
 	sequence_compression_api \
-	seekable_roundtrip
+	seekable_roundtrip \
+	huf_round_trip
 
 all: libregression.a $(FUZZ_TARGETS)
 
@@ -200,6 +201,9 @@ sequence_compression_api: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequenc
 seekable_roundtrip: $(FUZZ_HEADERS) $(SEEKABLE_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o
 	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o $(LIB_FUZZING_ENGINE) -o $@
 
+huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o $(LIB_FUZZING_ENGINE) -o $@
+
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
 
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index d8dfa7782..057a47ecb 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -63,6 +63,7 @@ TARGET_INFO = {
     'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
     'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
     'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
+    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
 }
 TARGETS = list(TARGET_INFO.keys())
 ALL_TARGETS = TARGETS + ['all']
diff --git a/tests/fuzz/huf_round_trip.c b/tests/fuzz/huf_round_trip.c
new file mode 100644
index 000000000..0e26ca9b5
--- /dev/null
+++ b/tests/fuzz/huf_round_trip.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**
+ * This fuzz target performs a zstd round-trip test (compress & decompress),
+ * compares the result with the original, and calls abort() on corruption.
+ */
+
+#define HUF_STATIC_LINKING_ONLY
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include "common/cpu.h"
+#include "compress/hist.h"
+#include "common/huf.h"
+#include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
+
+static size_t adjustTableLog(size_t tableLog, size_t maxSymbol)
+{
+    size_t const alphabetSize = maxSymbol + 1;
+    size_t minTableLog = BIT_highbit32(alphabetSize) + 1;
+    if ((alphabetSize & (alphabetSize - 1)) != 0) {
+        ++minTableLog;
+    }
+    assert(minTableLog <= 9);
+    if (tableLog < minTableLog)
+        return minTableLog;
+    else
+        return tableLog;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    /* Select random parameters: #streams, X1 or X2 decoding, bmi2 */
+    int const streams = FUZZ_dataProducer_int32Range(producer, 0, 1);
+    int const symbols = FUZZ_dataProducer_int32Range(producer, 0, 1);
+    int const bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1);
+    /* Select a random cBufSize - it may be too small */
+    size_t const cBufSize = FUZZ_dataProducer_uint32Range(producer, 0, 4 * size);
+    /* Select a random tableLog - we'll adjust it up later */
+    size_t tableLog = FUZZ_dataProducer_uint32Range(producer, 1, 12);
+    size_t const kMaxSize = 256 * 1024;
+    size = FUZZ_dataProducer_remainingBytes(producer);
+    if (size > kMaxSize)
+        size = kMaxSize;
+
+    if (size <= 1) {
+        FUZZ_dataProducer_free(producer);
+        return 0;
+    }
+
+    uint32_t maxSymbol = 255;
+
+    U32 count[256];
+    size_t const mostFrequent = HIST_count(count, &maxSymbol, src, size);
+    FUZZ_ZASSERT(mostFrequent);
+    if (mostFrequent == size) {
+        /* RLE */
+        FUZZ_dataProducer_free(producer);
+        return 0;
+
+    }
+    FUZZ_ASSERT(maxSymbol <= 255);
+    tableLog = adjustTableLog(tableLog, maxSymbol);
+
+    size_t const wkspSize = HUF_WORKSPACE_SIZE;
+    void* wksp = FUZZ_malloc(wkspSize);
+    void* rBuf = FUZZ_malloc(size);
+    void* cBuf = FUZZ_malloc(cBufSize);
+    HUF_CElt* ct = (HUF_CElt*)FUZZ_malloc(HUF_CTABLE_SIZE(maxSymbol));
+    HUF_DTable* dt = (HUF_DTable*)FUZZ_malloc(HUF_DTABLE_SIZE(tableLog) * sizeof(HUF_DTable));
+    dt[0] = tableLog * 0x01000001;
+
+    tableLog = HUF_optimalTableLog(tableLog, size, maxSymbol);
+    FUZZ_ASSERT(tableLog <= 12);
+    tableLog = HUF_buildCTable_wksp(ct, count, maxSymbol, tableLog, wksp, wkspSize);
+    FUZZ_ZASSERT(tableLog);
+    size_t const tableSize = HUF_writeCTable_wksp(cBuf, cBufSize, ct, maxSymbol, tableLog, wksp, wkspSize);
+    if (ERR_isError(tableSize)) {
+        /* Errors on uncompressible data or cBufSize too small */
+        goto _out;
+    }
+    FUZZ_ZASSERT(tableSize);
+    if (symbols == 0) {
+        FUZZ_ZASSERT(HUF_readDTableX1_wksp_bmi2(dt, cBuf, tableSize, wksp, wkspSize, bmi2));
+    } else {
+        size_t const ret = HUF_readDTableX2_wksp(dt, cBuf, tableSize, wksp, wkspSize);
+        if (ERR_getErrorCode(ret) == ZSTD_error_tableLog_tooLarge) {
+            FUZZ_ZASSERT(HUF_readDTableX1_wksp_bmi2(dt, cBuf, tableSize, wksp, wkspSize, bmi2));
+        } else {
+            FUZZ_ZASSERT(ret);
+        }
+    }
+
+    size_t cSize;
+    size_t rSize;
+    if (streams == 0) {
+        cSize = HUF_compress1X_usingCTable_bmi2(cBuf, cBufSize, src, size, ct, bmi2);
+        FUZZ_ZASSERT(cSize);
+        if (cSize != 0)
+            rSize = HUF_decompress1X_usingDTable_bmi2(rBuf, size, cBuf, cSize, dt, bmi2);
+    } else {
+        cSize = HUF_compress4X_usingCTable_bmi2(cBuf, cBufSize, src, size, ct, bmi2);
+        FUZZ_ZASSERT(cSize);
+        if (cSize != 0)
+            rSize = HUF_decompress4X_usingDTable_bmi2(rBuf, size, cBuf, cSize, dt, bmi2);
+    }
+    if (cSize != 0) {
+        FUZZ_ZASSERT(rSize);
+        FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
"Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!"); + } +_out: + free(rBuf); + free(cBuf); + free(ct); + free(dt); + free(wksp); + FUZZ_dataProducer_free(producer); + return 0; +} -- 2.47.2