[fuzzer] Add dictionary fuzzers

author Nick Terrell <terrelln@fb.com>

Tue, 9 Apr 2019 04:07:28 +0000 (21:07 -0700)

committer Nick Terrell <terrelln@fb.com>

Tue, 9 Apr 2019 04:07:28 +0000 (21:07 -0700)
author Nick Terrell <terrelln@fb.com>
Tue, 9 Apr 2019 04:07:28 +0000 (21:07 -0700)
committer Nick Terrell <terrelln@fb.com>
Tue, 9 Apr 2019 04:07:28 +0000 (21:07 -0700)
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile

index 8a22ad1c5435b0ea651c8772ebc4d9f9465c432e..12ec9524bfc77b0a281f71ac1a72290f4cb1069b 100644 (file)
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -27,7 +27,7 @@ PRGDIR = ../../programs
  
  FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
         -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
-       $(CPPFLAGS)
+       -DZSTD_MULTITHREAD $(CPPFLAGS)
  FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
         -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
         -Wstrict-prototypes -Wundef \
@@ -36,7 +36,7 @@ FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
         -g -fno-omit-frame-pointer
  FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS)
  FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS)
-FUZZ_LDFLAGS := $(LDFLAGS)
+FUZZ_LDFLAGS := -pthread $(LDFLAGS)
  FUZZ_ARFLAGS := $(ARFLAGS)
  FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS)
  
@@ -46,11 +46,13 @@ FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c
  ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c
  ZSTDCOMP_SRC   := $(ZSTDDIR)/compress/*.c
  ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c
+ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c
  FUZZ_SRC       := \
         $(FUZZ_SRC) \
         $(ZSTDDECOMP_SRC) \
         $(ZSTDCOMMON_SRC) \
-       $(ZSTDCOMP_SRC)
+       $(ZSTDCOMP_SRC) \
+       $(ZSTDDICT_SRC)
  
  FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))
  
@@ -65,7 +67,9 @@ FUZZ_TARGETS :=       \
         block_round_trip  \
         simple_decompress \
         stream_decompress \
-       block_decompress
+       block_decompress  \
+       dictionary_round_trip \
+       dictionary_decompress
  
  all: $(FUZZ_TARGETS)
  
@@ -90,6 +94,12 @@ stream_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) stream_decompress.o
  block_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) block_decompress.o
         $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) block_decompress.o $(LIB_FUZZING_ENGINE) -o $@
  
+dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_round_trip.o
+       $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_round_trip.o $(LIB_FUZZING_ENGINE) -o $@
+
+dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_decompress.o
+       $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@
+
  libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
         $(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
  
diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c

new file mode 100644 (file)

index 0000000..bdecdef
--- /dev/null
+++ b/tests/fuzz/dictionary_decompress.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * This fuzz target attempts to decompress the fuzzed data with the dictionary
+ * decompression function to ensure the decompressor never crashes. It does not
+ * fuzz the dictionary.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+
+/* Decompression context: created on first use by LLVMFuzzerTestOneInput() and
+ * freed after every input unless STATEFUL_FUZZING is defined. */
+static ZSTD_DCtx *dctx = NULL;
+/* Output buffer for decompression. It is kept across inputs and only
+ * reallocated when an input needs more room than bufSize. */
+static void* rBuf = NULL;
+/* Number of bytes currently allocated for rBuf. */
+static size_t bufSize = 0;
+
+/*
+ * Fuzzer entry point: trains a dictionary from the input, then tries to
+ * decompress that same input with the dictionary. The decompression result is
+ * ignored on purpose; this target only looks for crashes.
+ * Always returns 0.
+ */
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    /* FUZZ_seed() receives pointers to src and size, so it may adjust both;
+     * everything below uses the values it leaves behind. */
+    uint32_t seed = FUZZ_seed(&src, &size);
+    size_t const floorCapacity = (size_t)256 << 10;
+    size_t const scaledCapacity = 20 * size;
+    size_t const neededBufSize = MAX(scaledCapacity, floorCapacity);
+    FUZZ_dict_t dict;
+
+    /* Grow the shared output buffer when this input needs more room */
+    if (bufSize < neededBufSize) {
+        free(rBuf);
+        rBuf = malloc(neededBufSize);
+        bufSize = neededBufSize;
+        FUZZ_ASSERT(rBuf);
+    }
+    /* Lazily create the decompression context */
+    if (dctx == NULL) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+
+    dict = FUZZ_train(src, size, &seed);
+    (void)ZSTD_decompress_usingDict(dctx,
+                                    rBuf, neededBufSize,
+                                    src, size,
+                                    dict.buff, dict.size);
+    free(dict.buff);
+
+#ifndef STATEFUL_FUZZING
+    ZSTD_freeDCtx(dctx);
+    dctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c

new file mode 100644 (file)

index 0000000..57a2eff
--- /dev/null
+++ b/tests/fuzz/dictionary_round_trip.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * This fuzz target performs a zstd round-trip test (compress & decompress) with
+ * a dictionary, compares the result with the original, and calls abort() on
+ * corruption.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+
+/* Modulus applied to the random draw that picks the compression level passed
+ * to ZSTD_compress_usingDict() in roundTripTest(). */
+static const int kMaxClevel = 19;
+
+/* Compression / decompression contexts: created on first use by
+ * LLVMFuzzerTestOneInput() and freed after every input unless
+ * STATEFUL_FUZZING is defined. */
+static ZSTD_CCtx *cctx = NULL;
+static ZSTD_DCtx *dctx = NULL;
+/* Scratch buffers for the compressed (cBuf) and regenerated (rBuf) data. Both
+ * are kept across inputs and reallocated together when more room is needed. */
+static void* cBuf = NULL;
+static void* rBuf = NULL;
+/* Number of bytes currently allocated for each of cBuf and rBuf. */
+static size_t bufSize = 0;
+/* RNG state: re-initialized from every input by FUZZ_seed() and consumed by
+ * roundTripTest(). */
+static uint32_t seed;
+
+/*
+ * Compresses src with a dictionary trained from src itself, then decompresses
+ * the result with the same dictionary.
+ *
+ * When the low four bits of a random draw are all zero the one-shot
+ * ZSTD_compress_usingDict() API is used with a random level; otherwise the
+ * context gets random parameters, the dictionary is loaded into it, and
+ * ZSTD_compress2() is used. Aborts through FUZZ_ZASSERT() if loading the
+ * dictionary or compressing fails.
+ *
+ * Returns whatever ZSTD_decompress_usingDict() returns; the caller checks it.
+ */
+static size_t roundTripTest(void *result, size_t resultCapacity,
+                            void *compressed, size_t compressedCapacity,
+                            const void *src, size_t srcSize)
+{
+    /* The order of the draws from seed matters: train first, then pick the API */
+    FUZZ_dict_t dict = FUZZ_train(src, srcSize, &seed);
+    int const useAdvancedApi = (FUZZ_rand(&seed) & 15) != 0;
+    size_t cSize;
+    size_t regenSize;
+
+    if (useAdvancedApi) {
+        FUZZ_setRandomParameters(cctx, srcSize, &seed);
+        FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict.buff, dict.size));
+        cSize = ZSTD_compress2(cctx,
+                               compressed, compressedCapacity,
+                               src, srcSize);
+    } else {
+        int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
+
+        cSize = ZSTD_compress_usingDict(cctx,
+                                        compressed, compressedCapacity,
+                                        src, srcSize,
+                                        dict.buff, dict.size,
+                                        cLevel);
+    }
+    FUZZ_ZASSERT(cSize);
+
+    regenSize = ZSTD_decompress_usingDict(dctx,
+                                          result, resultCapacity,
+                                          compressed, cSize,
+                                          dict.buff, dict.size);
+    /* The dictionary is no longer needed once decompression has run */
+    free(dict.buff);
+    return regenSize;
+}
+
+/*
+ * Fuzzer entry point: runs a dictionary round trip on the input and aborts if
+ * the round trip reports an error, regenerates a different size, or produces
+ * bytes that differ from the input.
+ * Always returns 0.
+ */
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    size_t capacity;
+    size_t result;
+
+    /* FUZZ_seed() receives pointers to src and size, so it may adjust both */
+    seed = FUZZ_seed(&src, &size);
+    capacity = ZSTD_compressBound(size);
+
+    /* Grow both scratch buffers together when this input needs more room */
+    if (bufSize < capacity) {
+        free(cBuf);
+        free(rBuf);
+        cBuf = malloc(capacity);
+        rBuf = malloc(capacity);
+        bufSize = capacity;
+        FUZZ_ASSERT(cBuf && rBuf);
+    }
+    /* Lazily create the contexts */
+    if (cctx == NULL) {
+        cctx = ZSTD_createCCtx();
+        FUZZ_ASSERT(cctx);
+    }
+    if (dctx == NULL) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+
+    result = roundTripTest(rBuf, capacity, cBuf, capacity, src, size);
+    FUZZ_ZASSERT(result);
+    FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
+    FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
+
+#ifndef STATEFUL_FUZZING
+    ZSTD_freeCCtx(cctx);
+    cctx = NULL;
+    ZSTD_freeDCtx(dctx);
+    dctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py

index 69376298518e960aecbf784d3a6e37435ea495ed..ee27015a53579bf1c4fdf4da37665a3ff8b9efe0 100755 (executable)
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -34,6 +34,8 @@ TARGETS = [
      'simple_decompress',
      'stream_decompress',
      'block_decompress',
+    'dictionary_round_trip',
+    'dictionary_decompress',
  ]
  ALL_TARGETS = TARGETS + ['all']
  FUZZ_RNG_SEED_SIZE = 4
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c

index 10163e1512b0dcea4c1a5526dc69cccfc73d44e2..0e64400e6bc54b80321cc2f926bcc1e1da9c19eb 100644 (file)
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -8,10 +8,14 @@
   */
  
  #define ZSTD_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+
+#include <string.h>
  
  #include "zstd_helpers.h"
  #include "fuzz_helpers.h"
  #include "zstd.h"
+#include "zdict.h"
  
  static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value)
  {
@@ -71,7 +75,6 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
      setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, state);
      setRand(cctx, ZSTD_c_checksumFlag, 0, 1, state);
      setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, state);
-    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
      /* Select long distance matchig parameters */
      setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, state);
      setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state);
@@ -81,4 +84,54 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
              state);
      setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN,
              ZSTD_LDM_HASHRATELOG_MAX, state);
+    /* Set misc parameters */
+    setRand(cctx, ZSTD_c_nbWorkers, 0, 2, state);
+    setRand(cctx, ZSTD_c_rsyncable, 0, 1, state);
+    setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state);
+    setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state);
+    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
+}
+
+/**
+ * Trains a small dictionary from pseudo-random slices of src.
+ *
+ * Copies nbSamples slices of src, taken at offsets drawn with FUZZ_rand32(),
+ * back to back into a scratch buffer and hands them to
+ * ZDICT_trainFromBuffer_fastCover().
+ *
+ * @param src      data to sample from
+ * @param srcSize  size of src in bytes; 0 is accepted
+ * @param state    RNG state forwarded to FUZZ_rand32()
+ * @return a dictionary whose buff comes from malloc() and must be released by
+ *         the caller with free(). If training fails the result is zeroed
+ *         (buff == NULL, size == 0).
+ */
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)
+{
+    size_t const dictSize = MAX(srcSize / 8, 1024);
+    size_t const totalSampleSize = dictSize * 11;
+    FUZZ_dict_t dict = { malloc(dictSize), dictSize };
+    char* const samples = (char*)malloc(totalSampleSize);
+    unsigned nbSamples = 100;
+    size_t* const samplesSizes = (size_t*)malloc(sizeof(size_t) * nbSamples);
+    size_t pos = 0;
+    size_t sample = 0;
+    ZDICT_fastCover_params_t params;
+    FUZZ_ASSERT(dict.buff && samples && samplesSizes);
+
+    for (sample = 0; sample < nbSamples; ++sample) {
+      size_t const remaining = totalSampleSize - pos;
+      size_t const offset = FUZZ_rand32(state, 0, MAX(srcSize, 1) - 1);
+      size_t const limit = MIN(srcSize - offset, remaining);
+      /* Cap each sample at an even share of the space that is still free, so
+       * the samples that follow are not starved. */
+      size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
+      /* Arithmetic on void* is not ISO C, so address src as bytes. Empty
+       * copies are skipped so memcpy() never sees a possibly-NULL src. */
+      if (toCopy > 0) {
+        memcpy(samples + pos, (char const*)src + offset, toCopy);
+      }
+      pos += toCopy;
+      samplesSizes[sample] = toCopy;
+    }
+    /* Zero the unused tail of the sample buffer */
+    memset(samples + pos, 0, totalSampleSize - pos);
+
+    memset(&params, 0, sizeof(params));
+    params.accel = 5;
+    params.k = 40;
+    params.d = 8;
+    params.f = 14;
+    params.zParams.compressionLevel = 1;
+    dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize,
+        samples, samplesSizes, nbSamples, params);
+    /* ZDICT_* results are checked with ZDICT_isError(). On failure hand back
+     * an empty dictionary instead of a buffer with undefined contents. */
+    if (ZDICT_isError(dict.size)) {
+        free(dict.buff);
+        memset(&dict, 0, sizeof(dict));
+    }
+
+    free(samplesSizes);
+    free(samples);
+
+    return dict;
  }
diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h

index 3856bebecf7e041b828aa00761c7830b074e0b43..457e6e995f0d0a7c8c227ad2826f46eeb632b2e2 100644 (file)
--- a/tests/fuzz/zstd_helpers.h
+++ b/tests/fuzz/zstd_helpers.h
@@ -14,6 +14,8 @@
  #ifndef ZSTD_HELPERS_H
  #define ZSTD_HELPERS_H
  
+#define ZSTD_STATIC_LINKING_ONLY
+
  #include "zstd.h"
  #include <stdint.h>
  
@@ -27,6 +29,17 @@ ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state);
  ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state);
  ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state);
  
+/* A dictionary buffer together with its size, as returned by FUZZ_train(). */
+typedef struct {
+  void* buff;   /* dictionary bytes, allocated with malloc(); NULL if training failed */
+  size_t size;  /* number of dictionary bytes in buff; 0 if training failed */
+} FUZZ_dict_t;
+
+/* Quickly train a dictionary from a source for fuzzing.
+ * NOTE: Don't use this to train production dictionaries, it is only optimized
+ * for speed, and doesn't care about dictionary quality.
+ *
+ * src/srcSize is the data the training samples are taken from, and state is
+ * the RNG state used to choose them.
+ * The returned buff is allocated with malloc() and must be released by the
+ * caller with free(). If training fails, buff is NULL and size is 0.
+ */
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state);
+
  
  #ifdef __cplusplus
  }
author	Nick Terrell <terrelln@fb.com>
	Tue, 9 Apr 2019 04:07:28 +0000 (21:07 -0700)
committer	Nick Terrell <terrelln@fb.com>
	Tue, 9 Apr 2019 04:07:28 +0000 (21:07 -0700)
tests/fuzz/Makefile		patch \| blob \| blame \| history
tests/fuzz/dictionary_decompress.c	[new file with mode: 0644]	patch \| blob
tests/fuzz/dictionary_round_trip.c	[new file with mode: 0644]	patch \| blob
tests/fuzz/fuzz.py		patch \| blob \| blame \| history
tests/fuzz/zstd_helpers.c		patch \| blob \| blame \| history
tests/fuzz/zstd_helpers.h		patch \| blob \| blame \| history