$(MAKE) -C $(ZSTDDIR) libzstd
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT)
-ZSTREAMFILES := $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
+ZSTREAMFILES := $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c seqgen.c zstreamtest.c
zstreamtest : CPPFLAGS += $(MULTITHREAD_CPP)
zstreamtest : LDFLAGS += $(MULTITHREAD_LD)
zstreamtest : $(ZSTREAMFILES)
--- /dev/null
+/*
+ * Copyright (c) 2017-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "seqgen.h"
+#include "mem.h"
+#include <string.h>
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b)) /* smaller of a and b; each argument may be evaluated twice, so pass side-effect-free expressions */
+
+static const size_t kMatchBytes = 128; /* maximum length of the high-bit byte run that the literal-length and offset generators emit and later replay */
+
+/* Rotates the 32-bit value x left by r bits. r must be in 1..31: r == 0
+ * would shift right by 32, which is undefined for a 32-bit operand.
+ * Arguments are fully parenthesized so compound expressions expand safely.
+ */
+#define SEQ_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+/* Advances the generator state stored at *src by one step (multiply by
+ * prime1, xor with prime2, rotate left by 13), writes the new state back to
+ * *src and returns (state >> 5) truncated to BYTE.
+ * The output depends only on the incoming state, so replaying from a saved
+ * copy of the state reproduces the same byte sequence.
+ */
+static BYTE SEQ_randByte(U32* src)
+{
+    static const U32 prime1 = 2654435761U;
+    static const U32 prime2 = 2246822519U;
+    U32 rand32 = *src;
+    rand32 *= prime1;
+    rand32 ^= prime2;
+    rand32 = SEQ_rotl32(rand32, 13);
+    *src = rand32;
+    return (BYTE)(rand32 >> 5);
+}
+
+/* Returns a stream in the clean state (state 0, nothing pending) whose byte
+ * generator starts from `seed` and whose running XXH64 is reset with seed 0.
+ * Every field is assigned, so the struct returned by value carries no
+ * indeterminate members.
+ */
+SEQ_stream SEQ_initStream(unsigned seed)
+{
+    SEQ_stream stream;
+    stream.state = 0;
+    stream.saved = 0;
+    stream.bytesLeft = 0;
+    XXH64_reset(&stream.xxh, 0);
+    stream.seed = seed;
+    return stream;
+}
+
+/* Generates a single guard byte, then match length + 1 of a different byte,
+ * then another guard byte.
+ *
+ * Resumable: bytes are written from out->pos up to out->size. When the
+ * buffer fills up, the current phase is kept in stream->state and the
+ * outstanding count in stream->bytesLeft, and the next call (with the same
+ * value) continues from there. Every byte written by a call is also hashed
+ * into stream->xxh and out->pos is advanced past it.
+ *
+ * Returns 0 once the whole sequence has been written. Otherwise returns the
+ * non-zero count pending in the phase that ran out of room (not the total
+ * remaining), meaning the caller must call again.
+ */
+static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
+ SEQ_outBuffer* out)
+{
+ typedef enum {
+ ml_first_byte = 0,
+ ml_match_bytes,
+ ml_last_byte,
+ } ml_state;
+ BYTE* const ostart = (BYTE*)out->dst;
+ BYTE* const oend = ostart + out->size;
+ BYTE* op = ostart + out->pos; /* next byte to write */
+
+ switch ((ml_state)stream->state) { /* resume in the phase the previous call stopped in */
+ case ml_first_byte:
+ /* Generate the opening guard byte and pick a different byte for the match */
+ if (op >= oend) {
+ stream->bytesLeft = 1; /* no room for the guard byte: report it as pending */
+ break;
+ }
+ *op = SEQ_randByte(&stream->seed) & 0xFF;
+ do {
+ stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
+ } while (*op == stream->saved); /* redraw until the match byte differs from the guard */
+ ++op;
+ /* State transition */
+ stream->state = ml_match_bytes;
+ stream->bytesLeft = value + 1;
+ /* fall-through */
+ case ml_match_bytes: {
+ /* Write as many of the matchLength + 1 match bytes as fit in the output buffer */
+ size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
+ if (setLength > 0) {
+ memset(op, stream->saved, setLength);
+ op += setLength;
+ stream->bytesLeft -= setLength;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = ml_last_byte;
+ }
+ /* fall-through */
+ case ml_last_byte:
+ /* Generate the closing guard byte, which must differ from the match byte */
+ if (op >= oend) {
+ stream->bytesLeft = 1; /* no room for the guard byte: report it as pending */
+ break;
+ }
+ do {
+ *op = SEQ_randByte(&stream->seed) & 0xFF;
+ } while (*op == stream->saved);
+ ++op;
+ /* State transition: sequence complete, reset below */
+ /* fall-through */
+ default:
+ stream->state = 0;
+ stream->bytesLeft = 0;
+ break;
+ }
+ XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); /* hash only the bytes written by this call */
+ out->pos = op - ostart;
+ return stream->bytesLeft;
+}
+
+/* Saves the current seed then generates kMatchBytes random bytes >= 128.
+ * Generates literal length - kMatchBytes random bytes < 128.
+ * Generates another kMatchBytes using the saved seed to generate a match.
+ * This way the match is easy to find for the compressors.
+ *
+ * When value < kMatchBytes both runs are only value bytes long and no
+ * bytes < 128 are produced in between.
+ *
+ * Resumable: bytes are written from out->pos up to out->size, with the
+ * current phase kept in stream->state and the outstanding count of that
+ * phase in stream->bytesLeft. Every byte written by a call is hashed into
+ * stream->xxh and out->pos is advanced past it.
+ *
+ * Returns 0 once everything has been written, otherwise the non-zero count
+ * still pending in the phase that ran out of room.
+ */
+static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
+{
+ typedef enum {
+ ll_start = 0,
+ ll_run_bytes,
+ ll_literals,
+ ll_run_match,
+ } ll_state;
+ BYTE* const ostart = (BYTE*)out->dst;
+ BYTE* const oend = ostart + out->size;
+ BYTE* op = ostart + out->pos; /* next byte to write */
+
+ switch ((ll_state)stream->state) { /* resume in the phase the previous call stopped in */
+ case ll_start:
+ stream->state = ll_run_bytes;
+ stream->saved = stream->seed; /* remember the seed so ll_run_match can replay the same bytes */
+ stream->bytesLeft = MIN(kMatchBytes, value);
+ /* fall-through */
+ case ll_run_bytes:
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->seed) | 0x80; /* high bit set: byte >= 128 */
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = ll_literals;
+ stream->bytesLeft = value - MIN(kMatchBytes, value);
+ /* fall-through */
+ case ll_literals:
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->seed) & 0x7F; /* high bit clear: byte < 128 */
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = ll_run_match;
+ stream->bytesLeft = MIN(kMatchBytes, value);
+ /* fall-through */
+ case ll_run_match: {
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->saved) | 0x80; /* replay from the saved seed: repeats the first run */
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ }
+ /* fall-through */
+ default:
+ stream->state = 0;
+ stream->bytesLeft = 0;
+ break;
+ }
+ XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); /* hash only the bytes written by this call */
+ out->pos = op - ostart;
+ return stream->bytesLeft;
+}
+
+/* Saves the current seed then generates kMatchBytes random bytes >= 128.
+ * Generates offset - kMatchBytes of zeros to get a large offset without
+ * polluting the hash tables.
+ * Generates another kMatchBytes using the saved seed to generate a match
+ * with the required offset.
+ *
+ * When value < kMatchBytes both runs are only value bytes long and no
+ * zeros are written in between.
+ *
+ * Resumable: bytes are written from out->pos up to out->size, with the
+ * current phase kept in stream->state and the outstanding count of that
+ * phase in stream->bytesLeft. Every byte written by a call is hashed into
+ * stream->xxh and out->pos is advanced past it.
+ *
+ * Returns 0 once everything has been written, otherwise the non-zero count
+ * still pending in the phase that ran out of room.
+ */
+static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
+{
+ typedef enum {
+ of_start = 0,
+ of_run_bytes,
+ of_offset,
+ of_run_match,
+ } of_state;
+ BYTE* const ostart = (BYTE*)out->dst;
+ BYTE* const oend = ostart + out->size;
+ BYTE* op = ostart + out->pos; /* next byte to write */
+
+ switch ((of_state)stream->state) { /* resume in the phase the previous call stopped in */
+ case of_start:
+ stream->state = of_run_bytes;
+ stream->saved = stream->seed; /* remember the seed so of_run_match can replay the same bytes */
+ stream->bytesLeft = MIN(value, kMatchBytes);
+ /* fall-through */
+ case of_run_bytes: {
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->seed) | 0x80; /* high bit set: byte >= 128 */
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = of_offset;
+ stream->bytesLeft = value - MIN(value, kMatchBytes);
+ }
+ /* fall-through */
+ case of_offset: {
+ /* Write as many of the zero filler bytes between the two runs as fit in the output buffer */
+ size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
+ if (setLength > 0) {
+ memset(op, 0, setLength);
+ op += setLength;
+ stream->bytesLeft -= setLength;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ /* State transition */
+ stream->state = of_run_match;
+ stream->bytesLeft = MIN(value, kMatchBytes);
+ }
+ /* fall-through */
+ case of_run_match: {
+ while (stream->bytesLeft > 0 && op < oend) {
+ *op++ = SEQ_randByte(&stream->saved) | 0x80; /* replay from the saved seed: repeats the first run */
+ --stream->bytesLeft;
+ }
+ if (stream->bytesLeft > 0)
+ break;
+ }
+ /* fall-through */
+ default:
+ stream->state = 0;
+ stream->bytesLeft = 0;
+ break;
+ }
+ XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); /* hash only the bytes written by this call */
+ out->pos = op - ostart;
+ return stream->bytesLeft;
+}
+
+/* Forwards to the generator selected by `type` and hands back its result:
+ * 0 once the requested type/value has been written in full, non-zero while
+ * more output space is still needed.
+ * The caller must keep passing the same type/value until 0 is returned.
+ * SEQ_gen_max and any unrecognized type generate nothing and return 0.
+ */
+size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
+{
+    if (type == SEQ_gen_ml)
+        return SEQ_gen_matchLength(stream, value, out);
+    if (type == SEQ_gen_ll)
+        return SEQ_gen_litLength(stream, value, out);
+    if (type == SEQ_gen_of)
+        return SEQ_gen_offset(stream, value, out);
+    /* SEQ_gen_max or out-of-range type: nothing to generate */
+    return 0;
+}
+
+/* Computes the XXH64 digest over every byte this stream has generated so
+ * far; the stream itself is not modified.
+ */
+XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
+{
+    XXH64_hash_t const digest = XXH64_digest(&stream->xxh);
+    return digest;
+}
--- /dev/null
+/*
+ * Copyright (c) 2017-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef SEQGEN_H
+#define SEQGEN_H
+
+#define XXH_STATIC_LINKING_ONLY
+
+#include "xxhash.h"
+#include <stddef.h> /* size_t */
+
+typedef enum {
+ SEQ_gen_ml = 0, /* match length: guard byte, value + 1 copies of a different byte, guard byte */
+ SEQ_gen_ll, /* literal length: run of bytes >= 128, bytes < 128, then the same run again */
+ SEQ_gen_of, /* offset: run of bytes >= 128, zero filler, then the same run again */
+ SEQ_gen_max /* Must be the last value */
+} SEQ_gen_type;
+
+/* Internal state, do not use */
+typedef struct {
+ XXH64_state_t xxh; /* xxh state for all the data produced so far (seed=0) */
+ unsigned seed; /* running state of the pseudo-random byte generator */
+ int state; /* enum to control state machine (clean=0) */
+ unsigned saved; /* scratch kept across calls: the match byte for match
+ * lengths, or the seed captured before the first run
+ * for literal lengths and offsets */
+ size_t bytesLeft; /* bytes still to write in the current state (0 when clean) */
+} SEQ_stream;
+
+SEQ_stream SEQ_initStream(unsigned seed); /* returns a clean stream (state 0) whose byte generator starts from seed */
+
+typedef struct {
+ void* dst; /* start of the output buffer */
+ size_t size; /* capacity of dst in bytes; generators never write at or past dst + size */
+ size_t pos; /* write position: generators append at dst + pos and advance it */
+} SEQ_outBuffer;
+
+/* Returns non-zero until the current type/value has been generated.
+ * Must pass the same type/value until it returns 0.
+ *
+ * Output is appended to out->dst starting at out->pos and never past
+ * out->size; out->pos is advanced by the number of bytes written, and those
+ * bytes are also added to the stream's running hash (see SEQ_digest).
+ *
+ * Recommended to pick a value in the middle of the range you want, since there
+ * may be some noise that causes actual results to be slightly different.
+ * We try to be more accurate for smaller values.
+ *
+ * NOTE: Very small values don't work well (< 6).
+ */
+size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value,
+ SEQ_outBuffer* out);
+
+/* Returns the xxhash (XXH64, seed 0) of the data produced so far */
+XXH64_hash_t SEQ_digest(SEQ_stream const* stream);
+
+#endif /* SEQGEN_H */
#include "datagen.h" /* RDG_genBuffer */
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#include "xxhash.h" /* XXH64_* */
+#include "seqgen.h"
/*-************************************
return rand32 >> 5;
}
-#define CHECK_Z(f) { \
- size_t const err = f; \
- if (ZSTD_isError(err)) { \
- DISPLAY("Error => %s : %s ", \
- #f, ZSTD_getErrorName(err)); \
- DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \
+#define CHECK(cond, ...) { /* if cond: print the message plus seed, testNb and line, then goto _output_error (all three must exist at the call site) */ \
+ if (cond) { \
+ DISPLAY("Error => "); \
+ DISPLAY(__VA_ARGS__); \
+ DISPLAY(" (seed %u, test nb %u, line %u) \n", \
+ seed, testNb, __LINE__); \
goto _output_error; \
} }
+#define CHECK_Z(f) { /* evaluates f once; if the result is a zstd error, reports the expression text and error name through CHECK */ \
+ size_t const err = f; \
+ CHECK(ZSTD_isError(err), "%s : %s ", \
+ #f, ZSTD_getErrorName(err)); \
+}
+
/*======================================================
* Basic Unit tests
free(dict.start);
}
+/* Round trips data and updates xxh with the decompressed data produced.
+ *
+ * Feeds data[0..size) to ZSTD_compress_generic() with directive endOp, at
+ * most 1024 compressed bytes per iteration, passes each compressed chunk to
+ * ZSTD_decompressStream() and hashes whatever it outputs into xxh. The
+ * decompressed bytes are only hashed here, never compared with data.
+ *
+ * Returns 0 on success, or the first zstd error code returned by the
+ * compressor or the decompressor. Uses two static 1024-byte scratch buffers.
+ */
+static size_t SEQ_roundTrip(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
+ XXH64_state_t* xxh, void* data, size_t size,
+ ZSTD_EndDirective endOp)
+{
+ static BYTE compressed[1024];
+ static BYTE uncompressed[1024];
+
+ ZSTD_inBuffer cin = {data, size, 0};
+ size_t cret;
+
+ do {
+ ZSTD_outBuffer cout = {compressed, sizeof(compressed), 0};
+ ZSTD_inBuffer din = {compressed, 0, 0};
+ ZSTD_outBuffer dout = {uncompressed, 0, 0};
+
+ cret = ZSTD_compress_generic(cctx, &cout, &cin, endOp);
+ if (ZSTD_isError(cret))
+ return cret;
+
+ din.size = cout.pos; /* decompress exactly what this compression step produced */
+ while (din.pos < din.size || (endOp == ZSTD_e_end && cret == 0)) { /* also runs with no input left when ending and cret == 0, until dret == 0 */
+ size_t dret;
+
+ dout.pos = 0; /* reuse the whole scratch buffer on every pass */
+ dout.size = sizeof(uncompressed);
+ dret = ZSTD_decompressStream(dctx, &dout, &din);
+ if (ZSTD_isError(dret))
+ return dret;
+ XXH64_update(xxh, dout.dst, dout.pos);
+ if (dret == 0) /* NOTE(review): per the zstd streaming API, 0 means the frame is fully decoded and flushed -- confirm */
+ break;
+ }
+ } while (cin.pos < cin.size || (endOp != ZSTD_e_continue && cret != 0)); /* repeat while input remains, or a flush/end directive still has a non-zero cret */
+ return 0;
+}
+
+/* Produces the sequence selected by type/value in chunks of at most 1024
+ * bytes and pushes every chunk through SEQ_roundTrip() with ZSTD_e_continue.
+ * Returns 0 once the whole sequence has been generated and round tripped, or
+ * the zstd error code of the first round trip that fails.
+ */
+static size_t SEQ_generateRoundTrip(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
+                                    XXH64_state_t* xxh, SEQ_stream* seq,
+                                    SEQ_gen_type type, unsigned value)
+{
+    static BYTE data[1024];
+
+    for (;;) {
+        SEQ_outBuffer chunk = {data, sizeof(data), 0};
+        size_t const pending = SEQ_gen(seq, type, value, &chunk);
+        size_t const status = SEQ_roundTrip(cctx, dctx, xxh, chunk.dst, chunk.pos, ZSTD_e_continue);
+
+        if (ZSTD_isError(status))
+            return status;
+        /* the generator reports 0 once the sequence is complete */
+        if (pending == 0)
+            break;
+    }
+
+    return 0;
+}
static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem)
{
if (r != 0) goto _output_error; } /* error, or some data not flushed */
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : check dictionary FSE tables can represent every code : ", testNb++);
+ { unsigned const kMaxWindowLog = 24;
+ unsigned value;
+ ZSTD_compressionParameters cParams = ZSTD_getCParams(3, 1U << kMaxWindowLog, 1024);
+ ZSTD_CDict* cdict;
+ ZSTD_DDict* ddict;
+ SEQ_stream seq = SEQ_initStream(0x87654321);
+ SEQ_gen_type type;
+ XXH64_state_t xxh;
+
+ XXH64_reset(&xxh, 0);
+ cParams.windowLog = kMaxWindowLog;
+ cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, ZSTD_dlm_byRef, ZSTD_dm_fullDict, cParams, ZSTD_defaultCMem);
+ ddict = ZSTD_createDDict(dictionary.start, dictionary.filled);
+
+ if (!cdict || !ddict) goto _output_error;
+
+ ZSTD_CCtx_reset(zc);
+ ZSTD_resetDStream(zd);
+ CHECK_Z(ZSTD_CCtx_refCDict(zc, cdict));
+ CHECK_Z(ZSTD_initDStream_usingDDict(zd, ddict));
+ CHECK_Z(ZSTD_setDStreamParameter(zd, DStream_p_maxWindowSize, 1U << kMaxWindowLog));
+ /* Test all values < 300 */
+ for (value = 0; value < 300; ++value) {
+ for (type = (SEQ_gen_type)0; type < SEQ_gen_max; ++type) {
+ CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value));
+ }
+ }
+ /* Test values 2^8 to 2^17 */
+ for (value = (1 << 8); value < (1 << 17); value <<= 1) {
+ for (type = (SEQ_gen_type)0; type < SEQ_gen_max; ++type) {
+ CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value));
+ CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, type, value + (value >> 2)));
+ }
+ }
+ /* Test offset values up to the max window log */
+ for (value = 8; value <= kMaxWindowLog; ++value) {
+ CHECK_Z(SEQ_generateRoundTrip(zc, zd, &xxh, &seq, SEQ_gen_of, (1U << value) - 1));
+ }
+
+ CHECK_Z(SEQ_roundTrip(zc, zd, &xxh, NULL, 0, ZSTD_e_end));
+ CHECK(SEQ_digest(&seq) != XXH64_digest(&xxh), "SEQ XXH64 does not match");
+
+ ZSTD_freeCDict(cdict);
+ ZSTD_freeDDict(ddict);
+ }
+ DISPLAYLEVEL(3, "OK \n");
/* Overlen overwriting window data bug */
DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++);
return (U32)((FUZ_rand(seed) % mod) + minVal);
}
-#define CHECK(cond, ...) { \
- if (cond) { \
- DISPLAY("Error => "); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \
- goto _output_error; \
-} }
-
static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility, int bigTests)
{
U32 const maxSrcLog = bigTests ? 24 : 22;