$(MAKE) -C contrib/seekable_format/examples all
$(MAKE) -C contrib/seekable_format/tests test
$(MAKE) -C contrib/largeNbDicts all
+ $(MAKE) -C contrib/externalMatchfinder all
cd build/single_file_libs/ ; ./build_decoder_test.sh
cd build/single_file_libs/ ; ./build_library_test.sh
$(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID)
$(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID)
$(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
+ $(Q)$(MAKE) -C contrib/externalMatchfinder $@ > $(VOID)
$(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
$(Q)$(RM) -r lz4
@echo Cleaning completed
#
# zstreamtest
#
-add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c)
+add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c ${TESTS_DIR}/external_matchfinder.c)
if (NOT MSVC)
target_compile_options(zstreamtest PRIVATE "-Wno-deprecated-declarations")
endif()
dependencies: [ testcommon_dep, thread_dep ],
install: false)
-zstreamtest_sources = [join_paths(zstd_rootdir, 'tests/seqgen.c'),
- join_paths(zstd_rootdir, 'tests/zstreamtest.c')]
+zstreamtest_sources = [
+ join_paths(zstd_rootdir, 'tests/seqgen.c'),
+ join_paths(zstd_rootdir, 'tests/zstreamtest.c'),
+ join_paths(zstd_rootdir, 'tests/external_matchfinder.c')]
zstreamtest = executable('zstreamtest',
zstreamtest_sources,
include_directories: test_includes,
--- /dev/null
+# build artifacts
+externalMatchfinder
--- /dev/null
+# ################################################################
+# Copyright (c) Yann Collet, Meta Platforms, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+PROGDIR = ../../programs
+LIBDIR = ../../lib
+
+LIBZSTD = $(LIBDIR)/libzstd.a
+
+CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/compress -I$(LIBDIR)/common
+
+CFLAGS ?= -O3
+CFLAGS += -std=gnu99
+DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+ -Wstrict-aliasing=1 -Wswitch-enum \
+ -Wstrict-prototypes -Wundef -Wpointer-arith \
+ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
+ -Wredundant-decls
+CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
+
+default: externalMatchfinder
+
+all: externalMatchfinder
+
+externalMatchfinder: matchfinder.c main.c $(LIBZSTD)
+ $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+.PHONY: $(LIBZSTD)
+$(LIBZSTD):
+ $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
+
+clean:
+ $(RM) *.o
+ $(MAKE) -C $(LIBDIR) clean > /dev/null
+ $(RM) externalMatchfinder
--- /dev/null
+externalMatchfinder
+=====================
+
+`externalMatchfinder` is a test tool for the external matchfinder API.
+It demonstrates how to use the API to perform a simple round-trip test.
+
+A sample matchfinder is provided in matchfinder.c, but the user can swap
+this out with a different one if desired. The sample matchfinder implements
+LZ compression with a 1KB hashtable. Dictionary compression is not currently supported.
+
+Command line :
+```
+externalMatchfinder filename
+```
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+#include "zstd_errors.h"
+#include "matchfinder.h" // simpleExternalMatchFinder
+
+#define CHECK(res) \
+do { \
+ if (ZSTD_isError(res)) { \
+ printf("ERROR: %s\n", ZSTD_getErrorName(res)); \
+ return 1; \
+ } \
+} while (0) \
+
+int main(int argc, char *argv[]) {
+ if (argc != 2) {
+ printf("Usage: exampleMatchfinder <file>\n");
+ return 1;
+ }
+
+ ZSTD_CCtx* const zc = ZSTD_createCCtx();
+
+ int simpleExternalMatchState = 0xdeadbeef;
+
+ // Here is the crucial bit of code!
+ ZSTD_registerExternalMatchFinder(
+ zc,
+ &simpleExternalMatchState,
+ simpleExternalMatchFinder
+ );
+
+ {
+ size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1);
+ CHECK(res);
+ }
+
+ FILE *f = fopen(argv[1], "rb");
+ assert(f);
+ {
+ int const ret = fseek(f, 0, SEEK_END);
+ assert(ret == 0);
+ }
+ size_t const srcSize = ftell(f);
+ {
+ int const ret = fseek(f, 0, SEEK_SET);
+ assert(ret == 0);
+ }
+
+ char* const src = malloc(srcSize + 1);
+ assert(src);
+ {
+ size_t const ret = fread(src, srcSize, 1, f);
+ assert(ret == 1);
+ int const ret2 = fclose(f);
+ assert(ret2 == 0);
+ }
+
+ size_t const dstSize = ZSTD_compressBound(srcSize);
+ char* const dst = malloc(dstSize);
+ assert(dst);
+
+ size_t const cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize);
+ CHECK(cSize);
+
+ char* const val = malloc(srcSize);
+ assert(val);
+
+ {
+ size_t const res = ZSTD_decompress(val, srcSize, dst, cSize);
+ CHECK(res);
+ }
+
+ if (memcmp(src, val, srcSize) == 0) {
+ printf("Compression and decompression were successful!\n");
+ printf("Original size: %lu\n", srcSize);
+ printf("Compressed size: %lu\n", cSize);
+ } else {
+ printf("ERROR: input and validation buffers don't match!\n");
+ for (size_t i = 0; i < srcSize; i++) {
+ if (src[i] != val[i]) {
+ printf("First bad index: %zu\n", i);
+ break;
+ }
+ }
+ return 1;
+ }
+
+ ZSTD_freeCCtx(zc);
+ free(src);
+ free(dst);
+ free(val);
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "matchfinder.h"
+
+#define HSIZE 1024
+static U32 const HLOG = 10;
+static U32 const MLS = 4;
+static U32 const BADIDX = 0xffffffff;
+
+size_t simpleExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+) {
+ const BYTE* const istart = (const BYTE*)src;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* ip = istart;
+ const BYTE* anchor = istart;
+ size_t seqCount = 0;
+ U32 hashTable[HSIZE];
+
+ (void)externalMatchState;
+ (void)dict;
+ (void)dictSize;
+ (void)outSeqsCapacity;
+ (void)compressionLevel;
+
+ { int i;
+ for (i=0; i < HSIZE; i++) {
+ hashTable[i] = BADIDX;
+ } }
+
+ while (ip + MLS < iend) {
+ size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS);
+ U32 const matchIndex = hashTable[hash];
+ hashTable[hash] = (U32)(ip - istart);
+
+ if (matchIndex != BADIDX) {
+ const BYTE* const match = istart + matchIndex;
+ U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
+ if (matchLen >= ZSTD_MINMATCH_MIN) {
+ U32 const litLen = (U32)(ip - anchor);
+ U32 const offset = (U32)(ip - match);
+ ZSTD_Sequence const seq = {
+ offset, litLen, matchLen, 0
+ };
+
+ /* Note: it's crucial to stay within the window size! */
+ if (offset <= windowSize) {
+ outSeqs[seqCount++] = seq;
+ ip += matchLen;
+ anchor = ip;
+ continue;
+ }
+ }
+ }
+
+ ip++;
+ }
+
+ { ZSTD_Sequence const finalSeq = {
+ 0, (U32)(iend - anchor), 0, 0
+ };
+ outSeqs[seqCount++] = finalSeq;
+ }
+
+ return seqCount;
+}
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef MATCHFINDER_H
+#define MATCHFINDER_H
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+size_t simpleExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+);
+
+#endif
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
+ case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
+ case PREFIX(externalMatchFinder_failed): return "External matchfinder returned an error code";
case PREFIX(maxCode):
default: return notErrorCode;
}
return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
}
+/* Enables validation for external sequences in debug builds. */
+static int ZSTD_resolveExternalSequenceValidation(int mode) {
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2)
+ (void)mode;
+ return 1;
+#else
+ return mode;
+#endif
+}
+
/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
* If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
}
cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
+ cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
assert(!ZSTD_checkCParams(cParams));
return cctxParams;
}
cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams);
cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, ¶ms->cParams);
cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, ¶ms->cParams);
+ cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
}
bounds.upperBound = (int)ZSTD_ps_disable;
return bounds;
+ case ZSTD_c_enableMatchFinderFallback:
+ bounds.lowerBound = 0;
+ bounds.upperBound = 1;
+ return bounds;
+
default:
bounds.error = ERROR(parameter_unsupported);
return bounds;
case ZSTD_c_useRowMatchFinder:
case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
+ case ZSTD_c_enableMatchFinderFallback:
default:
return 0;
}
case ZSTD_c_useRowMatchFinder:
case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
+ case ZSTD_c_enableMatchFinderFallback:
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
return CCtxParams->prefetchCDictTables;
+ case ZSTD_c_enableMatchFinderFallback:
+ BOUNDCHECK(ZSTD_c_enableMatchFinderFallback, value);
+ CCtxParams->enableMatchFinderFallback = value;
+ return CCtxParams->enableMatchFinderFallback;
+
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
case ZSTD_c_prefetchCDictTables:
*value = (int)CCtxParams->prefetchCDictTables;
break;
+ case ZSTD_c_enableMatchFinderFallback:
+ *value = CCtxParams->enableMatchFinderFallback;
+ break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Can't reset parameters only when not in init stage.");
ZSTD_clearAllDicts(cctx);
+ ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
}
return 0;
return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
}
+/* Helper function for calculating memory requirements.
+ * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */
+static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useExternalMatchFinder) {
+ U32 const divider = (minMatch==3 || useExternalMatchFinder) ? 3 : 4;
+ return blockSize / divider;
+}
+
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
const ZSTD_compressionParameters* cParams,
const ldmParams_t* ldmParams,
const ZSTD_paramSwitch_e useRowMatchFinder,
const size_t buffInSize,
const size_t buffOutSize,
- const U64 pledgedSrcSize)
+ const U64 pledgedSrcSize,
+ int useExternalMatchFinder)
{
size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
- U32 const divider = (cParams->minMatch==3) ? 3 : 4;
- size_t const maxNbSeq = blockSize / divider;
+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder);
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
+ size_t const externalSeqSpace = useExternalMatchFinder
+ ? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
+ : 0;
+
size_t const neededSpace =
cctxSpace +
entropySpace +
ldmSeqSpace +
matchStateSize +
tokenSpace +
- bufferSpace;
+ bufferSpace +
+ externalSeqSpace;
DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
return neededSpace;
* be needed. However, we still allocate two 0-sized buffers, which can
* take space under ASAN. */
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+ &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder);
}
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
- ZSTD_CONTENTSIZE_UNKNOWN);
+ ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder);
}
}
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
- U32 const divider = (params->cParams.minMatch==3) ? 3 : 4;
- size_t const maxNbSeq = blockSize / divider;
+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useExternalMatchFinder);
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
? ZSTD_compressBound(blockSize) + 1
: 0;
size_t const neededSpace =
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
- buffInSize, buffOutSize, pledgedSrcSize);
+ buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder);
int resizeWorkspace;
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
zc->ldmState.loadedDictEnd = 0;
}
+ /* reserve space for block-level external sequences */
+ if (params->useExternalMatchFinder) {
+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
+ zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
+ zc->externalMatchCtx.seqBuffer =
+ (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
+ }
+
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
ssPtr->longLengthType = ZSTD_llt_none;
}
+/* ZSTD_postProcessExternalMatchFinderResult() :
+ * Validates and post-processes sequences obtained through the external matchfinder API:
+ * - Checks whether nbExternalSeqs represents an error condition.
+ * - Appends a block delimiter to outSeqs if one is not already present.
+ * See zstd.h for context regarding block delimiters.
+ * Returns the number of sequences after post-processing, or an error code. */
+static size_t ZSTD_postProcessExternalMatchFinderResult(
+ ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize
+) {
+ RETURN_ERROR_IF(
+ nbExternalSeqs > outSeqsCapacity,
+ externalMatchFinder_failed,
+ "External matchfinder returned error code %lu",
+ (unsigned long)nbExternalSeqs
+ );
+
+ RETURN_ERROR_IF(
+ nbExternalSeqs == 0 && srcSize > 0,
+ externalMatchFinder_failed,
+ "External matchfinder produced zero sequences for a non-empty src buffer!"
+ );
+
+ if (srcSize == 0) {
+ ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence));
+ return 1;
+ }
+
+ {
+ ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1];
+
+ /* We can return early if lastSeq is already a block delimiter. */
+ if (lastSeq.offset == 0 && lastSeq.matchLength == 0) {
+ return nbExternalSeqs;
+ }
+
+ /* This error condition is only possible if the external matchfinder
+ * produced an invalid parse, by definition of ZSTD_sequenceBound(). */
+ RETURN_ERROR_IF(
+ nbExternalSeqs == outSeqsCapacity,
+ externalMatchFinder_failed,
+ "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
+ );
+
+ /* lastSeq is not a block delimiter, so we need to append one. */
+ ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence));
+ return nbExternalSeqs + 1;
+ }
+}
+
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
}
if (zc->externSeqStore.pos < zc->externSeqStore.size) {
assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
+
+ /* External matchfinder + LDM is technically possible, just not implemented yet.
+ * We need to revisit soon and implement it. */
+ RETURN_ERROR_IF(
+ zc->appliedParams.useExternalMatchFinder,
+ parameter_combination_unsupported,
+ "Long-distance matching with external matchfinder enabled is not currently supported."
+ );
+
/* Updates ldmSeqStore.pos */
lastLLSize =
ZSTD_ldm_blockCompress(&zc->externSeqStore,
} else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
+ /* External matchfinder + LDM is technically possible, just not implemented yet.
+ * We need to revisit soon and implement it. */
+ RETURN_ERROR_IF(
+ zc->appliedParams.useExternalMatchFinder,
+ parameter_combination_unsupported,
+ "Long-distance matching with external matchfinder enabled is not currently supported."
+ );
+
ldmSeqStore.seq = zc->ldmSequences;
ldmSeqStore.capacity = zc->maxNbLdmSequences;
/* Updates ldmSeqStore.size */
zc->appliedParams.useRowMatchFinder,
src, srcSize);
assert(ldmSeqStore.pos == ldmSeqStore.size);
- } else { /* not long range mode */
+ } else if (zc->appliedParams.useExternalMatchFinder) {
+ assert(
+ zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
+ );
+ assert(zc->externalMatchCtx.mFinder != NULL);
+
+ { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
+
+ size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(
+ zc->externalMatchCtx.mState,
+ zc->externalMatchCtx.seqBuffer,
+ zc->externalMatchCtx.seqBufferCapacity,
+ src, srcSize,
+ NULL, 0, /* dict and dictSize, currently not supported */
+ zc->appliedParams.compressionLevel,
+ windowSize
+ );
+
+ size_t const nbPostProcessedSeqs = ZSTD_postProcessExternalMatchFinderResult(
+ zc->externalMatchCtx.seqBuffer,
+ nbExternalSeqs,
+ zc->externalMatchCtx.seqBufferCapacity,
+ srcSize
+ );
+
+ /* Return early if there is no error, since we don't need to worry about last literals */
+ if (!ZSTD_isError(nbPostProcessedSeqs)) {
+ ZSTD_sequencePosition seqPos = {0,0,0};
+ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
+ zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, src, srcSize
+ );
+ ms->ldmSeqStore = NULL;
+ DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs);
+ return ZSTDbss_compress;
+ }
+
+ /* Propagate the error if fallback is disabled */
+ if (!zc->appliedParams.enableMatchFinderFallback) {
+ return nbPostProcessedSeqs;
+ }
+
+ /* Fallback to software matchfinder */
+ { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
+ zc->appliedParams.useRowMatchFinder,
+ dictMode);
+ ms->ldmSeqStore = NULL;
+ DEBUGLOG(
+ 5,
+ "External matchfinder returned error code %lu. Falling back to internal matchfinder.",
+ (unsigned long)nbExternalSeqs
+ );
+ lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
+ } }
+ } else { /* not long range mode and no external matchfinder */
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
zc->appliedParams.useRowMatchFinder,
dictMode);
+ assert(zc->externalMatchCtx.mFinder == NULL);
ms->ldmSeqStore = NULL;
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
}
params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams);
params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams);
params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams);
+ params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
#ifdef ZSTD_MULTITHREAD
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
}
}
-typedef struct {
- U32 idx; /* Index in array of ZSTD_Sequence */
- U32 posInSequence; /* Position within sequence at idx */
- size_t posInSrc; /* Number of bytes given by sequences provided so far */
-} ZSTD_sequencePosition;
-
/* ZSTD_validateSequence() :
* @offCode : is presumed to follow format required by ZSTD_storeSeq()
* @returns a ZSTD error code if sequence is not valid
return offBase;
}
-/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
- * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
- */
-static size_t
+size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
return 0;
}
-/* Returns the number of bytes to move the current read position back by.
- * Only non-zero if we ended up splitting a sequence.
- * Otherwise, it may return a ZSTD error if something went wrong.
- *
- * This function will attempt to scan through blockSize bytes
- * represented by the sequences in @inSeqs,
- * storing any (partial) sequences.
- *
- * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
- * avoid splitting a match, or to avoid splitting a match such that it would produce a match
- * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
- */
-static size_t
+size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize)
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}
+
+void ZSTD_registerExternalMatchFinder(
+ ZSTD_CCtx* zc, void* mState,
+ ZSTD_externalMatchFinder_F* mFinder
+) {
+ ZSTD_externalMatchCtx emctx = {
+ mState,
+ mFinder,
+
+ /* seqBuffer is allocated later (from the cwskp) */
+ NULL, /* seqBuffer */
+ 0 /* seqBufferCapacity */
+ };
+ zc->externalMatchCtx = emctx;
+ zc->requestedParams.useExternalMatchFinder = 1;
+}
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
+typedef struct {
+ U32 idx; /* Index in array of ZSTD_Sequence */
+ U32 posInSequence; /* Position within sequence at idx */
+ size_t posInSrc; /* Number of bytes given by sequences provided so far */
+} ZSTD_sequencePosition;
+
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct {
/* Controls prefetching in some dictMatchState matchfinders */
ZSTD_paramSwitch_e prefetchCDictTables;
+
+ /* Controls whether zstd will fall back to an internal matchfinder
+ * if the external matchfinder returns an error code. */
+ int enableMatchFinderFallback;
+
+ /* Indicates whether an external matchfinder has been referenced.
+ * Users can't set this externally.
+ * It is set internally in ZSTD_registerExternalMatchFinder(). */
+ int useExternalMatchFinder;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;
+/* Context for block-level external matchfinder API */
+typedef struct {
+ void* mState;
+ ZSTD_externalMatchFinder_F* mFinder;
+ ZSTD_Sequence* seqBuffer;
+ size_t seqBufferCapacity;
+} ZSTD_externalMatchCtx;
+
struct ZSTD_CCtx_s {
ZSTD_compressionStage_e stage;
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
/* Workspace for block splitter */
ZSTD_blockSplitCtx blockSplitCtx;
+
+ /* Workspace for external matchfinder */
+ ZSTD_externalMatchCtx externalMatchCtx;
};
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
*/
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
+ * Note that the block delimiter must include the last literals of the block.
+ */
+size_t
+ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
+ ZSTD_sequencePosition* seqPos,
+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+ const void* src, size_t blockSize);
+
+/* Returns the number of bytes to move the current read position back by.
+ * Only non-zero if we ended up splitting a sequence.
+ * Otherwise, it may return a ZSTD error if something went wrong.
+ *
+ * This function will attempt to scan through blockSize bytes
+ * represented by the sequences in @inSeqs,
+ * storing any (partial) sequences.
+ *
+ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
+ * avoid splitting a match, or to avoid splitting a match such that it would produce a match
+ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
+ */
+size_t
+ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+ const void* src, size_t blockSize);
+
#endif /* ZSTD_COMPRESS_H */
* ZSTD_c_useBlockSplitter
* ZSTD_c_useRowMatchFinder
* ZSTD_c_prefetchCDictTables
+ * ZSTD_c_enableMatchFinderFallback
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
ZSTD_c_experimentalParam13=1010,
ZSTD_c_experimentalParam14=1011,
ZSTD_c_experimentalParam15=1012,
- ZSTD_c_experimentalParam16=1013
+ ZSTD_c_experimentalParam16=1013,
+ ZSTD_c_experimentalParam17=1014
} ZSTD_cParameter;
typedef struct {
* They will be used to compress next frame.
* Resetting session never fails.
* - The parameters : changes all parameters back to "default".
- * This removes any reference to any dictionary too.
+ * This also removes any reference to any dictionary or external matchfinder.
* Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
* otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
* - Both : similar to resetting the session, followed by resetting parameters.
*/
#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
+/* ZSTD_c_enableMatchFinderFallback
+ * Allowed values are 0 (disable) and 1 (enable). The default setting is 0.
+ *
+ * Controls whether zstd will fall back to an internal matchfinder if an
+ * external matchfinder is registered and returns an error code. This fallback is
+ * block-by-block: the internal matchfinder will only be called for blocks where
+ * the external matchfinder returns an error code. Fallback compression will
+ * follow any other cParam settings, such as compression level, the same as in a
+ * normal (fully-internal) compression operation.
+ *
+ * The user is strongly encouraged to read the full external matchfinder API
+ * documentation (below) before setting this parameter. */
+#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17
+
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
+/* ********************** EXTERNAL MATCHFINDER API **********************
+ *
+ * *** OVERVIEW ***
+ * This API allows users to replace the zstd internal block-level matchfinder
+ * with an external matchfinder function. Potential applications of the API
+ * include hardware-accelerated matchfinders and matchfinders specialized to
+ * particular types of data.
+ *
+ * See contrib/externalMatchfinder for an example program employing the
+ * external matchfinder API.
+ *
+ * *** USAGE ***
+ * The user is responsible for implementing a function of type
+ * ZSTD_externalMatchFinder_F. For each block, zstd will pass the following
+ * arguments to the user-provided function:
+ *
+ * - externalMatchState: a pointer to a user-managed state for the external
+ * matchfinder.
+ *
+ * - outSeqs, outSeqsCapacity: an output buffer for sequences produced by the
+ * external matchfinder. outSeqsCapacity is guaranteed >=
+ * ZSTD_sequenceBound(srcSize). The memory backing outSeqs is managed by
+ * the CCtx.
+ *
+ * - src, srcSize: an input buffer which the external matchfinder must parse
+ * into sequences. srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
+ *
+ * - dict, dictSize: a history buffer, which may be empty, which the external
+ * matchfinder may reference as it produces sequences for the src buffer.
+ * Currently, zstd will always pass dictSize == 0 into external matchfinders,
+ * but this will change in the future.
+ *
+ * - compressionLevel: a signed integer representing the zstd compression level
+ * set by the user for the current operation. The external matchfinder may
+ * choose to use this information to change its compression strategy and
+ * speed/ratio tradeoff. Note: The compression level does not reflect zstd
+ * parameters set through the advanced API.
+ *
+ * - windowSize: a size_t representing the maximum allowed offset for external
+ * sequences. Note that sequence offsets are sometimes allowed to exceed the
+ * windowSize if a dictionary is present, see doc/zstd_compression_format.md
+ * for details.
+ *
+ * The user-provided function shall return a size_t representing the number of
+ * sequences written to outSeqs. This return value will be treated as an error
+ * code if it is greater than outSeqsCapacity. The return value must be non-zero
+ * if srcSize is non-zero. The ZSTD_EXTERNAL_MATCHFINDER_ERROR macro is provided
+ * for convenience, but any value greater than outSeqsCapacity will be treated as
+ * an error code.
+ *
+ * If the user-provided function does not return an error code, the sequences
+ * written to outSeqs must be a valid parse of the src buffer. Data corruption may
+ * occur if the parse is not valid. A parse is defined to be valid if the
+ * following conditions hold:
+ * - The sum of matchLengths and literalLengths is equal to srcSize.
+ * - All sequences in the parse have matchLength != 0, except for the final
+ * sequence. matchLength is not constrained for the final sequence.
+ * - All offsets respect the windowSize parameter as specified in
+ * doc/zstd_compression_format.md.
+ *
+ * zstd will only validate these conditions (and fail compression if they do not
+ * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
+ * validation has a performance cost.
+ *
+ * If the user-provided function returns an error, zstd will either fall back
+ * to an internal matchfinder or fail the compression operation. The user can
+ * choose between the two behaviors by setting the
+ * ZSTD_c_enableMatchFinderFallback cParam. Fallback compression will follow any
+ * other cParam settings, such as compression level, the same as in a normal
+ * compression operation.
+ *
+ * The user shall instruct zstd to use a particular ZSTD_externalMatchFinder_F
+ * function by calling ZSTD_registerExternalMatchFinder(cctx, externalMatchState,
+ * externalMatchFinder). This setting will persist until the next parameter reset
+ * of the CCtx.
+ *
+ * The externalMatchState must be initialized by the user before calling
+ * ZSTD_registerExternalMatchFinder. The user is responsible for destroying the
+ * externalMatchState.
+ *
+ * *** LIMITATIONS ***
+ * External matchfinders are compatible with all zstd compression APIs. There are
+ * only two limitations.
+ *
+ * First, the ZSTD_c_enableLongDistanceMatching cParam is not supported.
+ * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an
+ * external matchfinder.
+ * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in
+ * some cases (see its documentation for details). Users must explicitly set
+ * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an
+ * external matchfinder is registered.
+ * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
+ * whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should
+ * check the docs on ZSTD_c_enableLongDistanceMatching whenever the external
+ * matchfinder API is used in conjunction with advanced settings (like windowLog).
+ *
+ * Second, history buffers are not supported. Concretely, zstd will always pass
+ * dictSize == 0 to the external matchfinder (for now). This has two implications:
+ * - Dictionaries are not supported. Compression will *not* fail if the user
+ * references a dictionary, but the dictionary won't have any effect.
+ * - Stream history is not supported. All compression APIs, including streaming
+ * APIs, work with the external matchfinder, but the external matchfinder won't
+ * receive any history from the previous block. Each block is an independent chunk.
+ *
+ * Long-term, we plan to overcome both limitations. There is no technical blocker to
+ * overcoming them. It is purely a question of engineering effort.
+ */
+
+#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1))
+
+typedef size_t ZSTD_externalMatchFinder_F (
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+);
+
+/*! ZSTD_registerExternalMatchFinder() :
+ * Instruct zstd to use an external matchfinder function.
+ *
+ * The externalMatchState must be initialized by the caller, and the caller is
+ * responsible for managing its lifetime. This parameter is sticky across
+ * compressions. It will remain set until the user explicitly resets compression
+ * parameters.
+ *
+ * The user is strongly encouraged to read the full API documentation (above)
+ * before calling this function. */
+ZSTDLIB_STATIC_API void
+ZSTD_registerExternalMatchFinder(
+ ZSTD_CCtx* cctx,
+ void* externalMatchState,
+ ZSTD_externalMatchFinder_F* externalMatchFinder
+);
+
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
ZSTD_error_dictionary_wrong = 32,
ZSTD_error_dictionaryCreation_failed = 34,
ZSTD_error_parameter_unsupported = 40,
+ ZSTD_error_parameter_combination_unsupported = 41,
ZSTD_error_parameter_outOfBound = 42,
ZSTD_error_tableLog_tooLarge = 44,
ZSTD_error_maxSymbolValue_tooLarge = 46,
ZSTD_error_seekableIO = 102,
ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105,
+ ZSTD_error_externalMatchFinder_failed = 106,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
CLEAN += zstreamtest zstreamtest32
-ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c
+ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c external_matchfinder.c
ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES)
ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES)
zstreamtest32 : CFLAGS += -m32
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "external_matchfinder.h"
+#include <string.h>
+#include "zstd_compress_internal.h"
+
+#define HSIZE 1024
+static U32 const HLOG = 10;
+static U32 const MLS = 4;
+static U32 const BADIDX = 0xffffffff;
+
+static size_t simpleExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+) {
+ const BYTE* const istart = (const BYTE*)src;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* ip = istart;
+ const BYTE* anchor = istart;
+ size_t seqCount = 0;
+ U32 hashTable[HSIZE];
+
+ (void)externalMatchState;
+ (void)dict;
+ (void)dictSize;
+ (void)outSeqsCapacity;
+ (void)compressionLevel;
+
+ { int i;
+ for (i=0; i < HSIZE; i++) {
+ hashTable[i] = BADIDX;
+ } }
+
+ while (ip + MLS < iend) {
+ size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS);
+ U32 const matchIndex = hashTable[hash];
+ hashTable[hash] = (U32)(ip - istart);
+
+ if (matchIndex != BADIDX) {
+ const BYTE* const match = istart + matchIndex;
+ U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
+ if (matchLen >= ZSTD_MINMATCH_MIN) {
+ U32 const litLen = (U32)(ip - anchor);
+ U32 const offset = (U32)(ip - match);
+ ZSTD_Sequence const seq = {
+ offset, litLen, matchLen, 0
+ };
+
+ /* Note: it's crucial to stay within the window size! */
+ if (offset <= windowSize) {
+ outSeqs[seqCount++] = seq;
+ ip += matchLen;
+ anchor = ip;
+ continue;
+ }
+ }
+ }
+
+ ip++;
+ }
+
+ { ZSTD_Sequence const finalSeq = {
+ 0, (U32)(iend - anchor), 0, 0
+ };
+ outSeqs[seqCount++] = finalSeq;
+ }
+
+ return seqCount;
+}
+
+size_t zstreamExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+) {
+ EMF_testCase const testCase = *((EMF_testCase*)externalMatchState);
+ memset(outSeqs, 0, outSeqsCapacity);
+
+ switch (testCase) {
+ case EMF_ZERO_SEQS:
+ return 0;
+ case EMF_ONE_BIG_SEQ:
+ outSeqs[0].offset = 0;
+ outSeqs[0].matchLength = 0;
+ outSeqs[0].litLength = (U32)(srcSize);
+ return 1;
+ case EMF_LOTS_OF_SEQS:
+ return simpleExternalMatchFinder(
+ externalMatchState,
+ outSeqs, outSeqsCapacity,
+ src, srcSize,
+ dict, dictSize,
+ compressionLevel,
+ windowSize
+ );
+ case EMF_SMALL_ERROR:
+ return outSeqsCapacity + 1;
+ case EMF_BIG_ERROR:
+ default:
+ return ZSTD_EXTERNAL_MATCHFINDER_ERROR;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef EXTERNAL_MATCHFINDER
+#define EXTERNAL_MATCHFINDER
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+/* See external_matchfinder.c for details on each test case */
+typedef enum {
+ EMF_ZERO_SEQS = 0,
+ EMF_ONE_BIG_SEQ = 1,
+ EMF_LOTS_OF_SEQS = 2,
+ EMF_BIG_ERROR = 3,
+ EMF_SMALL_ERROR = 4
+} EMF_testCase;
+
+size_t zstreamExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+);
+
+#endif // EXTERNAL_MATCHFINDER
#include "seqgen.h"
#include "util.h"
#include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
-
+#include "external_matchfinder.h" /* zstreamExternalMatchFinder, EMF_testCase */
/*-************************************
* Constants
}
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : External matchfinder API: ", testNb++);
+ {
+ size_t const dstBufSize = ZSTD_compressBound(CNBufferSize);
+ BYTE* const dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize));
+ size_t const checkBufSize = CNBufferSize;
+ BYTE* const checkBuf = (BYTE*)malloc(checkBufSize);
+ int enableFallback;
+ EMF_testCase externalMatchState;
+
+ CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed");
+
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
+
+ /* Reference external matchfinder outside the test loop to
+ * check that the reference is preserved across compressions */
+ ZSTD_registerExternalMatchFinder(
+ zc,
+ &externalMatchState,
+ zstreamExternalMatchFinder
+ );
+
+ for (enableFallback = 0; enableFallback < 1; enableFallback++) {
+ size_t testCaseId;
+
+ EMF_testCase const EMF_successCases[] = {
+ EMF_ONE_BIG_SEQ,
+ EMF_LOTS_OF_SEQS,
+ };
+ size_t const EMF_numSuccessCases = 2;
+
+ EMF_testCase const EMF_failureCases[] = {
+ EMF_ZERO_SEQS,
+ EMF_BIG_ERROR,
+ EMF_SMALL_ERROR,
+ };
+ size_t const EMF_numFailureCases = 3;
+
+ /* Test external matchfinder success scenarios */
+ for (testCaseId = 0; testCaseId < EMF_numSuccessCases; testCaseId++) {
+ size_t res;
+ externalMatchState = EMF_successCases[testCaseId];
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
+ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
+ res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+ CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
+ CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res));
+ CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
+ }
+
+ /* Test external matchfinder failure scenarios */
+ for (testCaseId = 0; testCaseId < EMF_numFailureCases; testCaseId++) {
+ size_t res;
+ externalMatchState = EMF_failureCases[testCaseId];
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
+ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
+ res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+ if (enableFallback) {
+ CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res));
+ CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
+ } else {
+ CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
+ CHECK(
+ ZSTD_getErrorCode(res) != ZSTD_error_externalMatchFinder_failed,
+ "EMF: Wrong error code: %s", ZSTD_getErrorName(res)
+ );
+ }
+ }
+
+ /* Test compression with external matchfinder + empty src buffer */
+ {
+ size_t res;
+ externalMatchState = EMF_ZERO_SEQS;
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
+ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
+ res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0);
+ CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
+ CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!");
+ }
+ }
+
+ /* Test that reset clears the external matchfinder */
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
+ externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
+ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
+ CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
+
+ free(dstBuf);
+ free(checkBuf);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
_end:
FUZ_freeDictionary(dictionary);
ZSTD_freeCStream(zc);