git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Rename "External Matchfinder" to "Block-Level Sequence Producer" (#3484)
author: Elliot Gorokhovsky <embg@fb.com>
Thu, 9 Feb 2023 22:01:17 +0000 (17:01 -0500)
committer: GitHub <noreply@github.com>
Thu, 9 Feb 2023 22:01:17 +0000 (17:01 -0500)
* change "external matchfinder" to "external sequence producer"

* migrate contrib/ to new naming convention

* fix contrib build

* fix error message

* update debug strings

* fix def of invalid sequences in zstd.h

* nit

* update CHANGELOG

* fix .gitignore

20 files changed:
CHANGELOG
Makefile
contrib/externalMatchfinder/.gitignore [deleted file]
contrib/externalMatchfinder/README.md [deleted file]
contrib/externalSequenceProducer/.gitignore [new file with mode: 0644]
contrib/externalSequenceProducer/Makefile [moved from contrib/externalMatchfinder/Makefile with 87% similarity]
contrib/externalSequenceProducer/README.md [new file with mode: 0644]
contrib/externalSequenceProducer/main.c [moved from contrib/externalMatchfinder/main.c with 89% similarity]
contrib/externalSequenceProducer/sequence_producer.c [moved from contrib/externalMatchfinder/matchfinder.c with 94% similarity]
contrib/externalSequenceProducer/sequence_producer.h [moved from contrib/externalMatchfinder/matchfinder.h with 91% similarity]
lib/common/error_private.c
lib/compress/zstd_compress.c
lib/compress/zstd_compress_internal.h
lib/zstd.h
lib/zstd_errors.h
tests/external_matchfinder.c
tests/external_matchfinder.h
tests/fuzz/Makefile
tests/fuzz/zstd_helpers.c
tests/zstreamtest.c

index 11cc0d3fb20640e39e21059bb8937289f2b1056e..4010c1ff5d396c943c10782ece6b0ecc356c6b5e 100644 (file)
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -29,7 +29,7 @@ cli: Fix empty string as argument for `--output-dir-*` (#3220, @embg)
 cli: Fix decompression memory usage reported by `-vv --long` (#3042, @u1f35c, and #3232, @zengyijing)
 cli: Fix infinite loop when empty input is passed to trainer (#3081, @terrelln)
 cli: Fix `--adapt` doesn't work when `--no-progress` is also set (#3354, @terrelln)
-api: Support for External matchfinder (#3333, @embg)
+api: Support for Block-Level Sequence Producer (#3333, @embg)
 api: Support for in-place decompression (#3432, @terrelln)
 api: New  `ZSTD_CCtx_setCParams()`  function, set all parameters defined in a  `ZSTD_compressionParameters`  structure (#3403, @Cyan4973)
 api: Streaming decompression detects incorrect header ID sooner (#3175, @Cyan4973)
index 75a47a308d17622ee98e9eb9b476485242a3a524..a7890a5b1d6f6a4eda330d7903b72c7c0e61427a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -123,7 +123,7 @@ contrib: lib
        $(MAKE) -C contrib/seekable_format/examples all
        $(MAKE) -C contrib/seekable_format/tests test
        $(MAKE) -C contrib/largeNbDicts all
-       $(MAKE) -C contrib/externalMatchfinder all
+       $(MAKE) -C contrib/externalSequenceProducer all
        cd build/single_file_libs/ ; ./build_decoder_test.sh
        cd build/single_file_libs/ ; ./build_library_test.sh
 
@@ -143,7 +143,7 @@ clean:
        $(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID)
        $(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID)
        $(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
-       $(Q)$(MAKE) -C contrib/externalMatchfinder $@ > $(VOID)
+       $(Q)$(MAKE) -C contrib/externalSequenceProducer $@ > $(VOID)
        $(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
        $(Q)$(RM) -r lz4
        @echo Cleaning completed
diff --git a/contrib/externalMatchfinder/.gitignore b/contrib/externalMatchfinder/.gitignore
deleted file mode 100644 (file)
index 46357ef..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-# build artifacts
-externalMatchfinder
diff --git a/contrib/externalMatchfinder/README.md b/contrib/externalMatchfinder/README.md
deleted file mode 100644 (file)
index cb7d49d..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-externalMatchfinder
-=====================
-
-`externalMatchfinder` is a test tool for the external matchfinder API.
-It demonstrates how to use the API to perform a simple round-trip test.
-
-A sample matchfinder is provided in matchfinder.c, but the user can swap
-this out with a different one if desired. The sample matchfinder implements
-LZ compression with a 1KB hashtable. Dictionary compression is not currently supported.
-
-Command line :
-```
-externalMatchfinder filename
-```
diff --git a/contrib/externalSequenceProducer/.gitignore b/contrib/externalSequenceProducer/.gitignore
new file mode 100644 (file)
index 0000000..147710a
--- /dev/null
@@ -0,0 +1,2 @@
+# build artifacts
+externalSequenceProducer
similarity index 87%
rename from contrib/externalMatchfinder/Makefile
rename to contrib/externalSequenceProducer/Makefile
index 2baa558cb56c80d29e3e69bdc2922c3eb1931a31..0591ae01ba5368f74b66825a2f716cff4e9d3c17 100644 (file)
@@ -23,11 +23,11 @@ DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
             -Wredundant-decls
 CFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS)
 
-default: externalMatchfinder
+default: externalSequenceProducer
 
-all: externalMatchfinder
+all: externalSequenceProducer
 
-externalMatchfinder: matchfinder.c main.c $(LIBZSTD)
+externalSequenceProducer: sequence_producer.c main.c $(LIBZSTD)
        $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
 
 .PHONY: $(LIBZSTD)
@@ -37,4 +37,4 @@ $(LIBZSTD):
 clean:
        $(RM) *.o
        $(MAKE) -C $(LIBDIR) clean > /dev/null
-       $(RM) externalMatchfinder
+       $(RM) externalSequenceProducer
diff --git a/contrib/externalSequenceProducer/README.md b/contrib/externalSequenceProducer/README.md
new file mode 100644 (file)
index 0000000..c16a170
--- /dev/null
@@ -0,0 +1,14 @@
+externalSequenceProducer
+=====================
+
+`externalSequenceProducer` is a test tool for the Block-Level Sequence Producer API.
+It demonstrates how to use the API to perform a simple round-trip test.
+
+A sample sequence producer is provided in sequence_producer.c, but the user can swap
+this out with a different one if desired. The sample sequence producer implements
+LZ parsing with a 1KB hashtable. Dictionary-based parsing is not currently supported.
+
+Command line :
+```
+externalSequenceProducer filename
+```
similarity index 89%
rename from contrib/externalMatchfinder/main.c
rename to contrib/externalSequenceProducer/main.c
index 6971a46c7e28a07089f3f2eae02a459b48018838..e67e295383c25bfd2c4e2a386da170ae3aa65850 100644 (file)
@@ -16,7 +16,7 @@
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
 #include "zstd_errors.h"
-#include "matchfinder.h" // simpleExternalMatchFinder
+#include "sequence_producer.h" // simpleSequenceProducer
 
 #define CHECK(res)                                      \
 do {                                                    \
@@ -28,23 +28,23 @@ do {                                                    \
 
 int main(int argc, char *argv[]) {
     if (argc != 2) {
-        printf("Usage: exampleMatchfinder <file>\n");
+        printf("Usage: externalSequenceProducer <file>\n");
         return 1;
     }
 
     ZSTD_CCtx* const zc = ZSTD_createCCtx();
 
-    int simpleExternalMatchState = 0xdeadbeef;
+    int simpleSequenceProducerState = 0xdeadbeef;
 
     // Here is the crucial bit of code!
-    ZSTD_registerExternalMatchFinder(
+    ZSTD_registerSequenceProducer(
         zc,
-        &simpleExternalMatchState,
-        simpleExternalMatchFinder
+        &simpleSequenceProducerState,
+        simpleSequenceProducer
     );
 
     {
-        size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1);
+        size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 1);
         CHECK(res);
     }
 
similarity index 94%
rename from contrib/externalMatchfinder/matchfinder.c
rename to contrib/externalSequenceProducer/sequence_producer.c
index f119193ef1d76490cc753c68ef20a9860873280f..60a2f9572b00422e5a030d2fd9109f854e34caba 100644 (file)
@@ -9,15 +9,15 @@
  */
 
 #include "zstd_compress_internal.h"
-#include "matchfinder.h"
+#include "sequence_producer.h"
 
 #define HSIZE 1024
 static U32 const HLOG = 10;
 static U32 const MLS = 4;
 static U32 const BADIDX = 0xffffffff;
 
-size_t simpleExternalMatchFinder(
-  void* externalMatchState,
+size_t simpleSequenceProducer(
+  void* sequenceProducerState,
   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
   const void* src, size_t srcSize,
   const void* dict, size_t dictSize,
@@ -31,7 +31,7 @@ size_t simpleExternalMatchFinder(
     size_t seqCount = 0;
     U32 hashTable[HSIZE];
 
-    (void)externalMatchState;
+    (void)sequenceProducerState;
     (void)dict;
     (void)dictSize;
     (void)outSeqsCapacity;
similarity index 91%
rename from contrib/externalMatchfinder/matchfinder.h
rename to contrib/externalSequenceProducer/sequence_producer.h
index f8ba1c96531876acaa4453d9053e5e24ea5ccc7a..19f9982ac36ee1ecfbd29543bca89b0d436271f2 100644 (file)
@@ -14,8 +14,8 @@
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
 
-size_t simpleExternalMatchFinder(
-  void* externalMatchState,
+size_t simpleSequenceProducer(
+  void* sequenceProducerState,
   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
   const void* src, size_t srcSize,
   const void* dict, size_t dictSize,
index 0cff6b80b8e66b6b08a09079f2263aaaed32069d..075fc5ef42fab0a69a1298bde84807fc24156299 100644 (file)
@@ -54,7 +54,7 @@ const char* ERR_getErrorString(ERR_enum code)
     case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
     case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
     case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
-    case PREFIX(externalMatchFinder_failed): return "External matchfinder returned an error code";
+    case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
     case PREFIX(externalSequences_invalid): return "External sequences are not valid";
     case PREFIX(maxCode):
     default: return notErrorCode;
index 5b89ca6d2cc0569df2799db807b1526c83807d95..b55f684cd710064d0e139278f5b3b2ecaab68f5f 100644 (file)
@@ -615,7 +615,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
         bounds.upperBound = (int)ZSTD_ps_disable;
         return bounds;
 
-    case ZSTD_c_enableMatchFinderFallback:
+    case ZSTD_c_enableSeqProducerFallback:
         bounds.lowerBound = 0;
         bounds.upperBound = 1;
         return bounds;
@@ -695,7 +695,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
     case ZSTD_c_useRowMatchFinder:
     case ZSTD_c_deterministicRefPrefix:
     case ZSTD_c_prefetchCDictTables:
-    case ZSTD_c_enableMatchFinderFallback:
+    case ZSTD_c_enableSeqProducerFallback:
     case ZSTD_c_maxBlockSize:
     case ZSTD_c_searchForExternalRepcodes:
     default:
@@ -754,7 +754,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
     case ZSTD_c_useRowMatchFinder:
     case ZSTD_c_deterministicRefPrefix:
     case ZSTD_c_prefetchCDictTables:
-    case ZSTD_c_enableMatchFinderFallback:
+    case ZSTD_c_enableSeqProducerFallback:
     case ZSTD_c_maxBlockSize:
     case ZSTD_c_searchForExternalRepcodes:
         break;
@@ -989,8 +989,8 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
         return CCtxParams->prefetchCDictTables;
 
-    case ZSTD_c_enableMatchFinderFallback:
-        BOUNDCHECK(ZSTD_c_enableMatchFinderFallback, value);
+    case ZSTD_c_enableSeqProducerFallback:
+        BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
         CCtxParams->enableMatchFinderFallback = value;
         return CCtxParams->enableMatchFinderFallback;
 
@@ -1140,7 +1140,7 @@ size_t ZSTD_CCtxParams_getParameter(
     case ZSTD_c_prefetchCDictTables:
         *value = (int)CCtxParams->prefetchCDictTables;
         break;
-    case ZSTD_c_enableMatchFinderFallback:
+    case ZSTD_c_enableSeqProducerFallback:
         *value = CCtxParams->enableMatchFinderFallback;
         break;
     case ZSTD_c_maxBlockSize:
@@ -1610,8 +1610,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
 
 /* Helper function for calculating memory requirements.
  * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */
-static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useExternalMatchFinder) {
-    U32 const divider = (minMatch==3 || useExternalMatchFinder) ? 3 : 4;
+static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) {
+    U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4;
     return blockSize / divider;
 }
 
@@ -1623,12 +1623,12 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
         const size_t buffInSize,
         const size_t buffOutSize,
         const U64 pledgedSrcSize,
-        int useExternalMatchFinder,
+        int useSequenceProducer,
         size_t maxBlockSize)
 {
     size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
     size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
-    size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder);
+    size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
     size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
                             + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
                             + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
@@ -1648,7 +1648,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
     size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
 
     size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
-    size_t const externalSeqSpace = useExternalMatchFinder
+    size_t const externalSeqSpace = useSequenceProducer
         ? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
         : 0;
 
@@ -1679,7 +1679,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
      * be needed. However, we still allocate two 0-sized buffers, which can
      * take space under ASAN. */
     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder, params->maxBlockSize);
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
 }
 
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
@@ -1740,7 +1740,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
 
         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
             &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
-            ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder, params->maxBlockSize);
+            ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
     }
 }
 
@@ -2024,7 +2024,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
 
     {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
         size_t const blockSize = MIN(params->maxBlockSize, windowSize);
-        size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useExternalMatchFinder);
+        size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
         size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
@@ -2041,7 +2041,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         size_t const neededSpace =
             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
                 &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
-                buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder, params->maxBlockSize);
+                buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
         int resizeWorkspace;
 
         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
@@ -2155,7 +2155,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         }
 
         /* reserve space for block-level external sequences */
-        if (params->useExternalMatchFinder) {
+        if (params->useSequenceProducer) {
             size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
             zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
             zc->externalMatchCtx.seqBuffer =
@@ -3022,26 +3022,26 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
     ssPtr->longLengthType = ZSTD_llt_none;
 }
 
-/* ZSTD_postProcessExternalMatchFinderResult() :
+/* ZSTD_postProcessSequenceProducerResult() :
  * Validates and post-processes sequences obtained through the external matchfinder API:
  *   - Checks whether nbExternalSeqs represents an error condition.
  *   - Appends a block delimiter to outSeqs if one is not already present.
  *     See zstd.h for context regarding block delimiters.
  * Returns the number of sequences after post-processing, or an error code. */
-static size_t ZSTD_postProcessExternalMatchFinderResult(
+static size_t ZSTD_postProcessSequenceProducerResult(
     ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize
 ) {
     RETURN_ERROR_IF(
         nbExternalSeqs > outSeqsCapacity,
-        externalMatchFinder_failed,
-        "External matchfinder returned error code %lu",
+        sequenceProducer_failed,
+        "External sequence producer returned error code %lu",
         (unsigned long)nbExternalSeqs
     );
 
     RETURN_ERROR_IF(
         nbExternalSeqs == 0 && srcSize > 0,
-        externalMatchFinder_failed,
-        "External matchfinder produced zero sequences for a non-empty src buffer!"
+        sequenceProducer_failed,
+        "Got zero sequences from external sequence producer for a non-empty src buffer!"
     );
 
     if (srcSize == 0) {
@@ -3061,7 +3061,7 @@ static size_t ZSTD_postProcessExternalMatchFinderResult(
          * produced an invalid parse, by definition of ZSTD_sequenceBound(). */
         RETURN_ERROR_IF(
             nbExternalSeqs == outSeqsCapacity,
-            externalMatchFinder_failed,
+            sequenceProducer_failed,
             "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
         );
 
@@ -3139,9 +3139,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
             /* External matchfinder + LDM is technically possible, just not implemented yet.
              * We need to revisit soon and implement it. */
             RETURN_ERROR_IF(
-                zc->appliedParams.useExternalMatchFinder,
+                zc->appliedParams.useSequenceProducer,
                 parameter_combination_unsupported,
-                "Long-distance matching with external matchfinder enabled is not currently supported."
+                "Long-distance matching with external sequence producer enabled is not currently supported."
             );
 
             /* Updates ldmSeqStore.pos */
@@ -3158,9 +3158,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
             /* External matchfinder + LDM is technically possible, just not implemented yet.
              * We need to revisit soon and implement it. */
             RETURN_ERROR_IF(
-                zc->appliedParams.useExternalMatchFinder,
+                zc->appliedParams.useSequenceProducer,
                 parameter_combination_unsupported,
-                "Long-distance matching with external matchfinder enabled is not currently supported."
+                "Long-distance matching with external sequence producer enabled is not currently supported."
             );
 
             ldmSeqStore.seq = zc->ldmSequences;
@@ -3177,7 +3177,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                                        zc->appliedParams.useRowMatchFinder,
                                        src, srcSize);
             assert(ldmSeqStore.pos == ldmSeqStore.size);
-        } else if (zc->appliedParams.useExternalMatchFinder) {
+        } else if (zc->appliedParams.useSequenceProducer) {
             assert(
                 zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
             );
@@ -3195,7 +3195,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                     windowSize
                 );
 
-                size_t const nbPostProcessedSeqs = ZSTD_postProcessExternalMatchFinderResult(
+                size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
                     zc->externalMatchCtx.seqBuffer,
                     nbExternalSeqs,
                     zc->externalMatchCtx.seqBufferCapacity,
@@ -3217,7 +3217,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                         "Failed to copy external sequences to seqStore!"
                     );
                     ms->ldmSeqStore = NULL;
-                    DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs);
+                    DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs);
                     return ZSTDbss_compress;
                 }
 
@@ -3233,7 +3233,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                     ms->ldmSeqStore = NULL;
                     DEBUGLOG(
                         5,
-                        "External matchfinder returned error code %lu. Falling back to internal matchfinder.",
+                        "External sequence producer returned error code %lu. Falling back to internal parser.",
                         (unsigned long)nbExternalSeqs
                     );
                     lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
@@ -6033,9 +6033,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
 #ifdef ZSTD_MULTITHREAD
     /* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
     RETURN_ERROR_IF(
-        params.useExternalMatchFinder == 1 && params.nbWorkers >= 1,
+        params.useSequenceProducer == 1 && params.nbWorkers >= 1,
         parameter_combination_unsupported,
-        "External matchfinder isn't supported with nbWorkers >= 1"
+        "External sequence producer isn't supported with nbWorkers >= 1"
     );
 
     if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
@@ -6251,7 +6251,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
  */
 static size_t
 ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,
-                      size_t posInSrc, U32 windowLog, size_t dictSize, int useExternalMatchFinder)
+                      size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer)
 {
     U32 const windowSize = 1u << windowLog;
     /* posInSrc represents the amount of data the decoder would decode up to this point.
@@ -6260,7 +6260,7 @@ ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,
      * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
      */
     size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
-    size_t const matchLenLowerBound = (minMatch == 3 || useExternalMatchFinder) ? 3 : 4;
+    size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4;
     RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!");
     /* Validate maxNbSeq is large enough for the given matchLength and minMatch */
     RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch");
@@ -6325,7 +6325,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
-                                                cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useExternalMatchFinder),
+                                                cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
                                                 "Sequence validation failed");
         }
         RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
@@ -6463,7 +6463,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
-                                                   cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useExternalMatchFinder),
+                                                   cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
                                                    "Sequence validation failed");
         }
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
@@ -6908,9 +6908,9 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
     return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
 }
 
-void ZSTD_registerExternalMatchFinder(
+void ZSTD_registerSequenceProducer(
     ZSTD_CCtx* zc, void* mState,
-    ZSTD_externalMatchFinder_F* mFinder
+    ZSTD_sequenceProducer_F* mFinder
 ) {
     if (mFinder != NULL) {
         ZSTD_externalMatchCtx emctx;
@@ -6919,9 +6919,9 @@ void ZSTD_registerExternalMatchFinder(
         emctx.seqBuffer = NULL;
         emctx.seqBufferCapacity = 0;
         zc->externalMatchCtx = emctx;
-        zc->requestedParams.useExternalMatchFinder = 1;
+        zc->requestedParams.useSequenceProducer = 1;
     } else {
         ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
-        zc->requestedParams.useExternalMatchFinder = 0;
+        zc->requestedParams.useSequenceProducer = 0;
     }
 }
index dac7a0aa2f387191cf963023fc6c1c21c285233d..cbb85e527eb7f239b7a65fde96c0a126fb62747b 100644 (file)
@@ -353,8 +353,8 @@ struct ZSTD_CCtx_params_s {
 
     /* Indicates whether an external matchfinder has been referenced.
      * Users can't set this externally.
-     * It is set internally in ZSTD_registerExternalMatchFinder(). */
-    int useExternalMatchFinder;
+     * It is set internally in ZSTD_registerSequenceProducer(). */
+    int useSequenceProducer;
 
     /* Adjust the max block size*/
     size_t maxBlockSize;
@@ -395,7 +395,7 @@ typedef struct {
 /* Context for block-level external matchfinder API */
 typedef struct {
   void* mState;
-  ZSTD_externalMatchFinder_F* mFinder;
+  ZSTD_sequenceProducer_F* mFinder;
   ZSTD_Sequence* seqBuffer;
   size_t seqBufferCapacity;
 } ZSTD_externalMatchCtx;
index 91a3679a1c94e30a18852e60161558078d7e331a..95aac07370db57c49beeaee89e36c336b48b6685 100644 (file)
@@ -478,7 +478,7 @@ typedef enum {
      * ZSTD_c_useBlockSplitter
      * ZSTD_c_useRowMatchFinder
      * ZSTD_c_prefetchCDictTables
-     * ZSTD_c_enableMatchFinderFallback
+     * ZSTD_c_enableSeqProducerFallback
      * ZSTD_c_maxBlockSize
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
@@ -565,7 +565,7 @@ typedef enum {
  *                  They will be used to compress next frame.
  *                  Resetting session never fails.
  *  - The parameters : changes all parameters back to "default".
- *                  This also removes any reference to any dictionary or external matchfinder.
+ *                  This also removes any reference to any dictionary or external sequence producer.
  *                  Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
  *                  otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
  *  - Both : similar to resetting the session, followed by resetting parameters.
@@ -1627,8 +1627,8 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
  *  Note : only single-threaded compression is supported.
  *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
  *
- *  Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the external matchfinder API at this time.
- *  Size estimates assume that no external matchfinder is registered.
+ *  Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
+ *  Size estimates assume that no external sequence producer is registered.
  */
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
@@ -1650,8 +1650,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
  *         In this case, get total size by adding ZSTD_estimate?DictSize
  *  Note 2 : only single-threaded compression is supported.
  *  ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
- *  Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the external matchfinder API at this time.
- *  Size estimates assume that no external matchfinder is registered.
+ *  Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
+ *  Size estimates assume that no external sequence producer is registered.
  */
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
@@ -2113,19 +2113,19 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
  */
 #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
 
-/* ZSTD_c_enableMatchFinderFallback
+/* ZSTD_c_enableSeqProducerFallback
  * Allowed values are 0 (disable) and 1 (enable). The default setting is 0.
  *
- * Controls whether zstd will fall back to an internal matchfinder if an
- * external matchfinder is registered and returns an error code. This fallback is
- * block-by-block: the internal matchfinder will only be called for blocks where
- * the external matchfinder returns an error code. Fallback compression will
+ * Controls whether zstd will fall back to an internal sequence producer if an
+ * external sequence producer is registered and returns an error code. This fallback
+ * is block-by-block: the internal sequence producer will only be called for blocks
+ * where the external sequence producer returns an error code. Fallback parsing will
  * follow any other cParam settings, such as compression level, the same as in a
  * normal (fully-internal) compression operation.
  *
- * The user is strongly encouraged to read the full external matchfinder API
+ * The user is strongly encouraged to read the full Block-Level Sequence Producer API
  * documentation (below) before setting this parameter. */
-#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17
+#define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17
 
 /* ZSTD_c_maxBlockSize
  * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
@@ -2141,12 +2141,13 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
 
 /* ZSTD_c_searchForExternalRepcodes
  * This parameter affects how zstd parses external sequences, such as sequences
- * provided through the compressSequences() API or from an external matchfinder.
+ * provided through the compressSequences() API or from an external block-level
+ * sequence producer.
  *
  * If set to ZSTD_ps_enable, the library will check for repeated offsets in
  * external sequences, even if those repcodes are not explicitly indicated in
  * the "rep" field. Note that this is the only way to exploit repcode matches
- * while using compressSequences() or an external matchfinder, since zstd
+ * while using compressSequences() or an external sequence producer, since zstd
  * currently ignores the "rep" field of external sequences.
  *
  * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in
@@ -2805,43 +2806,52 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_
 ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
 
 
-/* ********************** EXTERNAL MATCHFINDER API **********************
+/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API *********************
  *
  * *** OVERVIEW ***
- * This API allows users to replace the zstd internal block-level matchfinder
- * with an external matchfinder function. Potential applications of the API
- * include hardware-accelerated matchfinders and matchfinders specialized to
- * particular types of data.
- *
- * See contrib/externalMatchfinder for an example program employing the
- * external matchfinder API.
+ * The Block-Level Sequence Producer API allows users to provide their own custom
+ * sequence producer which libzstd invokes to process each block. The produced list
+ * of sequences (literals and matches) is then post-processed by libzstd to produce
+ * valid compressed blocks.
+ *
+ * This block-level offload API is a more granular complement of the existing
+ * frame-level offload API compressSequences() (introduced in v1.5.1). It offers
+ * an easier migration story for applications already integrated with libzstd: the
+ * user application continues to invoke the same compression functions
+ * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits
+ * from the specific advantages of the external sequence producer. For example,
+ * the sequence producer could be tuned to take advantage of known characteristics
+ * of the input, to offer better speed / ratio, or could leverage hardware
+ * acceleration not available within libzstd itself.
+ *
+ * See contrib/externalSequenceProducer for an example program employing the
+ * Block-Level Sequence Producer API.
  *
  * *** USAGE ***
  * The user is responsible for implementing a function of type
- * ZSTD_externalMatchFinder_F. For each block, zstd will pass the following
+ * ZSTD_sequenceProducer_F. For each block, zstd will pass the following
  * arguments to the user-provided function:
  *
- *   - externalMatchState: a pointer to a user-managed state for the external
- *     matchfinder.
+ *   - sequenceProducerState: a pointer to a user-managed state for the sequence
+ *     producer.
  *
- *   - outSeqs, outSeqsCapacity: an output buffer for sequences produced by the
- *     external matchfinder. outSeqsCapacity is guaranteed >=
- *     ZSTD_sequenceBound(srcSize). The memory backing outSeqs is managed by
- *     the CCtx.
+ *   - outSeqs, outSeqsCapacity: an output buffer for the sequence producer.
+ *     outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory
+ *     backing outSeqs is managed by the CCtx.
  *
- *   - src, srcSize: an input buffer which the external matchfinder must parse
- *     into sequences. srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
+ *   - src, srcSize: an input buffer for the sequence producer to parse.
+ *     srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
  *
- *   - dict, dictSize: a history buffer, which may be empty, which the external
- *     matchfinder may reference as it produces sequences for the src buffer.
- *     Currently, zstd will always pass dictSize == 0 into external matchfinders,
- *     but this will change in the future.
+ *   - dict, dictSize: a history buffer, which may be empty, which the sequence
+ *     producer may reference as it parses the src buffer. Currently, zstd will
+ *     always pass dictSize == 0 into external sequence producers, but this will
+ *     change in the future.
  *
  *   - compressionLevel: a signed integer representing the zstd compression level
- *     set by the user for the current operation. The external matchfinder may
- *     choose to use this information to change its compression strategy and
- *     speed/ratio tradeoff. Note: The compression level does not reflect zstd
- *     parameters set through the advanced API.
+ *     set by the user for the current operation. The sequence producer may choose
+ *     to use this information to change its compression strategy and speed/ratio
+ *     tradeoff. Note: the compression level does not reflect zstd parameters set
+ *     through the advanced API.
  *
  *   - windowSize: a size_t representing the maximum allowed offset for external
  *     sequences. Note that sequence offsets are sometimes allowed to exceed the
@@ -2851,7 +2861,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* bloc
  * The user-provided function shall return a size_t representing the number of
  * sequences written to outSeqs. This return value will be treated as an error
  * code if it is greater than outSeqsCapacity. The return value must be non-zero
- * if srcSize is non-zero. The ZSTD_EXTERNAL_MATCHFINDER_ERROR macro is provided
+ * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided
  * for convenience, but any value greater than outSeqsCapacity will be treated as
  * an error code.
  *
@@ -2859,68 +2869,71 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* bloc
  * written to outSeqs must be a valid parse of the src buffer. Data corruption may
  * occur if the parse is not valid. A parse is defined to be valid if the
  * following conditions hold:
- *   - The sum of matchLengths and literalLengths is equal to srcSize.
- *   - All sequences in the parse have matchLength != 0, except for the final
- *     sequence. matchLength is not constrained for the final sequence.
- *   - All offsets respect the windowSize parameter as specified in
+ *   - The sum of matchLengths and literalLengths must equal srcSize.
+ *   - All sequences in the parse, except for the final sequence, must have
+ *     matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have
+ *     matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0.
+ *   - All offsets must respect the windowSize parameter as specified in
  *     doc/zstd_compression_format.md.
+ *   - If the final sequence has matchLength == 0, it must also have offset == 0.
  *
  * zstd will only validate these conditions (and fail compression if they do not
  * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
  * validation has a performance cost.
  *
  * If the user-provided function returns an error, zstd will either fall back
- * to an internal matchfinder or fail the compression operation. The user can
- * choose between the two behaviors by setting the
- * ZSTD_c_enableMatchFinderFallback cParam. Fallback compression will follow any
- * other cParam settings, such as compression level, the same as in a normal
- * compression operation.
- *
- * The user shall instruct zstd to use a particular ZSTD_externalMatchFinder_F
- * function by calling ZSTD_registerExternalMatchFinder(cctx, externalMatchState,
- * externalMatchFinder). This setting will persist until the next parameter reset
- * of the CCtx.
- *
- * The externalMatchState must be initialized by the user before calling
- * ZSTD_registerExternalMatchFinder. The user is responsible for destroying the
- * externalMatchState.
+ * to an internal sequence producer or fail the compression operation. The user can
+ * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback
+ * cParam. Fallback compression will follow any other cParam settings, such as
+ * compression level, the same as in a normal compression operation.
+ *
+ * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F
+ * function by calling
+ *         ZSTD_registerSequenceProducer(cctx,
+ *                                       sequenceProducerState,
+ *                                       sequenceProducer)
+ * This setting will persist until the next parameter reset of the CCtx.
+ *
+ * The sequenceProducerState must be initialized by the user before calling
+ * ZSTD_registerSequenceProducer(). The user is responsible for destroying the
+ * sequenceProducerState.
  *
  * *** LIMITATIONS ***
- * External matchfinders are compatible with all zstd compression APIs which respect
- * advanced parameters. However, there are three limitations:
- *
- * First, the ZSTD_c_enableLongDistanceMatching cParam is not supported.
- * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an
- * external matchfinder.
- *   - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in
- *     some cases (see its documentation for details). Users must explicitly set
- *     ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an
- *     external matchfinder is registered.
+ * This API is compatible with all zstd compression APIs which respect advanced parameters.
+ * However, there are three limitations:
+ *
+ * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported.
+ * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level
+ * external sequence producer.
+ *   - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some
+ *     cases (see its documentation for details). Users must explicitly set
+ *     ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external
+ *     sequence producer is registered.
  *   - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
  *     whenever the window (1 << ZSTD_c_windowLog) is < 128MB, but that's subject to change. Users should
- *     check the docs on ZSTD_c_enableLongDistanceMatching whenever the external
- *     matchfinder API is used in conjunction with advanced settings (like windowLog).
+ *     check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence
+ *     Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog).
  *
- * Second, history buffers are not supported. Concretely, zstd will always pass
- * dictSize == 0 to the external matchfinder (for now). This has two implications:
- *   - Dictionaries are not supported. Compression will *not* fail if the user
+ * Second, history buffers are not currently supported. Concretely, zstd will always pass
+ * dictSize == 0 to the external sequence producer (for now). This has two implications:
+ *   - Dictionaries are not currently supported. Compression will *not* fail if the user
  *     references a dictionary, but the dictionary won't have any effect.
- *   - Stream history is not supported. All compression APIs, including streaming
- *     APIs, work with the external matchfinder, but the external matchfinder won't
- *     receive any history from the previous block. Each block is an independent chunk.
+ *   - Stream history is not currently supported. All advanced compression APIs, including
+ *     streaming APIs, work with external sequence producers, but each block is treated as
+ *     an independent chunk without history from previous blocks.
  *
- * Third, multi-threading within a single compression is not supported. In other words,
- * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external matchfinder is registered.
+ * Third, multi-threading within a single compression is not currently supported. In other words,
+ * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered.
  * Multi-threading across compressions is fine: simply create one CCtx per thread.
  *
  * Long-term, we plan to overcome all three limitations. There is no technical blocker to
  * overcoming them. It is purely a question of engineering effort.
  */
 
-#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1))
+#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
 
-typedef size_t ZSTD_externalMatchFinder_F (
-  void* externalMatchState,
+typedef size_t ZSTD_sequenceProducer_F (
+  void* sequenceProducerState,
   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
   const void* src, size_t srcSize,
   const void* dict, size_t dictSize,
@@ -2928,32 +2941,30 @@ typedef size_t ZSTD_externalMatchFinder_F (
   size_t windowSize
 );
 
-/*! ZSTD_registerExternalMatchFinder() :
- * Instruct zstd to use an external matchfinder function.
+/*! ZSTD_registerSequenceProducer() :
+ * Instruct zstd to use a block-level external sequence producer function.
  *
- * The externalMatchState must be initialized by the caller, and the caller is
+ * The sequenceProducerState must be initialized by the caller, and the caller is
  * responsible for managing its lifetime. This parameter is sticky across
  * compressions. It will remain set until the user explicitly resets compression
  * parameters.
  *
- * External matchfinder registration is considered to be an "advanced parameter",
- * part of the "advanced API". This means it will only have an effect on
- * compression APIs which respect advanced parameters, such as compress2() and
- * compressStream(). Older compression APIs such as compressCCtx(), which predate
- * the introduction of "advanced parameters", will ignore any external matchfinder
- * setting.
+ * Sequence producer registration is considered to be an "advanced parameter",
+ * part of the "advanced API". This means it will only have an effect on compression
+ * APIs which respect advanced parameters, such as compress2() and compressStream2().
+ * Older compression APIs such as compressCCtx(), which predate the introduction of
+ * "advanced parameters", will ignore any external sequence producer setting.
  *
- * The external matchfinder can be "cleared" by registering a NULL external
- * matchfinder function pointer. This removes all limitations described above in
- * the "LIMITATIONS" section of the API docs.
+ * The sequence producer can be "cleared" by registering a NULL function pointer. This
+ * removes all limitations described above in the "LIMITATIONS" section of the API docs.
  *
- * The user is strongly encouraged to read the full API documentation (above)
- * before calling this function. */
+ * The user is strongly encouraged to read the full API documentation (above) before
+ * calling this function. */
 ZSTDLIB_STATIC_API void
-ZSTD_registerExternalMatchFinder(
+ZSTD_registerSequenceProducer(
   ZSTD_CCtx* cctx,
-  void* externalMatchState,
-  ZSTD_externalMatchFinder_F* externalMatchFinder
+  void* sequenceProducerState,
+  ZSTD_sequenceProducer_F* sequenceProducer
 );
 
 #endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
index 6a66bedcb8bf81ab324e99e6d138a07d79356114..dc75eeebad916845409cb62f2ca3f85720475c9d 100644 (file)
@@ -95,7 +95,7 @@ typedef enum {
   ZSTD_error_seekableIO          = 102,
   ZSTD_error_dstBuffer_wrong     = 104,
   ZSTD_error_srcBuffer_wrong     = 105,
-  ZSTD_error_externalMatchFinder_failed = 106,
+  ZSTD_error_sequenceProducer_failed = 106,
   ZSTD_error_externalSequences_invalid = 107,
   ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
 } ZSTD_ErrorCode;
index 97c47caffc43a97e56fcd2a4014789392416258f..76ad41126ecef1b526ce7b8f3b397e9f6bf9c542 100644 (file)
@@ -17,8 +17,8 @@ static U32 const HLOG = 10;
 static U32 const MLS = 4;
 static U32 const BADIDX = 0xffffffff;
 
-static size_t simpleExternalMatchFinder(
-  void* externalMatchState,
+static size_t simpleSequenceProducer(
+  void* sequenceProducerState,
   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
   const void* src, size_t srcSize,
   const void* dict, size_t dictSize,
@@ -32,7 +32,7 @@ static size_t simpleExternalMatchFinder(
     size_t seqCount = 0;
     U32 hashTable[HSIZE];
 
-    (void)externalMatchState;
+    (void)sequenceProducerState;
     (void)dict;
     (void)dictSize;
     (void)outSeqsCapacity;
@@ -80,15 +80,15 @@ static size_t simpleExternalMatchFinder(
     return seqCount;
 }
 
-size_t zstreamExternalMatchFinder(
-  void* externalMatchState,
+size_t zstreamSequenceProducer(
+  void* sequenceProducerState,
   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
   const void* src, size_t srcSize,
   const void* dict, size_t dictSize,
   int compressionLevel,
   size_t windowSize
 ) {
-    EMF_testCase const testCase = *((EMF_testCase*)externalMatchState);
+    EMF_testCase const testCase = *((EMF_testCase*)sequenceProducerState);
     memset(outSeqs, 0, outSeqsCapacity);
 
     switch (testCase) {
@@ -100,8 +100,8 @@ size_t zstreamExternalMatchFinder(
             outSeqs[0].litLength = (U32)(srcSize);
             return 1;
          case EMF_LOTS_OF_SEQS:
-            return simpleExternalMatchFinder(
-                externalMatchState,
+            return simpleSequenceProducer(
+                sequenceProducerState,
                 outSeqs, outSeqsCapacity,
                 src, srcSize,
                 dict, dictSize,
@@ -135,6 +135,6 @@ size_t zstreamExternalMatchFinder(
             return outSeqsCapacity + 1;
         case EMF_BIG_ERROR:
         default:
-            return ZSTD_EXTERNAL_MATCHFINDER_ERROR;
+            return ZSTD_SEQUENCE_PRODUCER_ERROR;
     }
 }
index 7550bbcebdb6d9d1f974d59bf101c319c5962d4e..e38dc25caa2fabd759af765fe6b3115d584872bd 100644 (file)
@@ -27,8 +27,8 @@ typedef enum {
     EMF_INVALID_LAST_LITS = 8
 } EMF_testCase;
 
-size_t zstreamExternalMatchFinder(
-  void* externalMatchState,
+size_t zstreamSequenceProducer(
+  void* sequenceProducerState,
   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
   const void* src, size_t srcSize,
   const void* dict, size_t dictSize,
index 4b5102ef1be013ebcaa648b10e4652db6c20dfe0..bbb262add1b8b3d9e9c0f67cc13c6ea9cb70739e 100644 (file)
@@ -35,8 +35,8 @@ PRGDIR = ../../programs
 CONTRIBDIR = ../../contrib
 
 # TODO(embg) make it possible to plug in an arbitrary sequence producer as a .o file
-MATCHFINDER_DIR = $(CONTRIBDIR)/externalMatchfinder
-MATCHFINDER_SRC = $(MATCHFINDER_DIR)/matchfinder.c
+MATCHFINDER_DIR = $(CONTRIBDIR)/externalSequenceProducer
+MATCHFINDER_SRC = $(MATCHFINDER_DIR)/sequence_producer.c
 
 FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
        -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
index 6b833471e4b6d5cf5ddd0447162d39434242b80a..411b6391c19ccec6421bfe28719d0db4fa72b589 100644 (file)
@@ -17,7 +17,7 @@
 #include "fuzz_helpers.h"
 #include "zstd.h"
 #include "zdict.h"
-#include "matchfinder.h"
+#include "sequence_producer.h"
 
 const int kMinClevel = -3;
 const int kMaxClevel = 19;
@@ -71,13 +71,13 @@ ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer)
     return params;
 }
 
-static void setExternalMatchFinderParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) {
-    ZSTD_registerExternalMatchFinder(
+static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) {
+    ZSTD_registerSequenceProducer(
         cctx,
         NULL,
-        simpleExternalMatchFinder
+        simpleSequenceProducer
     );
-    setRand(cctx, ZSTD_c_enableMatchFinderFallback, 0, 1, producer);
+    setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer);
     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0));
     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable));
 }
@@ -138,9 +138,9 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
     }
 
     if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) {
-        setExternalMatchFinderParams(cctx, producer);
+        setSequenceProducerParams(cctx, producer);
     } else {
-        ZSTD_registerExternalMatchFinder(cctx, NULL, NULL);
+        ZSTD_registerSequenceProducer(cctx, NULL, NULL);
     }
 }
 
index aff847b4d93d82480fa1ac9b682a536e5a473058..14c4af82fb76c1fb5675829668cc70a3e17b72ed 100644 (file)
@@ -40,7 +40,7 @@
 #include "seqgen.h"
 #include "util.h"
 #include "timefn.h"       /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
-#include "external_matchfinder.h"   /* zstreamExternalMatchFinder, EMF_testCase */
+#include "external_matchfinder.h"   /* zstreamSequenceProducer, EMF_testCase */
 
 /*-************************************
  *  Constants
@@ -1856,14 +1856,14 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
     }
     DISPLAYLEVEL(3, "OK \n");
 
-    DISPLAYLEVEL(3, "test%3i : External matchfinder API: ", testNb++);
+    DISPLAYLEVEL(3, "test%3i : Block-Level External Sequence Producer API: ", testNb++);
     {
         size_t const dstBufSize = ZSTD_compressBound(CNBufferSize);
         BYTE* const dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize));
         size_t const checkBufSize = CNBufferSize;
         BYTE* const checkBuf = (BYTE*)malloc(checkBufSize);
         int enableFallback;
-        EMF_testCase externalMatchState;
+        EMF_testCase sequenceProducerState;
 
         CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed");
 
@@ -1871,7 +1871,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
 
         /* Reference external matchfinder outside the test loop to
          * check that the reference is preserved across compressions */
-        ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
+        ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
 
         for (enableFallback = 0; enableFallback <= 1; enableFallback++) {
             size_t testCaseId;
@@ -1892,9 +1892,9 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
             ZSTD_ErrorCode const errorCodes[] = {
                 ZSTD_error_no_error,
                 ZSTD_error_no_error,
-                ZSTD_error_externalMatchFinder_failed,
-                ZSTD_error_externalMatchFinder_failed,
-                ZSTD_error_externalMatchFinder_failed,
+                ZSTD_error_sequenceProducer_failed,
+                ZSTD_error_sequenceProducer_failed,
+                ZSTD_error_sequenceProducer_failed,
                 ZSTD_error_externalSequences_invalid,
                 ZSTD_error_externalSequences_invalid,
                 ZSTD_error_externalSequences_invalid,
@@ -1906,18 +1906,18 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
 
                 int const compressionShouldSucceed = (
                     (errorCodes[testCaseId] == ZSTD_error_no_error) ||
-                    (enableFallback && errorCodes[testCaseId] == ZSTD_error_externalMatchFinder_failed)
+                    (enableFallback && errorCodes[testCaseId] == ZSTD_error_sequenceProducer_failed)
                 );
 
                 int const testWithSequenceValidation = (
                     testCases[testCaseId] == EMF_INVALID_OFFSET
                 );
 
-                externalMatchState = testCases[testCaseId];
+                sequenceProducerState = testCases[testCaseId];
 
                 ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
                 CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_validateSequences, testWithSequenceValidation));
-                CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
+                CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, enableFallback));
                 res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
 
                 if (compressionShouldSucceed) {
@@ -1936,9 +1936,9 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
             /* Test compression with external matchfinder + empty src buffer */
             {
                 size_t res;
-                externalMatchState = EMF_ZERO_SEQS;
+                sequenceProducerState = EMF_ZERO_SEQS;
                 ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
-                CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
+                CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, enableFallback));
                 res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0);
                 CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
                 CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!");
@@ -1947,30 +1947,30 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
 
         /* Test that reset clears the external matchfinder */
         CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
-        externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
-        CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
+        sequenceProducerState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
+        CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 0));
         CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
 
         /* Test that registering mFinder == NULL clears the external matchfinder */
         ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
-        ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
-        externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
-        CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
-        ZSTD_registerExternalMatchFinder(zc, NULL, NULL); /* clear the external matchfinder */
+        ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
+        sequenceProducerState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
+        CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 0));
+        ZSTD_registerSequenceProducer(zc, NULL, NULL); /* clear the external matchfinder */
         CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
 
         /* Test that external matchfinder doesn't interact with older APIs */
         ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
-        ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
-        externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */
-        CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
+        ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
+        sequenceProducerState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */
+        CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 0));
         CHECK_Z(ZSTD_compressCCtx(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize, 3));
 
         /* Test that compression returns the correct error with LDM */
         CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
         {
             size_t res;
-            ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
+            ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
             CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable));
             res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
             CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
@@ -1985,7 +1985,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
         CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
         {
             size_t res;
-            ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
+            ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
             CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_nbWorkers, 1));
             res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
             CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");