]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Bugfixes for the External Matchfinder API (#3433)
authorElliot Gorokhovsky <embg@fb.com>
Thu, 19 Jan 2023 15:41:24 +0000 (10:41 -0500)
committerGitHub <noreply@github.com>
Thu, 19 Jan 2023 15:41:24 +0000 (10:41 -0500)
* external matchfinder bugfixes + tests

* small doc fix

lib/compress/zstd_compress.c
lib/zstd.h
tests/zstreamtest.c

index 0fa2e6e2d3da2688bcbf444a5ea695dc6861681f..3a48e7dcd48e21663f9427602c9dbc121b0730a6 100644 (file)
@@ -1599,7 +1599,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
 
     size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
     size_t const externalSeqSpace = useExternalMatchFinder
-        ? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
+        ? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
         : 0;
 
     size_t const neededSpace =
@@ -3158,7 +3158,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
             ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
                                                                                     zc->appliedParams.useRowMatchFinder,
                                                                                     dictMode);
-            assert(zc->externalMatchCtx.mFinder == NULL);
             ms->ldmSeqStore = NULL;
             lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
         }
@@ -5945,6 +5944,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
     params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);
 
 #ifdef ZSTD_MULTITHREAD
+    /* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
+    RETURN_ERROR_IF(
+        params.useExternalMatchFinder == 1 && params.nbWorkers >= 1,
+        parameter_combination_unsupported,
+        "External matchfinder isn't supported with nbWorkers >= 1"
+    );
+
     if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
         params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
     }
@@ -6774,14 +6780,19 @@ void ZSTD_registerExternalMatchFinder(
     ZSTD_CCtx* zc, void* mState,
     ZSTD_externalMatchFinder_F* mFinder
 ) {
-    ZSTD_externalMatchCtx emctx = {
-        mState,
-        mFinder,
-
-        /* seqBuffer is allocated later (from the cwskp) */
-        NULL, /* seqBuffer */
-        0 /* seqBufferCapacity */
-    };
-    zc->externalMatchCtx = emctx;
-    zc->requestedParams.useExternalMatchFinder = 1;
+    if (mFinder != NULL) {
+        ZSTD_externalMatchCtx emctx = {
+            mState,
+            mFinder,
+
+            /* seqBuffer is allocated later (from the cwskp) */
+            NULL, /* seqBuffer */
+            0 /* seqBufferCapacity */
+        };
+        zc->externalMatchCtx = emctx;
+        zc->requestedParams.useExternalMatchFinder = 1;
+    } else {
+        ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
+        zc->requestedParams.useExternalMatchFinder = 0;
+    }
 }
index 38dfb3a5d968049a9523d8c753beba4ee5c2ed38..a2e02345248a39ca8c5cb61d69176c258291567d 100644 (file)
@@ -2840,8 +2840,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* bloc
  * externalMatchState.
  *
  * *** LIMITATIONS ***
- * External matchfinders are compatible with all zstd compression APIs. There are
- * only two limitations.
+ * External matchfinders are compatible with all zstd compression APIs which respect
+ * advanced parameters. However, there are three limitations:
  *
  * First, the ZSTD_c_enableLongDistanceMatching cParam is not supported.
  * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an
@@ -2863,7 +2863,11 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* bloc
  *     APIs, work with the external matchfinder, but the external matchfinder won't
  *     receive any history from the previous block. Each block is an independent chunk.
  *
- * Long-term, we plan to overcome both limitations. There is no technical blocker to
+ * Third, multi-threading within a single compression is not supported. In other words,
+ * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external matchfinder is registered.
+ * Multi-threading across compressions is fine: simply create one CCtx per thread.
+ *
+ * Long-term, we plan to overcome all three limitations. There is no technical blocker to
  * overcoming them. It is purely a question of engineering effort.
  */
 
@@ -2886,6 +2890,17 @@ typedef size_t ZSTD_externalMatchFinder_F (
  * compressions. It will remain set until the user explicitly resets compression
  * parameters.
  *
+ * External matchfinder registration is considered to be an "advanced parameter",
+ * part of the "advanced API". This means it will only have an effect on
+ * compression APIs which respect advanced parameters, such as compress2() and
+ * compressStream(). Older compression APIs such as compressCCtx(), which predate
+ * the introduction of "advanced parameters", will ignore any external matchfinder
+ * setting.
+ *
+ * The external matchfinder can be "cleared" by registering a NULL external
+ * matchfinder function pointer. This removes all limitations described above in
+ * the "LIMITATIONS" section of the API docs.
+ *
  * The user is strongly encouraged to read the full API documentation (above)
  * before calling this function. */
 ZSTDLIB_STATIC_API void
index 2febfff91b4d0f00d069ae7e93b25f7d134e8501..4a621692dcd848967e28f9eac6939401561bc5e7 100644 (file)
@@ -1846,15 +1846,11 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
 
         CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed");
 
-        ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
+        CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
 
         /* Reference external matchfinder outside the test loop to
          * check that the reference is preserved across compressions */
-        ZSTD_registerExternalMatchFinder(
-            zc,
-            &externalMatchState,
-            zstreamExternalMatchFinder
-        );
+        ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
 
         for (enableFallback = 0; enableFallback < 1; enableFallback++) {
             size_t testCaseId;
@@ -1916,11 +1912,56 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
         }
 
         /* Test that reset clears the external matchfinder */
+        CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
+        externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
+        CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
+        CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
+
+        /* Test that registering mFinder == NULL clears the external matchfinder */
         ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
+        ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
         externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
         CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
+        ZSTD_registerExternalMatchFinder(zc, NULL, NULL); /* clear the external matchfinder */
         CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
 
+        /* Test that external matchfinder doesn't interact with older APIs */
+        ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
+        ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
+        externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */
+        CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
+        CHECK_Z(ZSTD_compressCCtx(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize, 3));
+
+        /* Test that compression returns the correct error with LDM */
+        CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
+        {
+            size_t res;
+            ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
+            CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable));
+            res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+            CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
+            CHECK(
+                ZSTD_getErrorCode(res) != ZSTD_error_parameter_combination_unsupported,
+                "EMF: Wrong error code: %s", ZSTD_getErrorName(res)
+            );
+        }
+
+#ifdef ZSTD_MULTITHREAD
+        /* Test that compression returns the correct error with nbWorkers > 0 */
+        CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
+        {
+            size_t res;
+            ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
+            CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_nbWorkers, 1));
+            res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+            CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
+            CHECK(
+                ZSTD_getErrorCode(res) != ZSTD_error_parameter_combination_unsupported,
+                "EMF: Wrong error code: %s", ZSTD_getErrorName(res)
+            );
+        }
+#endif
+
         free(dstBuf);
         free(checkBuf);
     }