From 345bcb5ff7001fbe2dfd59974808170c9e0f4d5d Mon Sep 17 00:00:00 2001 From: Adenilson Cavalcanti Date: Tue, 18 Jun 2024 17:20:35 -0700 Subject: [PATCH] [zstd][dict] Ensure that dictionary training functions are fully reentrant The two main functions used for dictionary training using the COVER algorithm require initialization of a COVER_ctx_t where a call to qsort() is performed. The issue is that the standard C99 qsort() function doesn't offer a way to pass an extra parameter for the comparison function callback (e.g. a pointer to a context) and currently zstd relies on a *global* static variable to hold a pointer to a context needed to perform the sort operation. If a zstd library user invokes either ZDICT_trainFromBuffer_cover or ZDICT_optimizeTrainFromBuffer_cover from multiple threads, the global context may be overwritten before/during the call/execution to qsort() in the initialization of the COVER_ctx_t, thus yielding to crashes and other bad things (Tm) as reported on issue #4045. Enters qsort_r(): it was designed to address precisely this situation, to quote from the documention [1]: "the comparison function does not need to use global variables to pass through arbitrary arguments, and is therefore reentrant and safe to use in threads." It is available with small variations for multiple OSes (GNU, BSD[2], Windows[3]), and the ISO C11 [4] standard features on annex B-21 qsort_s() as part of the . Let's hope that compilers eventually catch up with it. For now, we have to handle the small variations in function parameters for each platform. The current fix solves the problem by allowing each executing thread pass its own COVER_ctx_t instance to qsort_r(), removing the use of a global pointer and allowing the code to be reentrant. Unfortunately for *BSD, we cannot leverage qsort_r() given that its API has changed on newer versions of FreeBSD (14.0) and the other BSD variants (e.g. NetBSD, OpenBSD) don't implement it. For such cases we provide a fallback that will work only requiring support for compilers implementing support for C90. [1] https://man7.org/linux/man-pages/man3/qsort_r.3.html [2] https://man.freebsd.org/cgi/man.cgi?query=qsort_r [3] https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/qsort-s?view=msvc-170 [4] https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1548.pdf --- lib/common/zstd_deps.h | 12 +++++++ lib/dictBuilder/cover.c | 79 +++++++++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 19 deletions(-) diff --git a/lib/common/zstd_deps.h b/lib/common/zstd_deps.h index 4d767ae9b..f41a973c3 100644 --- a/lib/common/zstd_deps.h +++ b/lib/common/zstd_deps.h @@ -24,6 +24,18 @@ #ifndef ZSTD_DEPS_COMMON #define ZSTD_DEPS_COMMON +/* Even though we use qsort_r only for the dictionary builder, the macro + * _GNU_SOURCE has to be declared *before* the inclusion of any standard + * header and the script 'combine.sh' combines the whole zstd source code + * in a single file. + */ +#if defined(__linux) || defined(__linux__) || defined(linux) || defined(__gnu_linux__) || \ + defined(__CYGWIN__) || defined(__MSYS__) +#if !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif +#endif + #include #include #include diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 44f9029ac..386a4d572 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -21,8 +21,17 @@ /*-************************************* * Dependencies ***************************************/ +/* qsort_r is an extension. */ +#if defined(__linux) || defined(__linux__) || defined(linux) || defined(__gnu_linux__) || \ + defined(__CYGWIN__) || defined(__MSYS__) +#if !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif +#endif + #include /* fprintf */ -#include /* malloc, free, qsort */ +#include /* malloc, free, qsort_r */ + #include /* memset */ #include /* clock */ @@ -232,8 +241,10 @@ typedef struct { unsigned d; } COVER_ctx_t; -/* We need a global context for qsort... */ +#if !defined(_GNU_SOURCE) && !defined(__APPLE__) && !defined(_MSC_VER) +/* C90 only offers qsort() that needs a global context. */ static COVER_ctx_t *g_coverCtx = NULL; +#endif /*-************************************* * Helper functions @@ -276,11 +287,15 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { /** * Same as COVER_cmp() except ties are broken by pointer value - * NOTE: g_coverCtx must be set to call this function. A global is required because - * qsort doesn't take an opaque pointer. */ -static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) { - int result = COVER_cmp(g_coverCtx, lp, rp); +#if (defined(_WIN32) && defined(_MSC_VER)) || defined(__APPLE__) +static int WIN_CDECL COVER_strict_cmp(void* g_coverCtx, const void* lp, const void* rp) { +#elif defined(_GNU_SOURCE) +static int COVER_strict_cmp(const void *lp, const void *rp, void *g_coverCtx) { +#else /* C90 fallback.*/ +static int COVER_strict_cmp(const void *lp, const void *rp) { +#endif + int result = COVER_cmp((COVER_ctx_t*)g_coverCtx, lp, rp); if (result == 0) { result = lp < rp ? -1 : 1; } @@ -289,14 +304,50 @@ static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) { /** * Faster version for d <= 8. */ -static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) { - int result = COVER_cmp8(g_coverCtx, lp, rp); +#if (defined(_WIN32) && defined(_MSC_VER)) || defined(__APPLE__) +static int WIN_CDECL COVER_strict_cmp8(void* g_coverCtx, const void* lp, const void* rp) { +#elif defined(_GNU_SOURCE) +static int COVER_strict_cmp8(const void *lp, const void *rp, void *g_coverCtx) { +#else /* C90 fallback.*/ +static int COVER_strict_cmp8(const void *lp, const void *rp) { +#endif + int result = COVER_cmp8((COVER_ctx_t*)g_coverCtx, lp, rp); if (result == 0) { result = lp < rp ? -1 : 1; } return result; } +/** + * Abstract away divergence of qsort_r() parameters. + * Hopefully when C11 become the norm, we will be able + * to clean it up. + */ +static void stableSort(COVER_ctx_t *ctx) { +#if defined(__APPLE__) + qsort_r(ctx->suffix, ctx->suffixSize, sizeof(U32), + ctx, + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#elif defined(_GNU_SOURCE) + qsort_r(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp), + ctx); +#elif defined(_WIN32) && defined(_MSC_VER) + qsort_s(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp), + ctx); +#elif defined(__OpenBSD__) + g_coverCtx = ctx; + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#else /* C90 fallback.*/ + g_coverCtx = ctx; + /* TODO(cavalcanti): implement a reentrant qsort() when is not available. */ + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#endif +} + /** * Returns the first pointer in [first, last) whose element does not compare * less than value. If no such element exists it returns last. @@ -620,17 +671,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, for (i = 0; i < ctx->suffixSize; ++i) { ctx->suffix[i] = i; } - /* qsort doesn't take an opaque pointer, so pass as a global. - * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. - */ - g_coverCtx = ctx; -#if defined(__OpenBSD__) - mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), - (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); -#else - qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), - (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); -#endif + stableSort(ctx); } DISPLAYLEVEL(2, "Computing frequencies\n"); /* For each dmer group (group of positions with the same first d bytes): -- 2.47.2