From: Yann Collet Date: Fri, 18 Oct 2024 01:40:47 +0000 (-0700) Subject: ZSTD_splitBlock_4k() uses externally provided workspace X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=56d0b4b3a694ad601f27f05ee5a26330049ca62c;p=thirdparty%2Fzstd.git ZSTD_splitBlock_4k() uses externally provided workspace. Ideally, this workspace would be provided from the ZSTD_CCtx* state. --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d9f10f849..37b87336c 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4489,8 +4489,11 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, #include "zstd_preSplit.h" -static size_t ZSTD_optimalBlockSize(const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings) + +static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings) { + S64 workspace[ZSTD_SLIPBLOCK_WORKSPACESIZE / 8]; + (void)cctx; /* note: we currenly only split full blocks (128 KB) * and when there is more than 128 KB input remaining */ @@ -4499,7 +4502,7 @@ static size_t ZSTD_optimalBlockSize(const void* src, size_t srcSize, size_t bloc /* dynamic splitting has a cpu cost for analysis, * due to that cost it's only used for btlazy2+ strategies */ if (strat >= ZSTD_btlazy2) - return ZSTD_splitBlock_4k(src, srcSize, blockSizeMax); + return ZSTD_splitBlock_4k(src, srcSize, blockSizeMax, workspace, sizeof(workspace)); /* blind split strategy * no cpu cost, but can over-split homegeneous data. * heuristic, tested as being "generally better". 
@@ -4537,7 +4540,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, while (remaining) { ZSTD_matchState_t* const ms = &cctx->blockState.matchState; U32 const lastBlock = lastFrameChunk & (blockSizeMax >= remaining); - size_t const blockSize = ZSTD_optimalBlockSize(ip, remaining, blockSizeMax, cctx->appliedParams.cParams.strategy, savings); + size_t const blockSize = ZSTD_optimalBlockSize(cctx, ip, remaining, blockSizeMax, cctx->appliedParams.cParams.strategy, savings); assert(blockSize <= remaining); /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding diff --git a/lib/compress/zstd_preSplit.c b/lib/compress/zstd_preSplit.c index 3256905df..ba9dc6487 100644 --- a/lib/compress/zstd_preSplit.c +++ b/lib/compress/zstd_preSplit.c @@ -10,6 +10,7 @@ #include "../common/mem.h" /* S64 */ #include "../common/zstd_deps.h" /* ZSTD_memset */ +#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */ #include "zstd_preSplit.h" @@ -30,22 +31,19 @@ static unsigned hash2(const void *p) } -/* ==================================== */ -/* Global array -> for testing only !!! 
*/ -/* ==================================== */ typedef struct { int events[HASHTABLESIZE]; S64 nbEvents; } FingerPrint; -static FingerPrint pastEvents; -static FingerPrint newEvents; +typedef struct { + FingerPrint pastEvents; + FingerPrint newEvents; +} FPStats; -static void initStats(void) +static void initStats(FPStats* fpstats) { - ZSTD_memset(&pastEvents, 0, sizeof(pastEvents)); - ZSTD_memset(&newEvents, 0, sizeof(newEvents)); + ZSTD_memset(fpstats, 0, sizeof(FPStats)); } -/* ==================================== */ static void addToFingerprint(FingerPrint* fp, const void* src, size_t s) { @@ -103,14 +101,14 @@ static void mergeEvents(FingerPrint* acc, const FingerPrint* newfp) acc->nbEvents += newfp->nbEvents; } -static void flushEvents(void) +static void flushEvents(FPStats* fpstats) { size_t n; for (n = 0; n < HASHTABLESIZE; n++) { - pastEvents.events[n] = newEvents.events[n]; + fpstats->pastEvents.events[n] = fpstats->newEvents.events[n]; } - pastEvents.nbEvents = newEvents.nbEvents; - ZSTD_memset(&newEvents, 0, sizeof(newEvents)); + fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents; + ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents)); } static void removeEvents(FingerPrint* acc, const FingerPrint* slice) @@ -125,23 +123,30 @@ static void removeEvents(FingerPrint* acc, const FingerPrint* slice) #define CHUNKSIZE (8 << 10) /* Note: technically, we use CHUNKSIZE, so that's 8 KB */ -size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize, size_t blockSizeMax) +size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize, + size_t blockSizeMax, + void* workspace, size_t wkspSize) { + FPStats* const fpstats = (FPStats*)workspace; const char* p = (const char*)src; int penalty = THRESHOLD_PENALTY; size_t pos = 0; if (srcSize <= blockSizeMax) return srcSize; assert(blockSizeMax == (128 << 10)); + assert(workspace != NULL); + assert((size_t)workspace % 8 == 0); + ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE == sizeof(FPStats)); + 
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize; - initStats(); + initStats(fpstats); for (pos = 0; pos < blockSizeMax;) { assert(pos <= blockSizeMax - CHUNKSIZE); - recordFingerprint(&newEvents, p + pos, CHUNKSIZE); - if (compareFingerprints(&pastEvents, &newEvents, penalty)) { + recordFingerprint(&fpstats->newEvents, p + pos, CHUNKSIZE); + if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty)) { return pos; } else { - mergeEvents(&pastEvents, &newEvents); - ZSTD_memset(&newEvents, 0, sizeof(newEvents)); + mergeEvents(&fpstats->pastEvents, &fpstats->newEvents); + ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents)); penalty = penalty - 1 + (penalty == 0); } pos += CHUNKSIZE; diff --git a/lib/compress/zstd_preSplit.h b/lib/compress/zstd_preSplit.h index 148fc1936..b0b6bb762 100644 --- a/lib/compress/zstd_preSplit.h +++ b/lib/compress/zstd_preSplit.h @@ -17,7 +17,13 @@ extern "C" { #endif -size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize, size_t blockSizeMax); +#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208 + +/* note: + * @workspace must be aligned on 8-bytes boundaries + * @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE + */ +size_t ZSTD_splitBlock_4k(const void* src, size_t srcSize, size_t blockSizeMax, void* workspace, size_t wkspSize); #if defined (__cplusplus) }