ZSTD_cwksp_move(&cctx->workspace, &ws);
cctx->staticSize = workspaceSize;
- /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
+ /* statically sized space. tmpWorkspace never moves (but prev/next block swap places) */
- if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
+ if (!ZSTD_cwksp_check_available(&cctx->workspace, TMP_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
- cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
+ cctx->tmpWorkspace = ZSTD_cwksp_reserve_object_aligned(&cctx->workspace, TMP_WORKSPACE_SIZE, sizeof(S64));
+ cctx->tmpWkspSize = TMP_WORKSPACE_SIZE;
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
return cctx;
}
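
Editorial note (not part of the patch): TMP_WORKSPACE_SIZE itself is outside this excerpt. For the availability check above to stay sound, it presumably has to cover both the entropy tables and the block splitter's scratch space; a hypothetical reconstruction, assuming it sits next to ENTROPY_WORKSPACE_SIZE in zstd_compress_internal.h:

/* hypothetical: a single scratch buffer sized for both of its users */
#define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE))
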
+ hSize * sizeof(U32)
+ h3Size * sizeof(U32);
size_t const optPotentialSpace =
- ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
- + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
- + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
- + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
- + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
- + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
+ ZSTD_cwksp_aligned64_alloc_size((MaxML+1) * sizeof(U32))
+ + ZSTD_cwksp_aligned64_alloc_size((MaxLL+1) * sizeof(U32))
+ + ZSTD_cwksp_aligned64_alloc_size((MaxOff+1) * sizeof(U32))
+ + ZSTD_cwksp_aligned64_alloc_size((1<<Litbits) * sizeof(U32))
+ + ZSTD_cwksp_aligned64_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+ + ZSTD_cwksp_aligned64_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
- ? ZSTD_cwksp_aligned_alloc_size(hSize)
+ ? ZSTD_cwksp_aligned64_alloc_size(hSize)
: 0;
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
- + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
+ + ZSTD_cwksp_aligned64_alloc_size(maxNbSeq * sizeof(seqDef))
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
- size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
+ size_t const tmpWorkSpace = ZSTD_cwksp_aligned_alloc_size(TMP_WORKSPACE_SIZE, sizeof(S64));
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?
- ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+ ZSTD_cwksp_aligned64_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
size_t const externalSeqSpace = useSequenceProducer
- ? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
+ ? ZSTD_cwksp_aligned64_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
: 0;
size_t const neededSpace =
cctxSpace +
- entropySpace +
+ tmpWorkSpace +
blockStateSpace +
ldmSpace +
ldmSeqSpace +
DEBUGLOG(5, "reserving object space");
/* Statically sized space.
- * entropyWorkspace never moves,
+ * tmpWorkspace never moves,
* though prev/next block swap places */
assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
- zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
- RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
+ zc->tmpWorkspace = ZSTD_cwksp_reserve_object_aligned(ws, TMP_WORKSPACE_SIZE, sizeof(S64));
+ RETURN_ERROR_IF(zc->tmpWorkspace == NULL, memory_allocation, "couldn't allocate tmpWorkspace");
+ zc->tmpWkspSize = TMP_WORKSPACE_SIZE;
} }
ZSTD_cwksp_clear(ws);
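
Editorial note: "prev/next block swap places" refers to the per-block pointer swap performed elsewhere in zstd_compress.c; the objects themselves never move, which is what lets them live in the fixed object region. A minimal sketch, assuming the ZSTD_blockState_t layout from zstd_compress_internal.h:

/* pointers move; the two reserved objects do not */
static void swapBlockStates(ZSTD_blockState_t* bs)
{
    ZSTD_compressedBlockState_t* const tmpBlock = bs->prevCBlock;
    bs->prevCBlock = bs->nextCBlock;
    bs->nextCBlock = tmpBlock;
}
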
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
entropyMetadata,
- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");
+ zc->tmpWorkspace, zc->tmpWkspSize), "");
return ZSTD_estimateBlockSize(
seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
(size_t)(seqStore->sequences - seqStore->sequencesStart),
&zc->blockState.nextCBlock->entropy,
entropyMetadata,
- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
+ zc->tmpWorkspace, zc->tmpWkspSize,
(int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
}
&zc->appliedParams,
op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
srcSize,
- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+ zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */,
zc->bmi2);
FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
&zc->appliedParams,
dst, dstCapacity,
srcSize,
- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+ zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */,
zc->bmi2);
if (frame &&
#include "zstd_preSplit.h"
-
static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings)
{
- S64 workspace[ZSTD_SLIPBLOCK_WORKSPACESIZE / 8];
- (void)cctx;
- /* note: we currenly only split full blocks (128 KB)
- * and when there is more than 128 KB input remaining
+ /* note: conservatively only split full blocks (128 KB) currently,
+ * and even then only if there is more than 128 KB input remaining.
*/
if (srcSize <= 128 KB || blockSizeMax < 128 KB)
return MIN(srcSize, blockSizeMax);
/* dynamic splitting has a cpu cost for analysis;
 * due to that cost, it's only used for btlazy2+ strategies */
if (strat >= ZSTD_btlazy2)
- return ZSTD_splitBlock_4k(src, srcSize, blockSizeMax, workspace, sizeof(workspace));
+ return ZSTD_splitBlock_4k(src, srcSize, blockSizeMax, cctx->tmpWorkspace, cctx->tmpWkspSize);
/* blind split strategy
* no cpu cost, but can over-split homogeneous data.
* heuristic, tested as being "generally better".
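
Editorial note: the deleted stack array was ZSTD_SLIPBLOCK_WORKSPACESIZE bytes of S64s, so routing ZSTD_splitBlock_4k through the context's scratch buffer trades stack for workspace. It relies on two preconditions, which the S64-aligned reservation earlier in this patch exists to guarantee; a sketch, using identifiers introduced by the patch:

/* editorial sketch: what the new call site assumes about the scratch buffer */
static void checkSplitterScratch(const ZSTD_CCtx* cctx)
{
    assert(cctx->tmpWkspSize >= ZSTD_SLIPBLOCK_WORKSPACESIZE);  /* room for the splitter */
    assert(((size_t)cctx->tmpWorkspace % sizeof(S64)) == 0);    /* aligned like the old S64 array */
}
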
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
cdict->dictContentSize, cdict->dictContentType, dtlm,
- ZSTD_tfp_forCCtx, cctx->entropyWorkspace)
+ ZSTD_tfp_forCCtx, cctx->tmpWorkspace)
: ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
- dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace);
+ dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->tmpWorkspace);
FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
assert(dictID <= UINT_MAX);
cctx->dictID = (U32)dictID;
&cctx->appliedParams,
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
blockSize,
- cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+ cctx->tmpWorkspace, cctx->tmpWkspSize /* statically allocated in resetCCtx */,
cctx->bmi2);
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*
- * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
+ * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned64_alloc_size().
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
if (size == 0)
#endif
}
+MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size, size_t alignment) {
+ return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, alignment));
+}
+
/**
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
* Used to determine the number of bytes required for a given 64-byte-aligned allocation.
*/
-MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
- return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
+MEM_STATIC size_t ZSTD_cwksp_aligned64_alloc_size(size_t size) {
+ return ZSTD_cwksp_aligned_alloc_size(size, ZSTD_CWKSP_ALIGNMENT_BYTES);
}
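
For a feel of the arithmetic: ZSTD_cwksp_align rounds up to the next multiple of a power-of-2 alignment, so (ignoring the ASAN redzones that ZSTD_cwksp_alloc_size may add) both helpers reduce to the classic mask trick. A self-contained sketch:

#include <assert.h>
#include <stddef.h>

/* mirrors ZSTD_cwksp_align: round size up to a power-of-2 alignment */
static size_t align_up(size_t size, size_t alignment)
{
    size_t const mask = alignment - 1;
    return (size + mask) & ~mask;
}

int main(void)
{
    /* e.g. the optimal-parser ML table: (MaxML+1)*sizeof(U32) = 53*4 = 212 bytes */
    assert(align_up(212, 64) == 256);  /* what ZSTD_cwksp_aligned64_alloc_size accounts for */
    assert(align_up(212,  8) == 216);  /* the generalized helper with sizeof(S64) */
    return 0;
}
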
/**
* which we can allocate from the end of the workspace.
*/
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
- return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
+ return (void*)((size_t)ws->workspaceEnd & (size_t)~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
}
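
The added (size_t) cast is cosmetic rather than functional: with ZSTD_CWKSP_ALIGNMENT_BYTES == 64, ~(64-1) is the negative int -64, and its conversion to size_t is well-defined (reduction modulo 2^N), producing exactly the intended mask; the cast mainly silences sign-conversion warnings. A self-contained illustration:

#include <assert.h>
#include <stddef.h>

int main(void)
{
    size_t const addr = 1000;  /* arbitrary example address */
    /* clearing the low 6 bits rounds down to a 64-byte boundary: 1000 -> 960 */
    assert((addr & (size_t)~(64-1)) == 960);
    return 0;
}
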
/**
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
{
- void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
- ZSTD_cwksp_alloc_aligned);
+ void* const ptr = ZSTD_cwksp_reserve_internal(ws,
+ ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
+ ZSTD_cwksp_alloc_aligned);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
return ptr;
}
}
/**
- * Aligned on sizeof(void*).
+ * Aligned on `alignment`.
* Note : should happen only once, at workspace first initialization
*/
-MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
+MEM_STATIC void*
+ZSTD_cwksp_reserve_object_aligned(ZSTD_cwksp* ws, size_t bytes, size_t alignment)
{
- size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+ size_t const roundedBytes = ZSTD_cwksp_align(bytes, alignment);
void* alloc = ws->objectEnd;
void* end = (BYTE*)alloc + roundedBytes;
DEBUGLOG(4,
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
- assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0);
- assert(bytes % ZSTD_ALIGNOF(void*) == 0);
+ assert((size_t)alloc % alignment == 0);
+ assert(bytes % alignment == 0);
ZSTD_cwksp_assert_internal_consistency(ws);
/* we must be in the first phase, no advance is possible */
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
return alloc;
}
+/**
+ * Aligned on sizeof(void*).
+ * Note : should happen only once, at workspace first initialization
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
+{
+ return ZSTD_cwksp_reserve_object_aligned(ws, bytes, sizeof(void*));
+}
+
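
Design note: keeping ZSTD_cwksp_reserve_object as a one-line wrapper leaves the sizeof(void*) contract of every existing call site untouched; only callers with stricter needs name their alignment. A sketch of the two entry points side by side, using identifiers from this patch:

/* editorial sketch: during the objects phase of workspace initialization */
static void reserveStaticObjects(ZSTD_cwksp* ws, ZSTD_CCtx* zc)
{
    /* legacy path: pointer alignment is enough for the block states */
    zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)
            ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
    /* new path: the scratch buffer is read as S64[] by the block splitter */
    zc->tmpWorkspace = ZSTD_cwksp_reserve_object_aligned(ws, TMP_WORKSPACE_SIZE, sizeof(S64));
    zc->tmpWkspSize = TMP_WORKSPACE_SIZE;
}

One caveat worth double-checking: the alignment assert in the aligned variant only holds if objectEnd is already 8-byte aligned, so on 32-bit targets (where sizeof(void*) == 4) an S64-aligned request depends on the workspace base and all previously reserved objects preserving that alignment.
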
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
{
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");