/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
11 /*-*************************************
13 ***************************************/
14 #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
15 #include "../common/cpu.h"
16 #include "../common/mem.h"
17 #include "hist.h" /* HIST_countFast_wksp */
18 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
19 #include "../common/fse.h"
20 #define HUF_STATIC_LINKING_ONLY
21 #include "../common/huf.h"
22 #include "zstd_compress_internal.h"
23 #include "zstd_compress_sequences.h"
24 #include "zstd_compress_literals.h"
25 #include "zstd_fast.h"
26 #include "zstd_double_fast.h"
27 #include "zstd_lazy.h"
30 #include "zstd_compress_superblock.h"
/* ***************************************************************
*  Tuning parameters
*****************************************************************/
/*!
 * COMPRESS_HEAPMODE :
 * Select how default decompression function ZSTD_compress() allocates its context,
 * on stack (0, default), or into heap (1).
 * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
 */
#ifndef ZSTD_COMPRESS_HEAPMODE
#  define ZSTD_COMPRESS_HEAPMODE 0
#endif
46 /*-*************************************
48 ***************************************/
49 /* ZSTD_compressBound()
50 * Note that the result from this function is only compatible with the "normal"
51 * full-block strategy.
52 * When there are a lot of small blocks due to frequent flush in streaming mode
53 * the overhead of headers can make the compressed data to be larger than the
54 * return value of ZSTD_compressBound().
56 size_t ZSTD_compressBound(size_t srcSize
) {
57 return ZSTD_COMPRESSBOUND(srcSize
);
61 /*-*************************************
62 * Context memory management
63 ***************************************/
65 const void* dictContent
;
66 size_t dictContentSize
;
67 ZSTD_dictContentType_e dictContentType
; /* The dictContentType the CDict was created with */
68 U32
* entropyWorkspace
; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
70 ZSTD_matchState_t matchState
;
71 ZSTD_compressedBlockState_t cBlockState
;
72 ZSTD_customMem customMem
;
74 int compressionLevel
; /* 0 indicates that advanced API was used to select CDict params */
75 ZSTD_useRowMatchFinderMode_e useRowMatchFinder
; /* Indicates whether the CDict was created with params that would use
76 * row-based matchfinder. Unless the cdict is reloaded, we will use
77 * the same greedy/lazy matchfinder at compression time.
79 }; /* typedef'd to ZSTD_CDict within "zstd.h" */
81 ZSTD_CCtx
* ZSTD_createCCtx(void)
83 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem
);
86 static void ZSTD_initCCtx(ZSTD_CCtx
* cctx
, ZSTD_customMem memManager
)
89 ZSTD_memset(cctx
, 0, sizeof(*cctx
));
90 cctx
->customMem
= memManager
;
91 cctx
->bmi2
= ZSTD_cpuid_bmi2(ZSTD_cpuid());
92 { size_t const err
= ZSTD_CCtx_reset(cctx
, ZSTD_reset_parameters
);
93 assert(!ZSTD_isError(err
));
98 ZSTD_CCtx
* ZSTD_createCCtx_advanced(ZSTD_customMem customMem
)
100 ZSTD_STATIC_ASSERT(zcss_init
==0);
101 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN
==(0ULL - 1));
102 if ((!customMem
.customAlloc
) ^ (!customMem
.customFree
)) return NULL
;
103 { ZSTD_CCtx
* const cctx
= (ZSTD_CCtx
*)ZSTD_customMalloc(sizeof(ZSTD_CCtx
), customMem
);
104 if (!cctx
) return NULL
;
105 ZSTD_initCCtx(cctx
, customMem
);
110 ZSTD_CCtx
* ZSTD_initStaticCCtx(void* workspace
, size_t workspaceSize
)
114 if (workspaceSize
<= sizeof(ZSTD_CCtx
)) return NULL
; /* minimum size */
115 if ((size_t)workspace
& 7) return NULL
; /* must be 8-aligned */
116 ZSTD_cwksp_init(&ws
, workspace
, workspaceSize
, ZSTD_cwksp_static_alloc
);
118 cctx
= (ZSTD_CCtx
*)ZSTD_cwksp_reserve_object(&ws
, sizeof(ZSTD_CCtx
));
119 if (cctx
== NULL
) return NULL
;
121 ZSTD_memset(cctx
, 0, sizeof(ZSTD_CCtx
));
122 ZSTD_cwksp_move(&cctx
->workspace
, &ws
);
123 cctx
->staticSize
= workspaceSize
;
125 /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
126 if (!ZSTD_cwksp_check_available(&cctx
->workspace
, ENTROPY_WORKSPACE_SIZE
+ 2 * sizeof(ZSTD_compressedBlockState_t
))) return NULL
;
127 cctx
->blockState
.prevCBlock
= (ZSTD_compressedBlockState_t
*)ZSTD_cwksp_reserve_object(&cctx
->workspace
, sizeof(ZSTD_compressedBlockState_t
));
128 cctx
->blockState
.nextCBlock
= (ZSTD_compressedBlockState_t
*)ZSTD_cwksp_reserve_object(&cctx
->workspace
, sizeof(ZSTD_compressedBlockState_t
));
129 cctx
->entropyWorkspace
= (U32
*)ZSTD_cwksp_reserve_object(&cctx
->workspace
, ENTROPY_WORKSPACE_SIZE
);
130 cctx
->bmi2
= ZSTD_cpuid_bmi2(ZSTD_cpuid());
135 * Clears and frees all of the dictionaries in the CCtx.
137 static void ZSTD_clearAllDicts(ZSTD_CCtx
* cctx
)
139 ZSTD_customFree(cctx
->localDict
.dictBuffer
, cctx
->customMem
);
140 ZSTD_freeCDict(cctx
->localDict
.cdict
);
141 ZSTD_memset(&cctx
->localDict
, 0, sizeof(cctx
->localDict
));
142 ZSTD_memset(&cctx
->prefixDict
, 0, sizeof(cctx
->prefixDict
));
146 static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict
)
148 size_t const bufferSize
= dict
.dictBuffer
!= NULL
? dict
.dictSize
: 0;
149 size_t const cdictSize
= ZSTD_sizeof_CDict(dict
.cdict
);
150 return bufferSize
+ cdictSize
;
153 static void ZSTD_freeCCtxContent(ZSTD_CCtx
* cctx
)
155 assert(cctx
!= NULL
);
156 assert(cctx
->staticSize
== 0);
157 ZSTD_clearAllDicts(cctx
);
158 #ifdef ZSTD_MULTITHREAD
159 ZSTDMT_freeCCtx(cctx
->mtctx
); cctx
->mtctx
= NULL
;
161 ZSTD_cwksp_free(&cctx
->workspace
, cctx
->customMem
);
164 size_t ZSTD_freeCCtx(ZSTD_CCtx
* cctx
)
166 if (cctx
==NULL
) return 0; /* support free on NULL */
167 RETURN_ERROR_IF(cctx
->staticSize
, memory_allocation
,
168 "not compatible with static CCtx");
170 int cctxInWorkspace
= ZSTD_cwksp_owns_buffer(&cctx
->workspace
, cctx
);
171 ZSTD_freeCCtxContent(cctx
);
172 if (!cctxInWorkspace
) {
173 ZSTD_customFree(cctx
, cctx
->customMem
);
180 static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx
* cctx
)
182 #ifdef ZSTD_MULTITHREAD
183 return ZSTDMT_sizeof_CCtx(cctx
->mtctx
);
191 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx
* cctx
)
193 if (cctx
==NULL
) return 0; /* support sizeof on NULL */
194 /* cctx may be in the workspace */
195 return (cctx
->workspace
.workspace
== cctx
? 0 : sizeof(*cctx
))
196 + ZSTD_cwksp_sizeof(&cctx
->workspace
)
197 + ZSTD_sizeof_localDict(cctx
->localDict
)
198 + ZSTD_sizeof_mtctx(cctx
);
201 size_t ZSTD_sizeof_CStream(const ZSTD_CStream
* zcs
)
203 return ZSTD_sizeof_CCtx(zcs
); /* same object */
206 /* private API call, for dictBuilder only */
207 const seqStore_t
* ZSTD_getSeqStore(const ZSTD_CCtx
* ctx
) { return &(ctx
->seqStore
); }
209 /* Returns true if the strategy supports using a row based matchfinder */
210 static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy
) {
211 return (strategy
>= ZSTD_greedy
&& strategy
<= ZSTD_lazy2
);
214 /* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
215 * for this compression.
217 static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy
, const ZSTD_useRowMatchFinderMode_e mode
) {
218 assert(mode
!= ZSTD_urm_auto
);
219 return ZSTD_rowMatchFinderSupported(strategy
) && (mode
== ZSTD_urm_enableRowMatchFinder
);
222 /* Returns row matchfinder usage enum given an initial mode and cParams */
223 static ZSTD_useRowMatchFinderMode_e
ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode
,
224 const ZSTD_compressionParameters
* const cParams
) {
225 #if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON))
226 int const kHasSIMD128
= 1;
228 int const kHasSIMD128
= 0;
230 if (mode
!= ZSTD_urm_auto
) return mode
; /* if requested enabled, but no SIMD, we still will use row matchfinder */
231 mode
= ZSTD_urm_disableRowMatchFinder
;
232 if (!ZSTD_rowMatchFinderSupported(cParams
->strategy
)) return mode
;
234 if (cParams
->windowLog
> 14) mode
= ZSTD_urm_enableRowMatchFinder
;
236 if (cParams
->windowLog
> 17) mode
= ZSTD_urm_enableRowMatchFinder
;
241 /* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
242 static int ZSTD_allocateChainTable(const ZSTD_strategy strategy
,
243 const ZSTD_useRowMatchFinderMode_e useRowMatchFinder
,
244 const U32 forDDSDict
) {
245 assert(useRowMatchFinder
!= ZSTD_urm_auto
);
246 /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
247 * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
249 return forDDSDict
|| ((strategy
!= ZSTD_fast
) && !ZSTD_rowMatchFinderUsed(strategy
, useRowMatchFinder
));
252 /* Returns 1 if compression parameters are such that we should
253 * enable long distance matching (wlog >= 27, strategy >= btopt).
254 * Returns 0 otherwise.
256 static U32
ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters
* const cParams
) {
257 return cParams
->strategy
>= ZSTD_btopt
&& cParams
->windowLog
>= 27;
260 /* Returns 1 if compression parameters are such that we should
261 * enable blockSplitter (wlog >= 17, strategy >= btopt).
262 * Returns 0 otherwise.
264 static U32
ZSTD_CParams_useBlockSplitter(const ZSTD_compressionParameters
* const cParams
) {
265 return cParams
->strategy
>= ZSTD_btopt
&& cParams
->windowLog
>= 17;
268 static ZSTD_CCtx_params
ZSTD_makeCCtxParamsFromCParams(
269 ZSTD_compressionParameters cParams
)
271 ZSTD_CCtx_params cctxParams
;
272 /* should not matter, as all cParams are presumed properly defined */
273 ZSTD_CCtxParams_init(&cctxParams
, ZSTD_CLEVEL_DEFAULT
);
274 cctxParams
.cParams
= cParams
;
276 /* Adjust advanced params according to cParams */
277 if (ZSTD_CParams_shouldEnableLdm(&cParams
)) {
278 DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
279 cctxParams
.ldmParams
.enableLdm
= 1;
280 /* LDM is enabled by default for optimal parser and window size >= 128MB */
281 ZSTD_ldm_adjustParameters(&cctxParams
.ldmParams
, &cParams
);
282 assert(cctxParams
.ldmParams
.hashLog
>= cctxParams
.ldmParams
.bucketSizeLog
);
283 assert(cctxParams
.ldmParams
.hashRateLog
< 32);
286 if (ZSTD_CParams_useBlockSplitter(&cParams
)) {
287 DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including block splitting into cctx params");
288 cctxParams
.splitBlocks
= 1;
291 cctxParams
.useRowMatchFinder
= ZSTD_resolveRowMatchFinderMode(cctxParams
.useRowMatchFinder
, &cParams
);
292 assert(!ZSTD_checkCParams(cParams
));
296 static ZSTD_CCtx_params
* ZSTD_createCCtxParams_advanced(
297 ZSTD_customMem customMem
)
299 ZSTD_CCtx_params
* params
;
300 if ((!customMem
.customAlloc
) ^ (!customMem
.customFree
)) return NULL
;
301 params
= (ZSTD_CCtx_params
*)ZSTD_customCalloc(
302 sizeof(ZSTD_CCtx_params
), customMem
);
303 if (!params
) { return NULL
; }
304 ZSTD_CCtxParams_init(params
, ZSTD_CLEVEL_DEFAULT
);
305 params
->customMem
= customMem
;
309 ZSTD_CCtx_params
* ZSTD_createCCtxParams(void)
311 return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem
);
314 size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params
* params
)
316 if (params
== NULL
) { return 0; }
317 ZSTD_customFree(params
, params
->customMem
);
321 size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params
* params
)
323 return ZSTD_CCtxParams_init(params
, ZSTD_CLEVEL_DEFAULT
);
326 size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params
* cctxParams
, int compressionLevel
) {
327 RETURN_ERROR_IF(!cctxParams
, GENERIC
, "NULL pointer!");
328 ZSTD_memset(cctxParams
, 0, sizeof(*cctxParams
));
329 cctxParams
->compressionLevel
= compressionLevel
;
330 cctxParams
->fParams
.contentSizeFlag
= 1;
334 #define ZSTD_NO_CLEVEL 0
337 * Initializes the cctxParams from params and compressionLevel.
338 * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
340 static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params
* cctxParams
, ZSTD_parameters
const* params
, int compressionLevel
)
342 assert(!ZSTD_checkCParams(params
->cParams
));
343 ZSTD_memset(cctxParams
, 0, sizeof(*cctxParams
));
344 cctxParams
->cParams
= params
->cParams
;
345 cctxParams
->fParams
= params
->fParams
;
346 /* Should not matter, as all cParams are presumed properly defined.
347 * But, set it for tracing anyway.
349 cctxParams
->compressionLevel
= compressionLevel
;
350 cctxParams
->useRowMatchFinder
= ZSTD_resolveRowMatchFinderMode(cctxParams
->useRowMatchFinder
, ¶ms
->cParams
);
351 DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams
->useRowMatchFinder
);
354 size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params
* cctxParams
, ZSTD_parameters params
)
356 RETURN_ERROR_IF(!cctxParams
, GENERIC
, "NULL pointer!");
357 FORWARD_IF_ERROR( ZSTD_checkCParams(params
.cParams
) , "");
358 ZSTD_CCtxParams_init_internal(cctxParams
, ¶ms
, ZSTD_NO_CLEVEL
);
363 * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
364 * @param param Validated zstd parameters.
366 static void ZSTD_CCtxParams_setZstdParams(
367 ZSTD_CCtx_params
* cctxParams
, const ZSTD_parameters
* params
)
369 assert(!ZSTD_checkCParams(params
->cParams
));
370 cctxParams
->cParams
= params
->cParams
;
371 cctxParams
->fParams
= params
->fParams
;
372 /* Should not matter, as all cParams are presumed properly defined.
373 * But, set it for tracing anyway.
375 cctxParams
->compressionLevel
= ZSTD_NO_CLEVEL
;
378 ZSTD_bounds
ZSTD_cParam_getBounds(ZSTD_cParameter param
)
380 ZSTD_bounds bounds
= { 0, 0, 0 };
384 case ZSTD_c_compressionLevel
:
385 bounds
.lowerBound
= ZSTD_minCLevel();
386 bounds
.upperBound
= ZSTD_maxCLevel();
389 case ZSTD_c_windowLog
:
390 bounds
.lowerBound
= ZSTD_WINDOWLOG_MIN
;
391 bounds
.upperBound
= ZSTD_WINDOWLOG_MAX
;
395 bounds
.lowerBound
= ZSTD_HASHLOG_MIN
;
396 bounds
.upperBound
= ZSTD_HASHLOG_MAX
;
399 case ZSTD_c_chainLog
:
400 bounds
.lowerBound
= ZSTD_CHAINLOG_MIN
;
401 bounds
.upperBound
= ZSTD_CHAINLOG_MAX
;
404 case ZSTD_c_searchLog
:
405 bounds
.lowerBound
= ZSTD_SEARCHLOG_MIN
;
406 bounds
.upperBound
= ZSTD_SEARCHLOG_MAX
;
409 case ZSTD_c_minMatch
:
410 bounds
.lowerBound
= ZSTD_MINMATCH_MIN
;
411 bounds
.upperBound
= ZSTD_MINMATCH_MAX
;
414 case ZSTD_c_targetLength
:
415 bounds
.lowerBound
= ZSTD_TARGETLENGTH_MIN
;
416 bounds
.upperBound
= ZSTD_TARGETLENGTH_MAX
;
419 case ZSTD_c_strategy
:
420 bounds
.lowerBound
= ZSTD_STRATEGY_MIN
;
421 bounds
.upperBound
= ZSTD_STRATEGY_MAX
;
424 case ZSTD_c_contentSizeFlag
:
425 bounds
.lowerBound
= 0;
426 bounds
.upperBound
= 1;
429 case ZSTD_c_checksumFlag
:
430 bounds
.lowerBound
= 0;
431 bounds
.upperBound
= 1;
434 case ZSTD_c_dictIDFlag
:
435 bounds
.lowerBound
= 0;
436 bounds
.upperBound
= 1;
439 case ZSTD_c_nbWorkers
:
440 bounds
.lowerBound
= 0;
441 #ifdef ZSTD_MULTITHREAD
442 bounds
.upperBound
= ZSTDMT_NBWORKERS_MAX
;
444 bounds
.upperBound
= 0;
449 bounds
.lowerBound
= 0;
450 #ifdef ZSTD_MULTITHREAD
451 bounds
.upperBound
= ZSTDMT_JOBSIZE_MAX
;
453 bounds
.upperBound
= 0;
457 case ZSTD_c_overlapLog
:
458 #ifdef ZSTD_MULTITHREAD
459 bounds
.lowerBound
= ZSTD_OVERLAPLOG_MIN
;
460 bounds
.upperBound
= ZSTD_OVERLAPLOG_MAX
;
462 bounds
.lowerBound
= 0;
463 bounds
.upperBound
= 0;
467 case ZSTD_c_enableDedicatedDictSearch
:
468 bounds
.lowerBound
= 0;
469 bounds
.upperBound
= 1;
472 case ZSTD_c_enableLongDistanceMatching
:
473 bounds
.lowerBound
= 0;
474 bounds
.upperBound
= 1;
477 case ZSTD_c_ldmHashLog
:
478 bounds
.lowerBound
= ZSTD_LDM_HASHLOG_MIN
;
479 bounds
.upperBound
= ZSTD_LDM_HASHLOG_MAX
;
482 case ZSTD_c_ldmMinMatch
:
483 bounds
.lowerBound
= ZSTD_LDM_MINMATCH_MIN
;
484 bounds
.upperBound
= ZSTD_LDM_MINMATCH_MAX
;
487 case ZSTD_c_ldmBucketSizeLog
:
488 bounds
.lowerBound
= ZSTD_LDM_BUCKETSIZELOG_MIN
;
489 bounds
.upperBound
= ZSTD_LDM_BUCKETSIZELOG_MAX
;
492 case ZSTD_c_ldmHashRateLog
:
493 bounds
.lowerBound
= ZSTD_LDM_HASHRATELOG_MIN
;
494 bounds
.upperBound
= ZSTD_LDM_HASHRATELOG_MAX
;
497 /* experimental parameters */
498 case ZSTD_c_rsyncable
:
499 bounds
.lowerBound
= 0;
500 bounds
.upperBound
= 1;
503 case ZSTD_c_forceMaxWindow
:
504 bounds
.lowerBound
= 0;
505 bounds
.upperBound
= 1;
509 ZSTD_STATIC_ASSERT(ZSTD_f_zstd1
< ZSTD_f_zstd1_magicless
);
510 bounds
.lowerBound
= ZSTD_f_zstd1
;
511 bounds
.upperBound
= ZSTD_f_zstd1_magicless
; /* note : how to ensure at compile time that this is the highest value enum ? */
514 case ZSTD_c_forceAttachDict
:
515 ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach
< ZSTD_dictForceLoad
);
516 bounds
.lowerBound
= ZSTD_dictDefaultAttach
;
517 bounds
.upperBound
= ZSTD_dictForceLoad
; /* note : how to ensure at compile time that this is the highest value enum ? */
520 case ZSTD_c_literalCompressionMode
:
521 ZSTD_STATIC_ASSERT(ZSTD_lcm_auto
< ZSTD_lcm_huffman
&& ZSTD_lcm_huffman
< ZSTD_lcm_uncompressed
);
522 bounds
.lowerBound
= ZSTD_lcm_auto
;
523 bounds
.upperBound
= ZSTD_lcm_uncompressed
;
526 case ZSTD_c_targetCBlockSize
:
527 bounds
.lowerBound
= ZSTD_TARGETCBLOCKSIZE_MIN
;
528 bounds
.upperBound
= ZSTD_TARGETCBLOCKSIZE_MAX
;
531 case ZSTD_c_srcSizeHint
:
532 bounds
.lowerBound
= ZSTD_SRCSIZEHINT_MIN
;
533 bounds
.upperBound
= ZSTD_SRCSIZEHINT_MAX
;
536 case ZSTD_c_stableInBuffer
:
537 case ZSTD_c_stableOutBuffer
:
538 bounds
.lowerBound
= (int)ZSTD_bm_buffered
;
539 bounds
.upperBound
= (int)ZSTD_bm_stable
;
542 case ZSTD_c_blockDelimiters
:
543 bounds
.lowerBound
= (int)ZSTD_sf_noBlockDelimiters
;
544 bounds
.upperBound
= (int)ZSTD_sf_explicitBlockDelimiters
;
547 case ZSTD_c_validateSequences
:
548 bounds
.lowerBound
= 0;
549 bounds
.upperBound
= 1;
552 case ZSTD_c_splitBlocks
:
553 bounds
.lowerBound
= 0;
554 bounds
.upperBound
= 1;
557 case ZSTD_c_useRowMatchFinder
:
558 bounds
.lowerBound
= (int)ZSTD_urm_auto
;
559 bounds
.upperBound
= (int)ZSTD_urm_enableRowMatchFinder
;
563 bounds
.error
= ERROR(parameter_unsupported
);
568 /* ZSTD_cParam_clampBounds:
569 * Clamps the value into the bounded range.
571 static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam
, int* value
)
573 ZSTD_bounds
const bounds
= ZSTD_cParam_getBounds(cParam
);
574 if (ZSTD_isError(bounds
.error
)) return bounds
.error
;
575 if (*value
< bounds
.lowerBound
) *value
= bounds
.lowerBound
;
576 if (*value
> bounds
.upperBound
) *value
= bounds
.upperBound
;
580 #define BOUNDCHECK(cParam, val) { \
581 RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
582 parameter_outOfBound, "Param out of bounds"); \
586 static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param
)
590 case ZSTD_c_compressionLevel
:
592 case ZSTD_c_chainLog
:
593 case ZSTD_c_searchLog
:
594 case ZSTD_c_minMatch
:
595 case ZSTD_c_targetLength
:
596 case ZSTD_c_strategy
:
600 case ZSTD_c_windowLog
:
601 case ZSTD_c_contentSizeFlag
:
602 case ZSTD_c_checksumFlag
:
603 case ZSTD_c_dictIDFlag
:
604 case ZSTD_c_forceMaxWindow
:
605 case ZSTD_c_nbWorkers
:
607 case ZSTD_c_overlapLog
:
608 case ZSTD_c_rsyncable
:
609 case ZSTD_c_enableDedicatedDictSearch
:
610 case ZSTD_c_enableLongDistanceMatching
:
611 case ZSTD_c_ldmHashLog
:
612 case ZSTD_c_ldmMinMatch
:
613 case ZSTD_c_ldmBucketSizeLog
:
614 case ZSTD_c_ldmHashRateLog
:
615 case ZSTD_c_forceAttachDict
:
616 case ZSTD_c_literalCompressionMode
:
617 case ZSTD_c_targetCBlockSize
:
618 case ZSTD_c_srcSizeHint
:
619 case ZSTD_c_stableInBuffer
:
620 case ZSTD_c_stableOutBuffer
:
621 case ZSTD_c_blockDelimiters
:
622 case ZSTD_c_validateSequences
:
623 case ZSTD_c_splitBlocks
:
624 case ZSTD_c_useRowMatchFinder
:
630 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx
* cctx
, ZSTD_cParameter param
, int value
)
632 DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param
, value
);
633 if (cctx
->streamStage
!= zcss_init
) {
634 if (ZSTD_isUpdateAuthorized(param
)) {
635 cctx
->cParamsChanged
= 1;
637 RETURN_ERROR(stage_wrong
, "can only set params in ctx init stage");
642 case ZSTD_c_nbWorkers
:
643 RETURN_ERROR_IF((value
!=0) && cctx
->staticSize
, parameter_unsupported
,
644 "MT not compatible with static alloc");
647 case ZSTD_c_compressionLevel
:
648 case ZSTD_c_windowLog
:
650 case ZSTD_c_chainLog
:
651 case ZSTD_c_searchLog
:
652 case ZSTD_c_minMatch
:
653 case ZSTD_c_targetLength
:
654 case ZSTD_c_strategy
:
655 case ZSTD_c_ldmHashRateLog
:
657 case ZSTD_c_contentSizeFlag
:
658 case ZSTD_c_checksumFlag
:
659 case ZSTD_c_dictIDFlag
:
660 case ZSTD_c_forceMaxWindow
:
661 case ZSTD_c_forceAttachDict
:
662 case ZSTD_c_literalCompressionMode
:
664 case ZSTD_c_overlapLog
:
665 case ZSTD_c_rsyncable
:
666 case ZSTD_c_enableDedicatedDictSearch
:
667 case ZSTD_c_enableLongDistanceMatching
:
668 case ZSTD_c_ldmHashLog
:
669 case ZSTD_c_ldmMinMatch
:
670 case ZSTD_c_ldmBucketSizeLog
:
671 case ZSTD_c_targetCBlockSize
:
672 case ZSTD_c_srcSizeHint
:
673 case ZSTD_c_stableInBuffer
:
674 case ZSTD_c_stableOutBuffer
:
675 case ZSTD_c_blockDelimiters
:
676 case ZSTD_c_validateSequences
:
677 case ZSTD_c_splitBlocks
:
678 case ZSTD_c_useRowMatchFinder
:
681 default: RETURN_ERROR(parameter_unsupported
, "unknown parameter");
683 return ZSTD_CCtxParams_setParameter(&cctx
->requestedParams
, param
, value
);
686 size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params
* CCtxParams
,
687 ZSTD_cParameter param
, int value
)
689 DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param
, value
);
693 BOUNDCHECK(ZSTD_c_format
, value
);
694 CCtxParams
->format
= (ZSTD_format_e
)value
;
695 return (size_t)CCtxParams
->format
;
697 case ZSTD_c_compressionLevel
: {
698 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param
, &value
), "");
700 CCtxParams
->compressionLevel
= ZSTD_CLEVEL_DEFAULT
; /* 0 == default */
702 CCtxParams
->compressionLevel
= value
;
703 if (CCtxParams
->compressionLevel
>= 0) return (size_t)CCtxParams
->compressionLevel
;
704 return 0; /* return type (size_t) cannot represent negative values */
707 case ZSTD_c_windowLog
:
708 if (value
!=0) /* 0 => use default */
709 BOUNDCHECK(ZSTD_c_windowLog
, value
);
710 CCtxParams
->cParams
.windowLog
= (U32
)value
;
711 return CCtxParams
->cParams
.windowLog
;
713 case ZSTD_c_hashLog
:
714 if (value
!=0) /* 0 => use default */
715 BOUNDCHECK(ZSTD_c_hashLog
, value
);
716 CCtxParams
->cParams
.hashLog
= (U32
)value
;
717 return CCtxParams
->cParams
.hashLog
;
719 case ZSTD_c_chainLog
:
720 if (value
!=0) /* 0 => use default */
721 BOUNDCHECK(ZSTD_c_chainLog
, value
);
722 CCtxParams
->cParams
.chainLog
= (U32
)value
;
723 return CCtxParams
->cParams
.chainLog
;
725 case ZSTD_c_searchLog
:
726 if (value
!=0) /* 0 => use default */
727 BOUNDCHECK(ZSTD_c_searchLog
, value
);
728 CCtxParams
->cParams
.searchLog
= (U32
)value
;
729 return (size_t)value
;
731 case ZSTD_c_minMatch
:
732 if (value
!=0) /* 0 => use default */
733 BOUNDCHECK(ZSTD_c_minMatch
, value
);
734 CCtxParams
->cParams
.minMatch
= value
;
735 return CCtxParams
->cParams
.minMatch
;
737 case ZSTD_c_targetLength
:
738 BOUNDCHECK(ZSTD_c_targetLength
, value
);
739 CCtxParams
->cParams
.targetLength
= value
;
740 return CCtxParams
->cParams
.targetLength
;
742 case ZSTD_c_strategy
:
743 if (value
!=0) /* 0 => use default */
744 BOUNDCHECK(ZSTD_c_strategy
, value
);
745 CCtxParams
->cParams
.strategy
= (ZSTD_strategy
)value
;
746 return (size_t)CCtxParams
->cParams
.strategy
;
748 case ZSTD_c_contentSizeFlag
:
749 /* Content size written in frame header _when known_ (default:1) */
750 DEBUGLOG(4, "set content size flag = %u", (value
!=0));
751 CCtxParams
->fParams
.contentSizeFlag
= value
!= 0;
752 return CCtxParams
->fParams
.contentSizeFlag
;
754 case ZSTD_c_checksumFlag
:
755 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
756 CCtxParams
->fParams
.checksumFlag
= value
!= 0;
757 return CCtxParams
->fParams
.checksumFlag
;
759 case ZSTD_c_dictIDFlag
: /* When applicable, dictionary's dictID is provided in frame header (default:1) */
760 DEBUGLOG(4, "set dictIDFlag = %u", (value
!=0));
761 CCtxParams
->fParams
.noDictIDFlag
= !value
;
762 return !CCtxParams
->fParams
.noDictIDFlag
;
764 case ZSTD_c_forceMaxWindow
:
765 CCtxParams
->forceWindow
= (value
!= 0);
766 return CCtxParams
->forceWindow
;
768 case ZSTD_c_forceAttachDict
: {
769 const ZSTD_dictAttachPref_e pref
= (ZSTD_dictAttachPref_e
)value
;
770 BOUNDCHECK(ZSTD_c_forceAttachDict
, pref
);
771 CCtxParams
->attachDictPref
= pref
;
772 return CCtxParams
->attachDictPref
;
775 case ZSTD_c_literalCompressionMode
: {
776 const ZSTD_literalCompressionMode_e lcm
= (ZSTD_literalCompressionMode_e
)value
;
777 BOUNDCHECK(ZSTD_c_literalCompressionMode
, lcm
);
778 CCtxParams
->literalCompressionMode
= lcm
;
779 return CCtxParams
->literalCompressionMode
;
782 case ZSTD_c_nbWorkers
:
783 #ifndef ZSTD_MULTITHREAD
784 RETURN_ERROR_IF(value
!=0, parameter_unsupported
, "not compiled with multithreading");
787 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param
, &value
), "");
788 CCtxParams
->nbWorkers
= value
;
789 return CCtxParams
->nbWorkers
;
792 case ZSTD_c_jobSize
:
793 #ifndef ZSTD_MULTITHREAD
794 RETURN_ERROR_IF(value
!=0, parameter_unsupported
, "not compiled with multithreading");
797 /* Adjust to the minimum non-default value. */
798 if (value
!= 0 && value
< ZSTDMT_JOBSIZE_MIN
)
799 value
= ZSTDMT_JOBSIZE_MIN
;
800 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param
, &value
), "");
802 CCtxParams
->jobSize
= value
;
803 return CCtxParams
->jobSize
;
806 case ZSTD_c_overlapLog
:
807 #ifndef ZSTD_MULTITHREAD
808 RETURN_ERROR_IF(value
!=0, parameter_unsupported
, "not compiled with multithreading");
811 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog
, &value
), "");
812 CCtxParams
->overlapLog
= value
;
813 return CCtxParams
->overlapLog
;
816 case ZSTD_c_rsyncable
:
817 #ifndef ZSTD_MULTITHREAD
818 RETURN_ERROR_IF(value
!=0, parameter_unsupported
, "not compiled with multithreading");
821 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog
, &value
), "");
822 CCtxParams
->rsyncable
= value
;
823 return CCtxParams
->rsyncable
;
826 case ZSTD_c_enableDedicatedDictSearch
:
827 CCtxParams
->enableDedicatedDictSearch
= (value
!=0);
828 return CCtxParams
->enableDedicatedDictSearch
;
830 case ZSTD_c_enableLongDistanceMatching
:
831 CCtxParams
->ldmParams
.enableLdm
= (value
!=0);
832 return CCtxParams
->ldmParams
.enableLdm
;
834 case ZSTD_c_ldmHashLog
:
835 if (value
!=0) /* 0 ==> auto */
836 BOUNDCHECK(ZSTD_c_ldmHashLog
, value
);
837 CCtxParams
->ldmParams
.hashLog
= value
;
838 return CCtxParams
->ldmParams
.hashLog
;
840 case ZSTD_c_ldmMinMatch
:
841 if (value
!=0) /* 0 ==> default */
842 BOUNDCHECK(ZSTD_c_ldmMinMatch
, value
);
843 CCtxParams
->ldmParams
.minMatchLength
= value
;
844 return CCtxParams
->ldmParams
.minMatchLength
;
846 case ZSTD_c_ldmBucketSizeLog
:
847 if (value
!=0) /* 0 ==> default */
848 BOUNDCHECK(ZSTD_c_ldmBucketSizeLog
, value
);
849 CCtxParams
->ldmParams
.bucketSizeLog
= value
;
850 return CCtxParams
->ldmParams
.bucketSizeLog
;
852 case ZSTD_c_ldmHashRateLog
:
853 RETURN_ERROR_IF(value
> ZSTD_WINDOWLOG_MAX
- ZSTD_HASHLOG_MIN
,
854 parameter_outOfBound
, "Param out of bounds!");
855 CCtxParams
->ldmParams
.hashRateLog
= value
;
856 return CCtxParams
->ldmParams
.hashRateLog
;
858 case ZSTD_c_targetCBlockSize
:
859 if (value
!=0) /* 0 ==> default */
860 BOUNDCHECK(ZSTD_c_targetCBlockSize
, value
);
861 CCtxParams
->targetCBlockSize
= value
;
862 return CCtxParams
->targetCBlockSize
;
864 case ZSTD_c_srcSizeHint
:
865 if (value
!=0) /* 0 ==> default */
866 BOUNDCHECK(ZSTD_c_srcSizeHint
, value
);
867 CCtxParams
->srcSizeHint
= value
;
868 return CCtxParams
->srcSizeHint
;
870 case ZSTD_c_stableInBuffer
:
871 BOUNDCHECK(ZSTD_c_stableInBuffer
, value
);
872 CCtxParams
->inBufferMode
= (ZSTD_bufferMode_e
)value
;
873 return CCtxParams
->inBufferMode
;
875 case ZSTD_c_stableOutBuffer
:
876 BOUNDCHECK(ZSTD_c_stableOutBuffer
, value
);
877 CCtxParams
->outBufferMode
= (ZSTD_bufferMode_e
)value
;
878 return CCtxParams
->outBufferMode
;
880 case ZSTD_c_blockDelimiters
:
881 BOUNDCHECK(ZSTD_c_blockDelimiters
, value
);
882 CCtxParams
->blockDelimiters
= (ZSTD_sequenceFormat_e
)value
;
883 return CCtxParams
->blockDelimiters
;
885 case ZSTD_c_validateSequences
:
886 BOUNDCHECK(ZSTD_c_validateSequences
, value
);
887 CCtxParams
->validateSequences
= value
;
888 return CCtxParams
->validateSequences
;
890 case ZSTD_c_splitBlocks
:
891 BOUNDCHECK(ZSTD_c_splitBlocks
, value
);
892 CCtxParams
->splitBlocks
= value
;
893 return CCtxParams
->splitBlocks
;
895 case ZSTD_c_useRowMatchFinder
:
896 BOUNDCHECK(ZSTD_c_useRowMatchFinder
, value
);
897 CCtxParams
->useRowMatchFinder
= (ZSTD_useRowMatchFinderMode_e
)value
;
898 return CCtxParams
->useRowMatchFinder
;
900 default: RETURN_ERROR(parameter_unsupported
, "unknown parameter");
904 size_t ZSTD_CCtx_getParameter(ZSTD_CCtx
const* cctx
, ZSTD_cParameter param
, int* value
)
906 return ZSTD_CCtxParams_getParameter(&cctx
->requestedParams
, param
, value
);
909 size_t ZSTD_CCtxParams_getParameter(
910 ZSTD_CCtx_params
const* CCtxParams
, ZSTD_cParameter param
, int* value
)
915 *value
= CCtxParams
->format
;
917 case ZSTD_c_compressionLevel
:
918 *value
= CCtxParams
->compressionLevel
;
920 case ZSTD_c_windowLog
:
921 *value
= (int)CCtxParams
->cParams
.windowLog
;
923 case ZSTD_c_hashLog
:
924 *value
= (int)CCtxParams
->cParams
.hashLog
;
926 case ZSTD_c_chainLog
:
927 *value
= (int)CCtxParams
->cParams
.chainLog
;
929 case ZSTD_c_searchLog
:
930 *value
= CCtxParams
->cParams
.searchLog
;
932 case ZSTD_c_minMatch
:
933 *value
= CCtxParams
->cParams
.minMatch
;
935 case ZSTD_c_targetLength
:
936 *value
= CCtxParams
->cParams
.targetLength
;
938 case ZSTD_c_strategy
:
939 *value
= (unsigned)CCtxParams
->cParams
.strategy
;
941 case ZSTD_c_contentSizeFlag
:
942 *value
= CCtxParams
->fParams
.contentSizeFlag
;
944 case ZSTD_c_checksumFlag
:
945 *value
= CCtxParams
->fParams
.checksumFlag
;
947 case ZSTD_c_dictIDFlag
:
948 *value
= !CCtxParams
->fParams
.noDictIDFlag
;
950 case ZSTD_c_forceMaxWindow
:
951 *value
= CCtxParams
->forceWindow
;
953 case ZSTD_c_forceAttachDict
:
954 *value
= CCtxParams
->attachDictPref
;
956 case ZSTD_c_literalCompressionMode
:
957 *value
= CCtxParams
->literalCompressionMode
;
959 case ZSTD_c_nbWorkers
:
960 #ifndef ZSTD_MULTITHREAD
961 assert(CCtxParams
->nbWorkers
== 0);
963 *value
= CCtxParams
->nbWorkers
;
965 case ZSTD_c_jobSize
:
966 #ifndef ZSTD_MULTITHREAD
967 RETURN_ERROR(parameter_unsupported
, "not compiled with multithreading");
969 assert(CCtxParams
->jobSize
<= INT_MAX
);
970 *value
= (int)CCtxParams
->jobSize
;
973 case ZSTD_c_overlapLog
:
974 #ifndef ZSTD_MULTITHREAD
975 RETURN_ERROR(parameter_unsupported
, "not compiled with multithreading");
977 *value
= CCtxParams
->overlapLog
;
980 case ZSTD_c_rsyncable
:
981 #ifndef ZSTD_MULTITHREAD
982 RETURN_ERROR(parameter_unsupported
, "not compiled with multithreading");
984 *value
= CCtxParams
->rsyncable
;
987 case ZSTD_c_enableDedicatedDictSearch
:
988 *value
= CCtxParams
->enableDedicatedDictSearch
;
990 case ZSTD_c_enableLongDistanceMatching
:
991 *value
= CCtxParams
->ldmParams
.enableLdm
;
993 case ZSTD_c_ldmHashLog
:
994 *value
= CCtxParams
->ldmParams
.hashLog
;
996 case ZSTD_c_ldmMinMatch
:
997 *value
= CCtxParams
->ldmParams
.minMatchLength
;
999 case ZSTD_c_ldmBucketSizeLog
:
1000 *value
= CCtxParams
->ldmParams
.bucketSizeLog
;
1002 case ZSTD_c_ldmHashRateLog
:
1003 *value
= CCtxParams
->ldmParams
.hashRateLog
;
1005 case ZSTD_c_targetCBlockSize
:
1006 *value
= (int)CCtxParams
->targetCBlockSize
;
1008 case ZSTD_c_srcSizeHint
:
1009 *value
= (int)CCtxParams
->srcSizeHint
;
1011 case ZSTD_c_stableInBuffer
:
1012 *value
= (int)CCtxParams
->inBufferMode
;
1014 case ZSTD_c_stableOutBuffer
:
1015 *value
= (int)CCtxParams
->outBufferMode
;
1017 case ZSTD_c_blockDelimiters
:
1018 *value
= (int)CCtxParams
->blockDelimiters
;
1020 case ZSTD_c_validateSequences
:
1021 *value
= (int)CCtxParams
->validateSequences
;
1023 case ZSTD_c_splitBlocks
:
1024 *value
= (int)CCtxParams
->splitBlocks
;
1026 case ZSTD_c_useRowMatchFinder
:
1027 *value
= (int)CCtxParams
->useRowMatchFinder
;
1029 default: RETURN_ERROR(parameter_unsupported
, "unknown parameter");
1034 /** ZSTD_CCtx_setParametersUsingCCtxParams() :
1035 * just applies `params` into `cctx`
1036 * no action is performed, parameters are merely stored.
1037 * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
1038 * This is possible even if a compression is ongoing.
1039 * In which case, new parameters will be applied on the fly, starting with next compression job.
1041 size_t ZSTD_CCtx_setParametersUsingCCtxParams(
1042 ZSTD_CCtx
* cctx
, const ZSTD_CCtx_params
* params
)
1044 DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
1045 RETURN_ERROR_IF(cctx
->streamStage
!= zcss_init
, stage_wrong
,
1046 "The context is in the wrong stage!");
1047 RETURN_ERROR_IF(cctx
->cdict
, stage_wrong
,
1048 "Can't override parameters with cdict attached (some must "
1049 "be inherited from the cdict).");
1051 cctx
->requestedParams
= *params
;
1055 ZSTDLIB_API
size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx
* cctx
, unsigned long long pledgedSrcSize
)
1057 DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32
)pledgedSrcSize
);
1058 RETURN_ERROR_IF(cctx
->streamStage
!= zcss_init
, stage_wrong
,
1059 "Can't set pledgedSrcSize when not in init stage.");
1060 cctx
->pledgedSrcSizePlusOne
= pledgedSrcSize
+1;
1064 static ZSTD_compressionParameters
ZSTD_dedicatedDictSearch_getCParams(
1065 int const compressionLevel
,
1066 size_t const dictSize
);
1067 static int ZSTD_dedicatedDictSearch_isSupported(
1068 const ZSTD_compressionParameters
* cParams
);
1069 static void ZSTD_dedicatedDictSearch_revertCParams(
1070 ZSTD_compressionParameters
* cParams
);
1073 * Initializes the local dict using the requested parameters.
1074 * NOTE: This does not use the pledged src size, because it may be used for more
1075 * than one compression.
1077 static size_t ZSTD_initLocalDict(ZSTD_CCtx
* cctx
)
1079 ZSTD_localDict
* const dl
= &cctx
->localDict
;
1080 if (dl
->dict
== NULL
) {
1081 /* No local dictionary. */
1082 assert(dl
->dictBuffer
== NULL
);
1083 assert(dl
->cdict
== NULL
);
1084 assert(dl
->dictSize
== 0);
1087 if (dl
->cdict
!= NULL
) {
1088 assert(cctx
->cdict
== dl
->cdict
);
1089 /* Local dictionary already initialized. */
1092 assert(dl
->dictSize
> 0);
1093 assert(cctx
->cdict
== NULL
);
1094 assert(cctx
->prefixDict
.dict
== NULL
);
1096 dl
->cdict
= ZSTD_createCDict_advanced2(
1100 dl
->dictContentType
,
1101 &cctx
->requestedParams
,
1103 RETURN_ERROR_IF(!dl
->cdict
, memory_allocation
, "ZSTD_createCDict_advanced failed");
1104 cctx
->cdict
= dl
->cdict
;
1108 size_t ZSTD_CCtx_loadDictionary_advanced(
1109 ZSTD_CCtx
* cctx
, const void* dict
, size_t dictSize
,
1110 ZSTD_dictLoadMethod_e dictLoadMethod
, ZSTD_dictContentType_e dictContentType
)
1112 RETURN_ERROR_IF(cctx
->streamStage
!= zcss_init
, stage_wrong
,
1113 "Can't load a dictionary when ctx is not in init stage.");
1114 DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32
)dictSize
);
1115 ZSTD_clearAllDicts(cctx
); /* in case one already exists */
1116 if (dict
== NULL
|| dictSize
== 0) /* no dictionary mode */
1118 if (dictLoadMethod
== ZSTD_dlm_byRef
) {
1119 cctx
->localDict
.dict
= dict
;
1122 RETURN_ERROR_IF(cctx
->staticSize
, memory_allocation
,
1123 "no malloc for static CCtx");
1124 dictBuffer
= ZSTD_customMalloc(dictSize
, cctx
->customMem
);
1125 RETURN_ERROR_IF(!dictBuffer
, memory_allocation
, "NULL pointer!");
1126 ZSTD_memcpy(dictBuffer
, dict
, dictSize
);
1127 cctx
->localDict
.dictBuffer
= dictBuffer
;
1128 cctx
->localDict
.dict
= dictBuffer
;
1130 cctx
->localDict
.dictSize
= dictSize
;
1131 cctx
->localDict
.dictContentType
= dictContentType
;
1135 ZSTDLIB_API
size_t ZSTD_CCtx_loadDictionary_byReference(
1136 ZSTD_CCtx
* cctx
, const void* dict
, size_t dictSize
)
1138 return ZSTD_CCtx_loadDictionary_advanced(
1139 cctx
, dict
, dictSize
, ZSTD_dlm_byRef
, ZSTD_dct_auto
);
1142 ZSTDLIB_API
size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx
* cctx
, const void* dict
, size_t dictSize
)
1144 return ZSTD_CCtx_loadDictionary_advanced(
1145 cctx
, dict
, dictSize
, ZSTD_dlm_byCopy
, ZSTD_dct_auto
);
1149 size_t ZSTD_CCtx_refCDict(ZSTD_CCtx
* cctx
, const ZSTD_CDict
* cdict
)
1151 RETURN_ERROR_IF(cctx
->streamStage
!= zcss_init
, stage_wrong
,
1152 "Can't ref a dict when ctx not in init stage.");
1153 /* Free the existing local cdict (if any) to save memory. */
1154 ZSTD_clearAllDicts(cctx
);
1155 cctx
->cdict
= cdict
;
1159 size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx
* cctx
, ZSTD_threadPool
* pool
)
1161 RETURN_ERROR_IF(cctx
->streamStage
!= zcss_init
, stage_wrong
,
1162 "Can't ref a pool when ctx not in init stage.");
1167 size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx
* cctx
, const void* prefix
, size_t prefixSize
)
1169 return ZSTD_CCtx_refPrefix_advanced(cctx
, prefix
, prefixSize
, ZSTD_dct_rawContent
);
1172 size_t ZSTD_CCtx_refPrefix_advanced(
1173 ZSTD_CCtx
* cctx
, const void* prefix
, size_t prefixSize
, ZSTD_dictContentType_e dictContentType
)
1175 RETURN_ERROR_IF(cctx
->streamStage
!= zcss_init
, stage_wrong
,
1176 "Can't ref a prefix when ctx not in init stage.");
1177 ZSTD_clearAllDicts(cctx
);
1178 if (prefix
!= NULL
&& prefixSize
> 0) {
1179 cctx
->prefixDict
.dict
= prefix
;
1180 cctx
->prefixDict
.dictSize
= prefixSize
;
1181 cctx
->prefixDict
.dictContentType
= dictContentType
;
1186 /*! ZSTD_CCtx_reset() :
1187 * Also dumps dictionary */
1188 size_t ZSTD_CCtx_reset(ZSTD_CCtx
* cctx
, ZSTD_ResetDirective reset
)
1190 if ( (reset
== ZSTD_reset_session_only
)
1191 || (reset
== ZSTD_reset_session_and_parameters
) ) {
1192 cctx
->streamStage
= zcss_init
;
1193 cctx
->pledgedSrcSizePlusOne
= 0;
1195 if ( (reset
== ZSTD_reset_parameters
)
1196 || (reset
== ZSTD_reset_session_and_parameters
) ) {
1197 RETURN_ERROR_IF(cctx
->streamStage
!= zcss_init
, stage_wrong
,
1198 "Can't reset parameters only when not in init stage.");
1199 ZSTD_clearAllDicts(cctx
);
1200 return ZSTD_CCtxParams_reset(&cctx
->requestedParams
);
1206 /** ZSTD_checkCParams() :
1207 control CParam values remain within authorized range.
1208 @return : 0, or an error code if one value is beyond authorized range */
1209 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams
)
1211 BOUNDCHECK(ZSTD_c_windowLog
, (int)cParams
.windowLog
);
1212 BOUNDCHECK(ZSTD_c_chainLog
, (int)cParams
.chainLog
);
1213 BOUNDCHECK(ZSTD_c_hashLog
, (int)cParams
.hashLog
);
1214 BOUNDCHECK(ZSTD_c_searchLog
, (int)cParams
.searchLog
);
1215 BOUNDCHECK(ZSTD_c_minMatch
, (int)cParams
.minMatch
);
1216 BOUNDCHECK(ZSTD_c_targetLength
,(int)cParams
.targetLength
);
1217 BOUNDCHECK(ZSTD_c_strategy
, cParams
.strategy
);
1221 /** ZSTD_clampCParams() :
1222 * make CParam values within valid range.
1223 * @return : valid CParams */
1224 static ZSTD_compressionParameters
1225 ZSTD_clampCParams(ZSTD_compressionParameters cParams
)
1227 # define CLAMP_TYPE(cParam, val, type) { \
1228 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
1229 if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
1230 else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
1232 # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
1233 CLAMP(ZSTD_c_windowLog
, cParams
.windowLog
);
1234 CLAMP(ZSTD_c_chainLog
, cParams
.chainLog
);
1235 CLAMP(ZSTD_c_hashLog
, cParams
.hashLog
);
1236 CLAMP(ZSTD_c_searchLog
, cParams
.searchLog
);
1237 CLAMP(ZSTD_c_minMatch
, cParams
.minMatch
);
1238 CLAMP(ZSTD_c_targetLength
,cParams
.targetLength
);
1239 CLAMP_TYPE(ZSTD_c_strategy
,cParams
.strategy
, ZSTD_strategy
);
1243 /** ZSTD_cycleLog() :
1244 * condition for correct operation : hashLog > 1 */
1245 U32
ZSTD_cycleLog(U32 hashLog
, ZSTD_strategy strat
)
1247 U32
const btScale
= ((U32
)strat
>= (U32
)ZSTD_btlazy2
);
1248 return hashLog
- btScale
;
1251 /** ZSTD_dictAndWindowLog() :
1252 * Returns an adjusted window log that is large enough to fit the source and the dictionary.
1253 * The zstd format says that the entire dictionary is valid if one byte of the dictionary
1254 * is within the window. So the hashLog and chainLog should be large enough to reference both
1255 * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
1256 * the hashLog and windowLog.
1257 * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
1259 static U32
ZSTD_dictAndWindowLog(U32 windowLog
, U64 srcSize
, U64 dictSize
)
1261 const U64 maxWindowSize
= 1ULL << ZSTD_WINDOWLOG_MAX
;
1262 /* No dictionary ==> No change */
1263 if (dictSize
== 0) {
1266 assert(windowLog
<= ZSTD_WINDOWLOG_MAX
);
1267 assert(srcSize
!= ZSTD_CONTENTSIZE_UNKNOWN
); /* Handled in ZSTD_adjustCParams_internal() */
1269 U64
const windowSize
= 1ULL << windowLog
;
1270 U64
const dictAndWindowSize
= dictSize
+ windowSize
;
1271 /* If the window size is already large enough to fit both the source and the dictionary
1272 * then just use the window size. Otherwise adjust so that it fits the dictionary and
1275 if (windowSize
>= dictSize
+ srcSize
) {
1276 return windowLog
; /* Window size large enough already */
1277 } else if (dictAndWindowSize
>= maxWindowSize
) {
1278 return ZSTD_WINDOWLOG_MAX
; /* Larger than max window log */
1280 return ZSTD_highbit32((U32
)dictAndWindowSize
- 1) + 1;
1285 /** ZSTD_adjustCParams_internal() :
1286 * optimize `cPar` for a specified input (`srcSize` and `dictSize`).
1287 * mostly downsize to reduce memory consumption and initialization latency.
1288 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
1289 * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
1290 * note : `srcSize==0` means 0!
1291 * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
1292 static ZSTD_compressionParameters
1293 ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar
,
1294 unsigned long long srcSize
,
1296 ZSTD_cParamMode_e mode
)
1298 const U64 minSrcSize
= 513; /* (1<<9) + 1 */
1299 const U64 maxWindowResize
= 1ULL << (ZSTD_WINDOWLOG_MAX
-1);
1300 assert(ZSTD_checkCParams(cPar
)==0);
1303 case ZSTD_cpm_unknown
:
1304 case ZSTD_cpm_noAttachDict
:
1305 /* If we don't know the source size, don't make any
1306 * assumptions about it. We will already have selected
1307 * smaller parameters if a dictionary is in use.
1310 case ZSTD_cpm_createCDict
:
1311 /* Assume a small source size when creating a dictionary
1312 * with an unkown source size.
1314 if (dictSize
&& srcSize
== ZSTD_CONTENTSIZE_UNKNOWN
)
1315 srcSize
= minSrcSize
;
1317 case ZSTD_cpm_attachDict
:
1318 /* Dictionary has its own dedicated parameters which have
1319 * already been selected. We are selecting parameters
1320 * for only the source.
1329 /* resize windowLog if input is small enough, to use less memory */
1330 if ( (srcSize
< maxWindowResize
)
1331 && (dictSize
< maxWindowResize
) ) {
1332 U32
const tSize
= (U32
)(srcSize
+ dictSize
);
1333 static U32
const hashSizeMin
= 1 << ZSTD_HASHLOG_MIN
;
1334 U32
const srcLog
= (tSize
< hashSizeMin
) ? ZSTD_HASHLOG_MIN
:
1335 ZSTD_highbit32(tSize
-1) + 1;
1336 if (cPar
.windowLog
> srcLog
) cPar
.windowLog
= srcLog
;
1338 if (srcSize
!= ZSTD_CONTENTSIZE_UNKNOWN
) {
1339 U32
const dictAndWindowLog
= ZSTD_dictAndWindowLog(cPar
.windowLog
, (U64
)srcSize
, (U64
)dictSize
);
1340 U32
const cycleLog
= ZSTD_cycleLog(cPar
.chainLog
, cPar
.strategy
);
1341 if (cPar
.hashLog
> dictAndWindowLog
+1) cPar
.hashLog
= dictAndWindowLog
+1;
1342 if (cycleLog
> dictAndWindowLog
)
1343 cPar
.chainLog
-= (cycleLog
- dictAndWindowLog
);
1346 if (cPar
.windowLog
< ZSTD_WINDOWLOG_ABSOLUTEMIN
)
1347 cPar
.windowLog
= ZSTD_WINDOWLOG_ABSOLUTEMIN
; /* minimum wlog required for valid frame header */
1352 ZSTD_compressionParameters
1353 ZSTD_adjustCParams(ZSTD_compressionParameters cPar
,
1354 unsigned long long srcSize
,
1357 cPar
= ZSTD_clampCParams(cPar
); /* resulting cPar is necessarily valid (all parameters within range) */
1358 if (srcSize
== 0) srcSize
= ZSTD_CONTENTSIZE_UNKNOWN
;
1359 return ZSTD_adjustCParams_internal(cPar
, srcSize
, dictSize
, ZSTD_cpm_unknown
);
1362 static ZSTD_compressionParameters
ZSTD_getCParams_internal(int compressionLevel
, unsigned long long srcSizeHint
, size_t dictSize
, ZSTD_cParamMode_e mode
);
1363 static ZSTD_parameters
ZSTD_getParams_internal(int compressionLevel
, unsigned long long srcSizeHint
, size_t dictSize
, ZSTD_cParamMode_e mode
);
1365 static void ZSTD_overrideCParams(
1366 ZSTD_compressionParameters
* cParams
,
1367 const ZSTD_compressionParameters
* overrides
)
1369 if (overrides
->windowLog
) cParams
->windowLog
= overrides
->windowLog
;
1370 if (overrides
->hashLog
) cParams
->hashLog
= overrides
->hashLog
;
1371 if (overrides
->chainLog
) cParams
->chainLog
= overrides
->chainLog
;
1372 if (overrides
->searchLog
) cParams
->searchLog
= overrides
->searchLog
;
1373 if (overrides
->minMatch
) cParams
->minMatch
= overrides
->minMatch
;
1374 if (overrides
->targetLength
) cParams
->targetLength
= overrides
->targetLength
;
1375 if (overrides
->strategy
) cParams
->strategy
= overrides
->strategy
;
1378 ZSTD_compressionParameters
ZSTD_getCParamsFromCCtxParams(
1379 const ZSTD_CCtx_params
* CCtxParams
, U64 srcSizeHint
, size_t dictSize
, ZSTD_cParamMode_e mode
)
1381 ZSTD_compressionParameters cParams
;
1382 if (srcSizeHint
== ZSTD_CONTENTSIZE_UNKNOWN
&& CCtxParams
->srcSizeHint
> 0) {
1383 srcSizeHint
= CCtxParams
->srcSizeHint
;
1385 cParams
= ZSTD_getCParams_internal(CCtxParams
->compressionLevel
, srcSizeHint
, dictSize
, mode
);
1386 if (CCtxParams
->ldmParams
.enableLdm
) cParams
.windowLog
= ZSTD_LDM_DEFAULT_WINDOW_LOG
;
1387 ZSTD_overrideCParams(&cParams
, &CCtxParams
->cParams
);
1388 assert(!ZSTD_checkCParams(cParams
));
1389 /* srcSizeHint == 0 means 0 */
1390 return ZSTD_adjustCParams_internal(cParams
, srcSizeHint
, dictSize
, mode
);
1394 ZSTD_sizeof_matchState(const ZSTD_compressionParameters
* const cParams
,
1395 const ZSTD_useRowMatchFinderMode_e useRowMatchFinder
,
1396 const U32 enableDedicatedDictSearch
,
1399 /* chain table size should be 0 for fast or row-hash strategies */
1400 size_t const chainSize
= ZSTD_allocateChainTable(cParams
->strategy
, useRowMatchFinder
, enableDedicatedDictSearch
&& !forCCtx
)
1401 ? ((size_t)1 << cParams
->chainLog
)
1403 size_t const hSize
= ((size_t)1) << cParams
->hashLog
;
1404 U32
const hashLog3
= (forCCtx
&& cParams
->minMatch
==3) ? MIN(ZSTD_HASHLOG3_MAX
, cParams
->windowLog
) : 0;
1405 size_t const h3Size
= hashLog3
? ((size_t)1) << hashLog3
: 0;
1406 /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
1407 * surrounded by redzones in ASAN. */
1408 size_t const tableSpace
= chainSize
* sizeof(U32
)
1409 + hSize
* sizeof(U32
)
1410 + h3Size
* sizeof(U32
);
1411 size_t const optPotentialSpace
=
1412 ZSTD_cwksp_aligned_alloc_size((MaxML
+1) * sizeof(U32
))
1413 + ZSTD_cwksp_aligned_alloc_size((MaxLL
+1) * sizeof(U32
))
1414 + ZSTD_cwksp_aligned_alloc_size((MaxOff
+1) * sizeof(U32
))
1415 + ZSTD_cwksp_aligned_alloc_size((1<<Litbits
) * sizeof(U32
))
1416 + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM
+1) * sizeof(ZSTD_match_t
))
1417 + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM
+1) * sizeof(ZSTD_optimal_t
));
1418 size_t const lazyAdditionalSpace
= ZSTD_rowMatchFinderUsed(cParams
->strategy
, useRowMatchFinder
)
1419 ? ZSTD_cwksp_aligned_alloc_size(hSize
*sizeof(U16
))
1421 size_t const optSpace
= (forCCtx
&& (cParams
->strategy
>= ZSTD_btopt
))
1424 size_t const slackSpace
= ZSTD_cwksp_slack_space_required();
1426 /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
1427 ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN
>= 4 && ZSTD_WINDOWLOG_MIN
>= 4 && ZSTD_CHAINLOG_MIN
>= 4);
1428 assert(useRowMatchFinder
!= ZSTD_urm_auto
);
1430 DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
1431 (U32
)chainSize
, (U32
)hSize
, (U32
)h3Size
);
1432 return tableSpace
+ optSpace
+ slackSpace
+ lazyAdditionalSpace
;
1435 static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1436 const ZSTD_compressionParameters
* cParams
,
1437 const ldmParams_t
* ldmParams
,
1439 const ZSTD_useRowMatchFinderMode_e useRowMatchFinder
,
1440 const size_t buffInSize
,
1441 const size_t buffOutSize
,
1442 const U64 pledgedSrcSize
)
1444 size_t const windowSize
= MAX(1, (size_t)MIN(((U64
)1 << cParams
->windowLog
), pledgedSrcSize
));
1445 size_t const blockSize
= MIN(ZSTD_BLOCKSIZE_MAX
, windowSize
);
1446 U32
const divider
= (cParams
->minMatch
==3) ? 3 : 4;
1447 size_t const maxNbSeq
= blockSize
/ divider
;
1448 size_t const tokenSpace
= ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH
+ blockSize
)
1449 + ZSTD_cwksp_aligned_alloc_size(maxNbSeq
* sizeof(seqDef
))
1450 + 3 * ZSTD_cwksp_alloc_size(maxNbSeq
* sizeof(BYTE
));
1451 size_t const entropySpace
= ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE
);
1452 size_t const blockStateSpace
= 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t
));
1453 size_t const matchStateSize
= ZSTD_sizeof_matchState(cParams
, useRowMatchFinder
, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
1455 size_t const ldmSpace
= ZSTD_ldm_getTableSize(*ldmParams
);
1456 size_t const maxNbLdmSeq
= ZSTD_ldm_getMaxNbSeq(*ldmParams
, blockSize
);
1457 size_t const ldmSeqSpace
= ldmParams
->enableLdm
?
1458 ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq
* sizeof(rawSeq
)) : 0;
1461 size_t const bufferSpace
= ZSTD_cwksp_alloc_size(buffInSize
)
1462 + ZSTD_cwksp_alloc_size(buffOutSize
);
1464 size_t const cctxSpace
= isStatic
? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx
)) : 0;
1466 size_t const neededSpace
=
1476 DEBUGLOG(5, "estimate workspace : %u", (U32
)neededSpace
);
1480 size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params
* params
)
1482 ZSTD_compressionParameters
const cParams
=
1483 ZSTD_getCParamsFromCCtxParams(params
, ZSTD_CONTENTSIZE_UNKNOWN
, 0, ZSTD_cpm_noAttachDict
);
1484 ZSTD_useRowMatchFinderMode_e
const useRowMatchFinder
= ZSTD_resolveRowMatchFinderMode(params
->useRowMatchFinder
,
1487 RETURN_ERROR_IF(params
->nbWorkers
> 0, GENERIC
, "Estimate CCtx size is supported for single-threaded compression only.");
1488 /* estimateCCtxSize is for one-shot compression. So no buffers should
1489 * be needed. However, we still allocate two 0-sized buffers, which can
1490 * take space under ASAN. */
1491 return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1492 &cParams
, ¶ms
->ldmParams
, 1, useRowMatchFinder
, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN
);
1495 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams
)
1497 ZSTD_CCtx_params initialParams
= ZSTD_makeCCtxParamsFromCParams(cParams
);
1498 if (ZSTD_rowMatchFinderSupported(cParams
.strategy
)) {
1499 /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
1500 size_t noRowCCtxSize
;
1502 initialParams
.useRowMatchFinder
= ZSTD_urm_disableRowMatchFinder
;
1503 noRowCCtxSize
= ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams
);
1504 initialParams
.useRowMatchFinder
= ZSTD_urm_enableRowMatchFinder
;
1505 rowCCtxSize
= ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams
);
1506 return MAX(noRowCCtxSize
, rowCCtxSize
);
1508 return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams
);
1512 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel
)
1515 size_t largestSize
= 0;
1516 static const unsigned long long srcSizeTiers
[4] = {16 KB
, 128 KB
, 256 KB
, ZSTD_CONTENTSIZE_UNKNOWN
};
1517 for (; tier
< 4; ++tier
) {
1518 /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
1519 ZSTD_compressionParameters
const cParams
= ZSTD_getCParams_internal(compressionLevel
, srcSizeTiers
[tier
], 0, ZSTD_cpm_noAttachDict
);
1520 largestSize
= MAX(ZSTD_estimateCCtxSize_usingCParams(cParams
), largestSize
);
1525 size_t ZSTD_estimateCCtxSize(int compressionLevel
)
1528 size_t memBudget
= 0;
1529 for (level
=MIN(compressionLevel
, 1); level
<=compressionLevel
; level
++) {
1530 /* Ensure monotonically increasing memory usage as compression level increases */
1531 size_t const newMB
= ZSTD_estimateCCtxSize_internal(level
);
1532 if (newMB
> memBudget
) memBudget
= newMB
;
1537 size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params
* params
)
1539 RETURN_ERROR_IF(params
->nbWorkers
> 0, GENERIC
, "Estimate CCtx size is supported for single-threaded compression only.");
1540 { ZSTD_compressionParameters
const cParams
=
1541 ZSTD_getCParamsFromCCtxParams(params
, ZSTD_CONTENTSIZE_UNKNOWN
, 0, ZSTD_cpm_noAttachDict
);
1542 size_t const blockSize
= MIN(ZSTD_BLOCKSIZE_MAX
, (size_t)1 << cParams
.windowLog
);
1543 size_t const inBuffSize
= (params
->inBufferMode
== ZSTD_bm_buffered
)
1544 ? ((size_t)1 << cParams
.windowLog
) + blockSize
1546 size_t const outBuffSize
= (params
->outBufferMode
== ZSTD_bm_buffered
)
1547 ? ZSTD_compressBound(blockSize
) + 1
1549 ZSTD_useRowMatchFinderMode_e
const useRowMatchFinder
= ZSTD_resolveRowMatchFinderMode(params
->useRowMatchFinder
, ¶ms
->cParams
);
1551 return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1552 &cParams
, ¶ms
->ldmParams
, 1, useRowMatchFinder
, inBuffSize
, outBuffSize
,
1553 ZSTD_CONTENTSIZE_UNKNOWN
);
1557 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams
)
1559 ZSTD_CCtx_params initialParams
= ZSTD_makeCCtxParamsFromCParams(cParams
);
1560 if (ZSTD_rowMatchFinderSupported(cParams
.strategy
)) {
1561 /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
1562 size_t noRowCCtxSize
;
1564 initialParams
.useRowMatchFinder
= ZSTD_urm_disableRowMatchFinder
;
1565 noRowCCtxSize
= ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams
);
1566 initialParams
.useRowMatchFinder
= ZSTD_urm_enableRowMatchFinder
;
1567 rowCCtxSize
= ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams
);
1568 return MAX(noRowCCtxSize
, rowCCtxSize
);
1570 return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams
);
1574 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel
)
1576 ZSTD_compressionParameters
const cParams
= ZSTD_getCParams_internal(compressionLevel
, ZSTD_CONTENTSIZE_UNKNOWN
, 0, ZSTD_cpm_noAttachDict
);
1577 return ZSTD_estimateCStreamSize_usingCParams(cParams
);
1580 size_t ZSTD_estimateCStreamSize(int compressionLevel
)
1583 size_t memBudget
= 0;
1584 for (level
=MIN(compressionLevel
, 1); level
<=compressionLevel
; level
++) {
1585 size_t const newMB
= ZSTD_estimateCStreamSize_internal(level
);
1586 if (newMB
> memBudget
) memBudget
= newMB
;
1591 /* ZSTD_getFrameProgression():
1592 * tells how much data has been consumed (input) and produced (output) for current frame.
1593 * able to count progression inside worker threads (non-blocking mode).
1595 ZSTD_frameProgression
ZSTD_getFrameProgression(const ZSTD_CCtx
* cctx
)
1597 #ifdef ZSTD_MULTITHREAD
1598 if (cctx
->appliedParams
.nbWorkers
> 0) {
1599 return ZSTDMT_getFrameProgression(cctx
->mtctx
);
1602 { ZSTD_frameProgression fp
;
1603 size_t const buffered
= (cctx
->inBuff
== NULL
) ? 0 :
1604 cctx
->inBuffPos
- cctx
->inToCompress
;
1605 if (buffered
) assert(cctx
->inBuffPos
>= cctx
->inToCompress
);
1606 assert(buffered
<= ZSTD_BLOCKSIZE_MAX
);
1607 fp
.ingested
= cctx
->consumedSrcSize
+ buffered
;
1608 fp
.consumed
= cctx
->consumedSrcSize
;
1609 fp
.produced
= cctx
->producedCSize
;
1610 fp
.flushed
= cctx
->producedCSize
; /* simplified; some data might still be left within streaming output buffer */
1611 fp
.currentJobID
= 0;
1612 fp
.nbActiveWorkers
= 0;
1616 /*! ZSTD_toFlushNow()
1617 * Only useful for multithreading scenarios currently (nbWorkers >= 1).
1619 size_t ZSTD_toFlushNow(ZSTD_CCtx
* cctx
)
1621 #ifdef ZSTD_MULTITHREAD
1622 if (cctx
->appliedParams
.nbWorkers
> 0) {
1623 return ZSTDMT_toFlushNow(cctx
->mtctx
);
1627 return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
1630 static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1
,
1631 ZSTD_compressionParameters cParams2
)
1635 assert(cParams1
.windowLog
== cParams2
.windowLog
);
1636 assert(cParams1
.chainLog
== cParams2
.chainLog
);
1637 assert(cParams1
.hashLog
== cParams2
.hashLog
);
1638 assert(cParams1
.searchLog
== cParams2
.searchLog
);
1639 assert(cParams1
.minMatch
== cParams2
.minMatch
);
1640 assert(cParams1
.targetLength
== cParams2
.targetLength
);
1641 assert(cParams1
.strategy
== cParams2
.strategy
);
1644 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t
* bs
)
1647 for (i
= 0; i
< ZSTD_REP_NUM
; ++i
)
1648 bs
->rep
[i
] = repStartValue
[i
];
1649 bs
->entropy
.huf
.repeatMode
= HUF_repeat_none
;
1650 bs
->entropy
.fse
.offcode_repeatMode
= FSE_repeat_none
;
1651 bs
->entropy
.fse
.matchlength_repeatMode
= FSE_repeat_none
;
1652 bs
->entropy
.fse
.litlength_repeatMode
= FSE_repeat_none
;
1655 /*! ZSTD_invalidateMatchState()
1656 * Invalidate all the matches in the match finder tables.
1657 * Requires nextSrc and base to be set (can be NULL).
1659 static void ZSTD_invalidateMatchState(ZSTD_matchState_t
* ms
)
1661 ZSTD_window_clear(&ms
->window
);
1663 ms
->nextToUpdate
= ms
->window
.dictLimit
;
1664 ms
->loadedDictEnd
= 0;
1665 ms
->opt
.litLengthSum
= 0; /* force reset of btopt stats */
1666 ms
->dictMatchState
= NULL
;
/**
 * Controls, for this matchState reset, whether the tables need to be cleared /
 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
 * subsequent operation will overwrite the table space anyways (e.g., copying
 * the matchState contents in from a CDict).
 */
typedef enum {
    ZSTDcrp_makeClean,
    ZSTDcrp_leaveDirty
} ZSTD_compResetPolicy_e;

/**
 * Controls, for this matchState reset, whether indexing can continue where it
 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
 * (ZSTDirp_reset).
 */
typedef enum {
    ZSTDirp_continue,
    ZSTDirp_reset
} ZSTD_indexResetPolicy_e;

typedef enum {
    ZSTD_resetTarget_CDict,
    ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
1698 ZSTD_reset_matchState(ZSTD_matchState_t
* ms
,
1700 const ZSTD_compressionParameters
* cParams
,
1701 const ZSTD_useRowMatchFinderMode_e useRowMatchFinder
,
1702 const ZSTD_compResetPolicy_e crp
,
1703 const ZSTD_indexResetPolicy_e forceResetIndex
,
1704 const ZSTD_resetTarget_e forWho
)
1706 /* disable chain table allocation for fast or row-based strategies */
1707 size_t const chainSize
= ZSTD_allocateChainTable(cParams
->strategy
, useRowMatchFinder
,
1708 ms
->dedicatedDictSearch
&& (forWho
== ZSTD_resetTarget_CDict
))
1709 ? ((size_t)1 << cParams
->chainLog
)
1711 size_t const hSize
= ((size_t)1) << cParams
->hashLog
;
1712 U32
const hashLog3
= ((forWho
== ZSTD_resetTarget_CCtx
) && cParams
->minMatch
==3) ? MIN(ZSTD_HASHLOG3_MAX
, cParams
->windowLog
) : 0;
1713 size_t const h3Size
= hashLog3
? ((size_t)1) << hashLog3
: 0;
1715 DEBUGLOG(4, "reset indices : %u", forceResetIndex
== ZSTDirp_reset
);
1716 assert(useRowMatchFinder
!= ZSTD_urm_auto
);
1717 if (forceResetIndex
== ZSTDirp_reset
) {
1718 ZSTD_window_init(&ms
->window
);
1719 ZSTD_cwksp_mark_tables_dirty(ws
);
1722 ms
->hashLog3
= hashLog3
;
1724 ZSTD_invalidateMatchState(ms
);
1726 assert(!ZSTD_cwksp_reserve_failed(ws
)); /* check that allocation hasn't already failed */
1728 ZSTD_cwksp_clear_tables(ws
);
1730 DEBUGLOG(5, "reserving table space");
1732 ms
->hashTable
= (U32
*)ZSTD_cwksp_reserve_table(ws
, hSize
* sizeof(U32
));
1733 ms
->chainTable
= (U32
*)ZSTD_cwksp_reserve_table(ws
, chainSize
* sizeof(U32
));
1734 ms
->hashTable3
= (U32
*)ZSTD_cwksp_reserve_table(ws
, h3Size
* sizeof(U32
));
1735 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws
), memory_allocation
,
1736 "failed a workspace allocation in ZSTD_reset_matchState");
1738 DEBUGLOG(4, "reset table : %u", crp
!=ZSTDcrp_leaveDirty
);
1739 if (crp
!=ZSTDcrp_leaveDirty
) {
1740 /* reset tables only */
1741 ZSTD_cwksp_clean_tables(ws
);
1744 /* opt parser space */
1745 if ((forWho
== ZSTD_resetTarget_CCtx
) && (cParams
->strategy
>= ZSTD_btopt
)) {
1746 DEBUGLOG(4, "reserving optimal parser space");
1747 ms
->opt
.litFreq
= (unsigned*)ZSTD_cwksp_reserve_aligned(ws
, (1<<Litbits
) * sizeof(unsigned));
1748 ms
->opt
.litLengthFreq
= (unsigned*)ZSTD_cwksp_reserve_aligned(ws
, (MaxLL
+1) * sizeof(unsigned));
1749 ms
->opt
.matchLengthFreq
= (unsigned*)ZSTD_cwksp_reserve_aligned(ws
, (MaxML
+1) * sizeof(unsigned));
1750 ms
->opt
.offCodeFreq
= (unsigned*)ZSTD_cwksp_reserve_aligned(ws
, (MaxOff
+1) * sizeof(unsigned));
1751 ms
->opt
.matchTable
= (ZSTD_match_t
*)ZSTD_cwksp_reserve_aligned(ws
, (ZSTD_OPT_NUM
+1) * sizeof(ZSTD_match_t
));
1752 ms
->opt
.priceTable
= (ZSTD_optimal_t
*)ZSTD_cwksp_reserve_aligned(ws
, (ZSTD_OPT_NUM
+1) * sizeof(ZSTD_optimal_t
));
1755 if (ZSTD_rowMatchFinderUsed(cParams
->strategy
, useRowMatchFinder
)) {
1756 { /* Row match finder needs an additional table of hashes ("tags") */
1757 size_t const tagTableSize
= hSize
*sizeof(U16
);
1758 ms
->tagTable
= (U16
*)ZSTD_cwksp_reserve_aligned(ws
, tagTableSize
);
1759 if (ms
->tagTable
) ZSTD_memset(ms
->tagTable
, 0, tagTableSize
);
1761 { /* Switch to 32-entry rows if searchLog is 5 (or more) */
1762 U32
const rowLog
= cParams
->searchLog
< 5 ? 4 : 5;
1763 assert(cParams
->hashLog
> rowLog
);
1764 ms
->rowHashLog
= cParams
->hashLog
- rowLog
;
1768 ms
->cParams
= *cParams
;
1770 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws
), memory_allocation
,
1771 "failed a workspace allocation in ZSTD_reset_matchState");
1775 /* ZSTD_indexTooCloseToMax() :
1776 * minor optimization : prefer memset() rather than reduceIndex()
1777 * which is measurably slow in some circumstances (reported for Visual Studio).
1778 * Works when re-using a context for a lot of smallish inputs :
1779 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
1780 * memset() will be triggered before reduceIndex().
1782 #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
1783 static int ZSTD_indexTooCloseToMax(ZSTD_window_t w
)
1785 return (size_t)(w
.nextSrc
- w
.base
) > (ZSTD_CURRENT_MAX
- ZSTD_INDEXOVERFLOW_MARGIN
);
1788 /** ZSTD_dictTooBig():
1789 * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
1790 * one go generically. So we ensure that in that case we reset the tables to zero,
1791 * so that we can load as much of the dictionary as possible.
1793 static int ZSTD_dictTooBig(size_t const loadedDictSize
)
1795 return loadedDictSize
> ZSTD_CHUNKSIZE_MAX
;
/*! ZSTD_resetCCtx_internal() :
 * Prepares `zc` for a new compression job: validates/applies parameters,
 * (re)allocates the workspace if needed, and carves every internal buffer
 * (literals, sequences, LDM tables, stream buffers) out of the workspace
 * in a fixed order.
 * @param loadedDictSize The size of the dictionary to be loaded
 * into the context, if any. If no dictionary is used, or the
 * dictionary is being attached / copied, then pass 0.
 * note : `params` are assumed fully validated at this stage.
 */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                      ZSTD_CCtx_params const* params,
                                      U64 const pledgedSrcSize,
                                      size_t const loadedDictSize,
                                      ZSTD_compResetPolicy_e const crp,
                                      ZSTD_buffered_policy_e const zbuff)
{
    ZSTD_cwksp* const ws = &zc->workspace;
    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d",
                (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder);
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));

    zc->isFirstBlock = 1;

    /* Set applied params early so we can modify them for LDM,
     * and point params at the applied params.
     */
    zc->appliedParams = *params;
    params = &zc->appliedParams;

    assert(params->useRowMatchFinder != ZSTD_urm_auto);
    if (params->ldmParams.enableLdm) {
        /* Adjust long distance matching parameters */
        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
        assert(params->ldmParams.hashRateLog < 32);
    }

    {   /* windowSize is clamped by pledgedSrcSize: no need for a window
         * larger than the input when the input size is known. */
        size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
        /* minMatch==3 allows one sequence per 3 bytes; otherwise per 4 bytes. */
        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                ? ZSTD_compressBound(blockSize) + 1
                : 0;
        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
                ? windowSize + blockSize
                : 0;
        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);

        /* A full index reset is required when indices are close to overflow,
         * when the dictionary is too large to load in one go, or on first use. */
        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
        ZSTD_indexResetPolicy_e needsIndexReset =
            (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;

        size_t const neededSpace =
            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
                buffInSize, buffOutSize, pledgedSrcSize);
        int resizeWorkspace;

        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");

        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);

        {   /* Check if workspace is large enough, alloc a new one if needed */
            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);

            if (resizeWorkspace) {
                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
                            ZSTD_cwksp_sizeof(ws) >> 10,
                            neededSpace >> 10);

                /* A static cctx owns a fixed buffer: resizing is impossible. */
                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");

                needsIndexReset = ZSTDirp_reset;

                ZSTD_cwksp_free(ws, zc->customMem);
                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");

                DEBUGLOG(5, "reserving object space");
                /* Statically sized space.
                 * entropyWorkspace never moves,
                 * though prev/next block swap places */
                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
        }   }

        ZSTD_cwksp_clear(ws);

        /* init params */
        zc->blockState.matchState.cParams = params->cParams;
        /* +1 so that 0 can still mean "unset / unknown". */
        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
        zc->consumedSrcSize = 0;
        zc->producedCSize = 0;
        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
            zc->appliedParams.fParams.contentSizeFlag = 0;
        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
        zc->blockSize = blockSize;

        XXH64_reset(&zc->xxhState, 0);
        zc->stage = ZSTDcs_init;
        zc->dictID = 0;
        zc->dictContentSize = 0;

        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);

        /* ZSTD_wildcopy() is used to copy into the literals buffer,
         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
         */
        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
        zc->seqStore.maxNbLit = blockSize;

        /* buffers */
        zc->bufferedPolicy = zbuff;
        zc->inBuffSize = buffInSize;
        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
        zc->outBuffSize = buffOutSize;
        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);

        /* ldm bucketOffsets table */
        if (params->ldmParams.enableLdm) {
            /* TODO: avoid memset? */
            size_t const numBuckets =
                  ((size_t)1) << (params->ldmParams.hashLog -
                                  params->ldmParams.bucketSizeLog);
            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
        }

        /* sequences storage */
        ZSTD_referenceExternalSequences(zc, NULL, 0);
        zc->seqStore.maxNbSeq = maxNbSeq;
        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));

        FORWARD_IF_ERROR(ZSTD_reset_matchState(
            &zc->blockState.matchState,
            ws,
            &params->cParams,
            params->useRowMatchFinder,
            crp,
            needsIndexReset,
            ZSTD_resetTarget_CCtx), "");

        /* ldm hash table */
        if (params->ldmParams.enableLdm) {
            /* TODO: avoid memset? */
            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
            zc->maxNbLdmSequences = maxNbLdmSeq;

            ZSTD_window_init(&zc->ldmState.window);
            zc->ldmState.loadedDictEnd = 0;
        }

        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));

        zc->initialized = 1;

        return 0;
    }
}
1973 /* ZSTD_invalidateRepCodes() :
1974 * ensures next compression will not use repcodes from previous block.
1975 * Note : only works with regular variant;
1976 * do not use with extDict variant ! */
1977 void ZSTD_invalidateRepCodes(ZSTD_CCtx
* cctx
) {
1979 for (i
=0; i
<ZSTD_REP_NUM
; i
++) cctx
->blockState
.prevCBlock
->rep
[i
] = 0;
1980 assert(!ZSTD_window_hasExtDict(cctx
->blockState
.matchState
.window
));
/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place. Indexed by ZSTD_strategy (slot 0 is unused since strategies
 * start at 1). */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};
2000 static int ZSTD_shouldAttachDict(const ZSTD_CDict
* cdict
,
2001 const ZSTD_CCtx_params
* params
,
2004 size_t cutoff
= attachDictSizeCutoffs
[cdict
->matchState
.cParams
.strategy
];
2005 int const dedicatedDictSearch
= cdict
->matchState
.dedicatedDictSearch
;
2006 return dedicatedDictSearch
2007 || ( ( pledgedSrcSize
<= cutoff
2008 || pledgedSrcSize
== ZSTD_CONTENTSIZE_UNKNOWN
2009 || params
->attachDictPref
== ZSTD_dictForceAttach
)
2010 && params
->attachDictPref
!= ZSTD_dictForceCopy
2011 && !params
->forceWindow
); /* dictMatchState isn't correctly
2012 * handled in _enforceMaxDist */
/* ZSTD_resetCCtx_byAttachingCDict():
 * Resets `cctx` and references the cdict's match state in place instead of
 * copying its tables. The working window is shifted so dictionary matches
 * never produce negative indices in the working context's index space. */
static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                        const ZSTD_CDict* cdict,
                        ZSTD_CCtx_params params,
                        U64 pledgedSrcSize,
                        ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
                (unsigned long long)pledgedSrcSize);
    {
        ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
        unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Resize working context table params for input only, since the dict
         * has its own tables. */
        /* pledgedSrcSize == 0 means 0! */

        if (cdict->matchState.dedicatedDictSearch) {
            ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
        }

        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
        params.cParams.windowLog = windowLog;
        params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
                                                 /* loadedDictSize */ 0,
                                                 ZSTDcrp_makeClean, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
    }

    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                  - cdict->matchState.window.base);
        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
        if (cdictLen == 0) {
            /* don't even attach dictionaries with no contents */
            DEBUGLOG(4, "skipping attaching empty dictionary");
        } else {
            DEBUGLOG(4, "attaching dictionary into context");
            cctx->blockState.matchState.dictMatchState = &cdict->matchState;

            /* prep working match state so dict matches never have negative indices
             * when they are translated to the working context's index space. */
            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
                cctx->blockState.matchState.window.nextSrc =
                    cctx->blockState.matchState.window.base + cdictEnd;
                ZSTD_window_clear(&cctx->blockState.matchState.window);
            }
            /* loadedDictEnd is expressed within the referential of the active context */
            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
    }   }

    cctx->dictID = cdict->dictID;
    cctx->dictContentSize = cdict->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
/* ZSTD_resetCCtx_byCopyingCDict():
 * Resets `cctx` and duplicates the cdict's hash/chain/tag tables into the
 * working context, so subsequent compression owns its own copy.
 * Requires matching table-related cParams between cctx and cdict. */
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            ZSTD_CCtx_params params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;

    assert(!cdict->matchState.dedicatedDictSearch);
    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
                (unsigned long long)pledgedSrcSize);

    {   unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Copy only compression parameters related to tables. */
        params.cParams = *cdict_cParams;
        params.cParams.windowLog = windowLog;
        params.useRowMatchFinder = cdict->useRowMatchFinder;
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
                                                 /* loadedDictSize */ 0,
                                                 ZSTDcrp_leaveDirty, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
    }

    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
    assert(params.useRowMatchFinder != ZSTD_urm_auto);

    /* copy tables */
    {   size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
                                                            ? ((size_t)1 << cdict_cParams->chainLog)
                                                            : 0;
        size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;

        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
               cdict->matchState.hashTable,
               hSize * sizeof(U32));
        /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
               cdict->matchState.chainTable,
               chainSize * sizeof(U32));
        }
        /* copy tag table */
        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
            size_t const tagTableSize = hSize*sizeof(U16);
            ZSTD_memcpy(cctx->blockState.matchState.tagTable,
                cdict->matchState.tagTable,
                tagTableSize);
        }
    }

    /* Zero the hashTable3, since the cdict never fills it */
    {   int const h3log = cctx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
        assert(cdict->matchState.hashLog3 == 0);
        ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);

    /* copy dictionary offsets */
    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
        dstMatchState->window = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd = srcMatchState->loadedDictEnd;
    }

    cctx->dictID = cdict->dictID;
    cctx->dictContentSize = cdict->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
2155 /* We have a choice between copying the dictionary context into the working
2156 * context, or referencing the dictionary context from the working context
2157 * in-place. We decide here which strategy to use. */
2158 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx
* cctx
,
2159 const ZSTD_CDict
* cdict
,
2160 const ZSTD_CCtx_params
* params
,
2162 ZSTD_buffered_policy_e zbuff
)
2165 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
2166 (unsigned)pledgedSrcSize
);
2168 if (ZSTD_shouldAttachDict(cdict
, params
, pledgedSrcSize
)) {
2169 return ZSTD_resetCCtx_byAttachingCDict(
2170 cctx
, cdict
, *params
, pledgedSrcSize
, zbuff
);
2172 return ZSTD_resetCCtx_byCopyingCDict(
2173 cctx
, cdict
, *params
, pledgedSrcSize
, zbuff
);
/*! ZSTD_copyCCtx_internal() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  The "context", in this case, refers to the hash and chain tables,
 *  entropy tables, and dictionary references.
 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
 * @return : 0, or an error code */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
                            const ZSTD_CCtx* srcCCtx,
                            ZSTD_frameParameters fParams,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
                    "Can't copy a ctx that's not in init stage.");
    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
    ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
        /* Copy only compression parameters related to tables. */
        params.cParams = srcCCtx->appliedParams.cParams;
        assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto);
        params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
        params.fParams = fParams;
        ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
                                /* loadedDictSize */ 0,
                                ZSTDcrp_leaveDirty, zbuff);
        /* After reset, dstCCtx's table geometry must match srcCCtx's exactly,
         * otherwise the raw table copies below would be invalid. */
        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
    }

    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);

    /* copy tables */
    {   size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
                                                         srcCCtx->appliedParams.useRowMatchFinder,
                                                         0 /* forDDSDict */)
                                    ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
                                    : 0;
        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
        int const h3log = srcCCtx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;

        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
               srcCCtx->blockState.matchState.hashTable,
               hSize * sizeof(U32));
        ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
               srcCCtx->blockState.matchState.chainTable,
               chainSize * sizeof(U32));
        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
               srcCCtx->blockState.matchState.hashTable3,
               h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);

    /* copy dictionary offsets */
    {
        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
        dstMatchState->window       = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }
    dstCCtx->dictID = srcCCtx->dictID;
    dstCCtx->dictContentSize = srcCCtx->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));

    return 0;
}
2252 /*! ZSTD_copyCCtx() :
2253 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
2254 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
2255 * pledgedSrcSize==0 means "unknown".
2256 * @return : 0, or an error code */
2257 size_t ZSTD_copyCCtx(ZSTD_CCtx
* dstCCtx
, const ZSTD_CCtx
* srcCCtx
, unsigned long long pledgedSrcSize
)
2259 ZSTD_frameParameters fParams
= { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
2260 ZSTD_buffered_policy_e
const zbuff
= srcCCtx
->bufferedPolicy
;
2261 ZSTD_STATIC_ASSERT((U32
)ZSTDb_buffered
==1);
2262 if (pledgedSrcSize
==0) pledgedSrcSize
= ZSTD_CONTENTSIZE_UNKNOWN
;
2263 fParams
.contentSizeFlag
= (pledgedSrcSize
!= ZSTD_CONTENTSIZE_UNKNOWN
);
2265 return ZSTD_copyCCtx_internal(dstCCtx
, srcCCtx
,
2266 fParams
, pledgedSrcSize
,
#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue`, or squash to zero.
 *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
 *  It must be set to a clear 0/1 value, to remove branch during inlining.
 *  Presume table size is a multiple of ZSTD_ROWSIZE
 *  to help auto-vectorization */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;
    int rowNb;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));   /* can be casted to int */

#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
    /* To validate that the table re-use logic is sound, and that we don't
     * access table space that we haven't cleaned, we re-"poison" the table
     * space every time we mark it dirty.
     *
     * This function however is intended to operate on those dirty tables and
     * re-clean them. So when this function is used correctly, we can unpoison
     * the memory it operated on. This introduces a blind spot though, since
     * if we now try to operate on __actually__ poisoned memory, we will not
     * detect that. */
    __msan_unpoison(table, size * sizeof(U32));
#endif

    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            if (preserveMark) {
                /* Bump the unsorted-mark up by reducerValue so the subsequent
                 * subtraction leaves the mark value intact. */
                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
                table[cellNb] += adder;
            }
            /* Indexes below reducerValue would underflow: squash them to 0. */
            if (table[cellNb] < reducerValue) table[cellNb] = 0;
            else table[cellNb] -= reducerValue;
            cellNb++;
    }   }
}
2313 static void ZSTD_reduceTable(U32
* const table
, U32
const size
, U32
const reducerValue
)
2315 ZSTD_reduceTable_internal(table
, size
, reducerValue
, 0);
2318 static void ZSTD_reduceTable_btlazy2(U32
* const table
, U32
const size
, U32
const reducerValue
)
2320 ZSTD_reduceTable_internal(table
, size
, reducerValue
, 1);
/*! ZSTD_reduceIndex() :
*   rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
{
    /* hash table is always allocated */
    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
    }

    /* chain table exists only for strategies/configs that use one */
    if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
        U32 const chainSize = (U32)1 << params->cParams.chainLog;
        /* btlazy2 stores a special "unsorted mark" that must be preserved */
        if (params->cParams.strategy == ZSTD_btlazy2)
            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
        else
            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
    }

    if (ms->hashLog3) {
        U32 const h3Size = (U32)1 << ms->hashLog3;
        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
    }
}
2346 /*-*******************************************************
2347 * Block entropic compression
2348 *********************************************************/
2350 /* See doc/zstd_compression_format.md for detailed format description */
2352 void ZSTD_seqToCodes(const seqStore_t
* seqStorePtr
)
2354 const seqDef
* const sequences
= seqStorePtr
->sequencesStart
;
2355 BYTE
* const llCodeTable
= seqStorePtr
->llCode
;
2356 BYTE
* const ofCodeTable
= seqStorePtr
->ofCode
;
2357 BYTE
* const mlCodeTable
= seqStorePtr
->mlCode
;
2358 U32
const nbSeq
= (U32
)(seqStorePtr
->sequences
- seqStorePtr
->sequencesStart
);
2360 assert(nbSeq
<= seqStorePtr
->maxNbSeq
);
2361 for (u
=0; u
<nbSeq
; u
++) {
2362 U32
const llv
= sequences
[u
].litLength
;
2363 U32
const mlv
= sequences
[u
].matchLength
;
2364 llCodeTable
[u
] = (BYTE
)ZSTD_LLcode(llv
);
2365 ofCodeTable
[u
] = (BYTE
)ZSTD_highbit32(sequences
[u
].offset
);
2366 mlCodeTable
[u
] = (BYTE
)ZSTD_MLcode(mlv
);
2368 if (seqStorePtr
->longLengthType
==ZSTD_llt_literalLength
)
2369 llCodeTable
[seqStorePtr
->longLengthPos
] = MaxLL
;
2370 if (seqStorePtr
->longLengthType
==ZSTD_llt_matchLength
)
2371 mlCodeTable
[seqStorePtr
->longLengthPos
] = MaxML
;
2374 /* ZSTD_useTargetCBlockSize():
2375 * Returns if target compressed block size param is being used.
2376 * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
2377 * Returns 1 if true, 0 otherwise. */
2378 static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params
* cctxParams
)
2380 DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams
->targetCBlockSize
);
2381 return (cctxParams
->targetCBlockSize
!= 0);
2384 /* ZSTD_blockSplitterEnabled():
2385 * Returns if block splitting param is being used
2386 * If used, compression will do best effort to split a block in order to improve compression ratio.
2387 * Returns 1 if true, 0 otherwise. */
2388 static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params
* cctxParams
)
2390 DEBUGLOG(5, "ZSTD_blockSplitterEnabled(splitBlocks=%d)", cctxParams
->splitBlocks
);
2391 return (cctxParams
->splitBlocks
!= 0);
/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
 * and size of the sequences statistics
 */
typedef struct {
    U32 LLtype;            /* chosen encoding type for literal lengths */
    U32 Offtype;           /* chosen encoding type for offsets */
    U32 MLtype;            /* chosen encoding type for match lengths */
    size_t size;           /* bytes written for FSE headers, or a ZSTD error code */
    size_t lastCountSize;  /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_symbolEncodingTypeStats_t;
/* ZSTD_buildSequencesStatistics():
 * Returns the size of the statistics for a given set of sequences, or a ZSTD error code,
 * Also modifies LLtype, Offtype, MLtype, and lastNCount to the appropriate values.
 *
 * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
 */
static ZSTD_symbolEncodingTypeStats_t
ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
                              BYTE* dst, const BYTE* const dstEnd,
                              ZSTD_strategy strategy, unsigned* countWorkspace,
                              void* entropyWorkspace, size_t entropyWkspSize) {
    BYTE* const ostart = dst;
    const BYTE* const oend = dstEnd;
    BYTE* op = ostart;  /* running write cursor for emitted FSE table headers */
    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    ZSTD_symbolEncodingTypeStats_t stats;

    stats.lastCountSize = 0;
    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);
    assert(op <= oend);
    /* build CTable for Literal Lengths */
    {   unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building LL table");
        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
        stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        LLFSELog, prevEntropy->litlengthCTable,
                                        LL_defaultNorm, LL_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(set_basic < set_compressed && set_rle < set_compressed);
        assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
                countWorkspace, max, llCodeTable, nbSeq,
                LL_defaultNorm, LL_defaultNormLog, MaxLL,
                prevEntropy->litlengthCTable,
                sizeof(prevEntropy->litlengthCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
                stats.size = countSize;  /* propagate the error code via .size */
                return stats;
            }
            if (stats.LLtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for Offsets */
    {   unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(
            countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        DEBUGLOG(5, "Building OF table");
        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
        stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        OffFSELog, prevEntropy->offcodeCTable,
                                        OF_defaultNorm, OF_defaultNormLog,
                                        defaultPolicy, strategy);
        assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
                countWorkspace, max, ofCodeTable, nbSeq,
                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                prevEntropy->offcodeCTable,
                sizeof(prevEntropy->offcodeCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.Offtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for MatchLengths */
    {   unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(
            countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
        stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        MLFSELog, prevEntropy->matchlengthCTable,
                                        ML_defaultNorm, ML_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
                countWorkspace, max, mlCodeTable, nbSeq,
                ML_defaultNorm, ML_defaultNormLog, MaxML,
                prevEntropy->matchlengthCTable,
                sizeof(prevEntropy->matchlengthCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.MLtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    stats.size = (size_t)(op-ostart);
    return stats;
}
/* ZSTD_entropyCompressSeqStore_internal():
 * compresses both literals and sequences
 * Returns compressed size of block, or a zstd error.
 * Layout written into dst: [compressed literals][nbSeq header][seqHead byte]
 * [FSE table descriptions][FSE-encoded sequences bitstream].
 * NOTE(review): reconstructed from a garbled extraction; storage class and
 * dropped argument lines restored from context — verify against upstream. */
MEM_STATIC size_t
ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
                          const ZSTD_entropyCTables_t* prevEntropy,
                                ZSTD_entropyCTables_t* nextEntropy,
                          const ZSTD_CCtx_params* cctxParams,
                                void* dst, size_t dstCapacity,
                                void* entropyWorkspace, size_t entropyWkspSize,
                          const int bmi2)
{
    /* long offsets require the 64-bit bitstream accumulator path */
    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    unsigned* count = (unsigned*)entropyWorkspace;
    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t lastCountSize;

    /* reserve the leading (MaxSeq+1) counters of the workspace for histograms */
    entropyWorkspace = count + (MaxSeq + 1);
    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);

    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);

    /* Compress literals */
    {   const BYTE* const literals = seqStorePtr->litStart;
        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
        size_t const cSize = ZSTD_compressLiterals(
                                    &prevEntropy->huf, &nextEntropy->huf,
                                    cctxParams->cParams.strategy,
                                    ZSTD_disableLiteralsCompression(cctxParams),
                                    op, dstCapacity,
                                    literals, litSize,
                                    entropyWorkspace, entropyWkspSize,
                                    bmi2);
        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
        assert(cSize <= dstCapacity);
        op += cSize;
    }

    /* Sequences Header: 1-3 bytes of nbSeq, plus the seqHead byte written later */
    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
    if (nbSeq < 128) {
        *op++ = (BYTE)nbSeq;
    } else if (nbSeq < LONGNBSEQ) {
        op[0] = (BYTE)((nbSeq>>8) + 0x80);
        op[1] = (BYTE)nbSeq;
        op+=2;
    } else {
        op[0]=0xFF;
        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
        op+=3;
    }
    assert(op <= oend);
    if (nbSeq==0) {
        /* Copy the old tables over as if we repeated them */
        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
        return (size_t)(op - ostart);
    }
    {   ZSTD_symbolEncodingTypeStats_t stats;
        BYTE* seqHead = op++;   /* reserved; filled once encoding types are known */
        /* build stats for sequences */
        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
                                             &prevEntropy->fse, &nextEntropy->fse,
                                              op, oend,
                                              strategy, count,
                                              entropyWorkspace, entropyWkspSize);
        FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
        *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
        lastCountSize = stats.lastCountSize;
        op += stats.size;
    }

    {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                        op, (size_t)(oend - op),
                                        CTable_MatchLength, mlCodeTable,
                                        CTable_OffsetBits, ofCodeTable,
                                        CTable_LitLength, llCodeTable,
                                        sequences, nbSeq,
                                        longOffsets, bmi2);
        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
        op += bitstreamSize;
        assert(op <= oend);
        /* zstd versions <= 1.3.4 mistakenly report corruption when
         * FSE_readNCount() receives a buffer < 4 bytes.
         * Fixed by https://github.com/facebook/zstd/pull/1146.
         * This can happen when the last set_compressed table present is 2
         * bytes and the bitstream is only one byte.
         * In this exceedingly rare case, we will simply emit an uncompressed
         * block, since it isn't worth optimizing.
         */
        if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
            /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
            assert(lastCountSize + bitstreamSize == 3);
            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                        "emitting an uncompressed block.");
            return 0;   /* 0 signals "not compressed" to the caller */
        }
    }

    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
    return (size_t)(op - ostart);
}
/* ZSTD_entropyCompressSeqStore():
 * Wrapper around the internal entropy stage.
 * Returns the compressed size, 0 when the block is judged not compressible
 * (caller falls back to a raw block), or a zstd error code. */
MEM_STATIC size_t
ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
                       const ZSTD_entropyCTables_t* prevEntropy,
                             ZSTD_entropyCTables_t* nextEntropy,
                       const ZSTD_CCtx_params* cctxParams,
                             void* dst, size_t dstCapacity,
                             size_t srcSize,
                             void* entropyWorkspace, size_t entropyWkspSize,
                             int bmi2)
{
    size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
                            dst, dstCapacity,
                            entropyWorkspace, entropyWkspSize, bmi2);
    if (cSize == 0) return 0;
    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
     * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
     */
    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
        return 0;  /* block not compressed */
    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");

    /* Check compressibility: require a minimum gain over the raw size */
    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
        if (cSize >= maxCSize) return 0;  /* block not compressed */
    }
    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
    return cSize;
}
2676 /* ZSTD_selectBlockCompressor() :
2677 * Not static, but internal use only (used by long distance matcher)
2678 * assumption : strat is a valid strategy */
2679 ZSTD_blockCompressor
ZSTD_selectBlockCompressor(ZSTD_strategy strat
, ZSTD_useRowMatchFinderMode_e useRowMatchFinder
, ZSTD_dictMode_e dictMode
)
2681 static const ZSTD_blockCompressor blockCompressor
[4][ZSTD_STRATEGY_MAX
+1] = {
2682 { ZSTD_compressBlock_fast
/* default for 0 */,
2683 ZSTD_compressBlock_fast
,
2684 ZSTD_compressBlock_doubleFast
,
2685 ZSTD_compressBlock_greedy
,
2686 ZSTD_compressBlock_lazy
,
2687 ZSTD_compressBlock_lazy2
,
2688 ZSTD_compressBlock_btlazy2
,
2689 ZSTD_compressBlock_btopt
,
2690 ZSTD_compressBlock_btultra
,
2691 ZSTD_compressBlock_btultra2
},
2692 { ZSTD_compressBlock_fast_extDict
/* default for 0 */,
2693 ZSTD_compressBlock_fast_extDict
,
2694 ZSTD_compressBlock_doubleFast_extDict
,
2695 ZSTD_compressBlock_greedy_extDict
,
2696 ZSTD_compressBlock_lazy_extDict
,
2697 ZSTD_compressBlock_lazy2_extDict
,
2698 ZSTD_compressBlock_btlazy2_extDict
,
2699 ZSTD_compressBlock_btopt_extDict
,
2700 ZSTD_compressBlock_btultra_extDict
,
2701 ZSTD_compressBlock_btultra_extDict
},
2702 { ZSTD_compressBlock_fast_dictMatchState
/* default for 0 */,
2703 ZSTD_compressBlock_fast_dictMatchState
,
2704 ZSTD_compressBlock_doubleFast_dictMatchState
,
2705 ZSTD_compressBlock_greedy_dictMatchState
,
2706 ZSTD_compressBlock_lazy_dictMatchState
,
2707 ZSTD_compressBlock_lazy2_dictMatchState
,
2708 ZSTD_compressBlock_btlazy2_dictMatchState
,
2709 ZSTD_compressBlock_btopt_dictMatchState
,
2710 ZSTD_compressBlock_btultra_dictMatchState
,
2711 ZSTD_compressBlock_btultra_dictMatchState
},
2712 { NULL
/* default for 0 */,
2715 ZSTD_compressBlock_greedy_dedicatedDictSearch
,
2716 ZSTD_compressBlock_lazy_dedicatedDictSearch
,
2717 ZSTD_compressBlock_lazy2_dedicatedDictSearch
,
2723 ZSTD_blockCompressor selectedCompressor
;
2724 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast
== 1);
2726 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy
, strat
));
2727 DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode
, (int)strat
, (int)useRowMatchFinder
);
2728 if (ZSTD_rowMatchFinderUsed(strat
, useRowMatchFinder
)) {
2729 static const ZSTD_blockCompressor rowBasedBlockCompressors
[4][3] = {
2730 { ZSTD_compressBlock_greedy_row
,
2731 ZSTD_compressBlock_lazy_row
,
2732 ZSTD_compressBlock_lazy2_row
},
2733 { ZSTD_compressBlock_greedy_extDict_row
,
2734 ZSTD_compressBlock_lazy_extDict_row
,
2735 ZSTD_compressBlock_lazy2_extDict_row
},
2736 { ZSTD_compressBlock_greedy_dictMatchState_row
,
2737 ZSTD_compressBlock_lazy_dictMatchState_row
,
2738 ZSTD_compressBlock_lazy2_dictMatchState_row
},
2739 { ZSTD_compressBlock_greedy_dedicatedDictSearch_row
,
2740 ZSTD_compressBlock_lazy_dedicatedDictSearch_row
,
2741 ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
}
2743 DEBUGLOG(4, "Selecting a row-based matchfinder");
2744 assert(useRowMatchFinder
!= ZSTD_urm_auto
);
2745 selectedCompressor
= rowBasedBlockCompressors
[(int)dictMode
][(int)strat
- (int)ZSTD_greedy
];
2747 selectedCompressor
= blockCompressor
[(int)dictMode
][(int)strat
];
2749 assert(selectedCompressor
!= NULL
);
2750 return selectedCompressor
;
2753 static void ZSTD_storeLastLiterals(seqStore_t
* seqStorePtr
,
2754 const BYTE
* anchor
, size_t lastLLSize
)
2756 ZSTD_memcpy(seqStorePtr
->lit
, anchor
, lastLLSize
);
2757 seqStorePtr
->lit
+= lastLLSize
;
2760 void ZSTD_resetSeqStore(seqStore_t
* ssPtr
)
2762 ssPtr
->lit
= ssPtr
->litStart
;
2763 ssPtr
->sequences
= ssPtr
->sequencesStart
;
2764 ssPtr
->longLengthType
= ZSTD_llt_none
;
2767 typedef enum { ZSTDbss_compress
, ZSTDbss_noCompress
} ZSTD_buildSeqStore_e
;
/* ZSTD_buildSeqStore():
 * Runs the match finder over one block of input and fills zc->seqStore with
 * (literals, sequences). Dispatches between extern-provided sequences, long
 * distance matching (LDM), and a regular block compressor.
 * @return ZSTDbss_compress when sequences were produced, ZSTDbss_noCompress
 * when the block is too small to bother, or a zstd error code.
 * NOTE(review): reconstructed from a garbled extraction — dropped brace and
 * declaration lines restored from context; verify against upstream. */
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    /* Assert that we have correctly flushed the ctx params into the ms's copy */
    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
        /* Keep the LDM/extern sequence streams in sync even when skipping */
        if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
            ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
        } else {
            ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
        }
        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
    }
    ZSTD_resetSeqStore(&(zc->seqStore));
    /* required for optimal parser to read stats from dictionary */
    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
    /* tell the optimal parser how we expect to compress literals */
    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
    /* a gap between an attached dict and the current window is not safe,
     * they must remain adjacent,
     * and when that stops being the case, the dict must be unset */
    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);

    /* limited update after a very long match */
    {   const BYTE* const base = ms->window.base;
        const BYTE* const istart = (const BYTE*)src;
        const U32 curr = (U32)(istart-base);
        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
        if (curr > ms->nextToUpdate + 384)
            ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
    }

    /* select and store sequences */
    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
        size_t lastLLSize;
        {   int i;
            /* seed next block's repcodes from the previous block's */
            for (i = 0; i < ZSTD_REP_NUM; ++i)
                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
        }
        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
            assert(!zc->appliedParams.ldmParams.enableLdm);
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       zc->appliedParams.useRowMatchFinder,
                                       src, srcSize);
            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
        } else if (zc->appliedParams.ldmParams.enableLdm) {
            rawSeqStore_t ldmSeqStore = kNullRawSeqStore;

            ldmSeqStore.seq = zc->ldmSequences;
            ldmSeqStore.capacity = zc->maxNbLdmSequences;
            /* Updates ldmSeqStore.size */
            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
                                               &zc->appliedParams.ldmParams,
                                               src, srcSize), "");
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&ldmSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       zc->appliedParams.useRowMatchFinder,
                                       src, srcSize);
            assert(ldmSeqStore.pos == ldmSeqStore.size);
        } else {   /* not long range mode */
            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
                                                                                    zc->appliedParams.useRowMatchFinder,
                                                                                    dictMode);
            ms->ldmSeqStore = NULL;
            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
        }
        /* whatever the compressor did not cover becomes last literals */
        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
    }   }
    return ZSTDbss_compress;
}
/* ZSTD_copyBlockSequences():
 * Converts the current block's internal seqStore into public ZSTD_Sequence
 * entries appended to zc->seqCollector, translating repcodes into raw offsets
 * and emitting a final (offset=0, matchLength=0) entry carrying last literals. */
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
{
    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
    size_t literalsRead = 0;
    size_t lastLLSize;

    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
    size_t i;
    repcodes_t updatedRepcodes;

    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
    /* Ensure we have enough space for last literals "sequence" */
    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    for (i = 0; i < seqStoreSeqSize; ++i) {
        /* seqDef.offset stores rawOffset + ZSTD_REP_NUM for non-repcodes */
        U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
        outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
        outSeqs[i].rep = 0;

        if (i == seqStore->longLengthPos) {
            /* exactly one sequence per block may carry a 64K length overflow */
            if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                outSeqs[i].litLength += 0x10000;
            } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
                outSeqs[i].matchLength += 0x10000;
            }
        }

        if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
            /* Derive the correct offset corresponding to a repcode */
            outSeqs[i].rep = seqStoreSeqs[i].offset;
            if (outSeqs[i].litLength != 0) {
                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
            } else {
                /* litLength == 0 shifts repcode meaning: rep 3 means rep[0]-1 */
                if (outSeqs[i].rep == 3) {
                    rawOffset = updatedRepcodes.rep[0] - 1;
                } else {
                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
                }
            }
        }
        outSeqs[i].offset = rawOffset;
        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
           so we provide seqStoreSeqs[i].offset - 1 */
        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
                                         seqStoreSeqs[i].offset - 1,
                                         seqStoreSeqs[i].litLength == 0);
        literalsRead += outSeqs[i].litLength;
    }
    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
     * for the block boundary, according to the API.
     */
    assert(seqStoreLiteralsSize >= literalsRead);
    lastLLSize = seqStoreLiteralsSize - literalsRead;
    outSeqs[i].litLength = (U32)lastLLSize;
    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
    seqStoreSeqSize++;
    zc->seqCollector.seqIndex += seqStoreSeqSize;
}
2914 size_t ZSTD_generateSequences(ZSTD_CCtx
* zc
, ZSTD_Sequence
* outSeqs
,
2915 size_t outSeqsSize
, const void* src
, size_t srcSize
)
2917 const size_t dstCapacity
= ZSTD_compressBound(srcSize
);
2918 void* dst
= ZSTD_customMalloc(dstCapacity
, ZSTD_defaultCMem
);
2919 SeqCollector seqCollector
;
2921 RETURN_ERROR_IF(dst
== NULL
, memory_allocation
, "NULL pointer!");
2923 seqCollector
.collectSequences
= 1;
2924 seqCollector
.seqStart
= outSeqs
;
2925 seqCollector
.seqIndex
= 0;
2926 seqCollector
.maxSequences
= outSeqsSize
;
2927 zc
->seqCollector
= seqCollector
;
2929 ZSTD_compress2(zc
, dst
, dstCapacity
, src
, srcSize
);
2930 ZSTD_customFree(dst
, ZSTD_defaultCMem
);
2931 return zc
->seqCollector
.seqIndex
;
2934 size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence
* sequences
, size_t seqsSize
) {
2937 for (; in
< seqsSize
; ++in
) {
2938 if (sequences
[in
].offset
== 0 && sequences
[in
].matchLength
== 0) {
2939 if (in
!= seqsSize
- 1) {
2940 sequences
[in
+1].litLength
+= sequences
[in
].litLength
;
2943 sequences
[out
] = sequences
[in
];
2950 /* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */
2951 static int ZSTD_isRLE(const BYTE
* src
, size_t length
) {
2952 const BYTE
* ip
= src
;
2953 const BYTE value
= ip
[0];
2954 const size_t valueST
= (size_t)((U64
)value
* 0x0101010101010101ULL
);
2955 const size_t unrollSize
= sizeof(size_t) * 4;
2956 const size_t unrollMask
= unrollSize
- 1;
2957 const size_t prefixLength
= length
& unrollMask
;
2960 if (length
== 1) return 1;
2961 /* Check if prefix is RLE first before using unrolled loop */
2962 if (prefixLength
&& ZSTD_count(ip
+1, ip
, ip
+prefixLength
) != prefixLength
-1) {
2965 for (i
= prefixLength
; i
!= length
; i
+= unrollSize
) {
2966 for (u
= 0; u
< unrollSize
; u
+= sizeof(size_t)) {
2967 if (MEM_readST(ip
+ i
+ u
) != valueST
) {
2975 /* Returns true if the given block may be RLE.
2976 * This is just a heuristic based on the compressibility.
2977 * It may return both false positives and false negatives.
2979 static int ZSTD_maybeRLE(seqStore_t
const* seqStore
)
2981 size_t const nbSeqs
= (size_t)(seqStore
->sequences
- seqStore
->sequencesStart
);
2982 size_t const nbLits
= (size_t)(seqStore
->lit
- seqStore
->litStart
);
2984 return nbSeqs
< 4 && nbLits
< 10;
2987 static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t
* const bs
)
2989 ZSTD_compressedBlockState_t
* const tmp
= bs
->prevCBlock
;
2990 bs
->prevCBlock
= bs
->nextCBlock
;
2991 bs
->nextCBlock
= tmp
;
2994 /* Writes the block header */
2995 static void writeBlockHeader(void* op
, size_t cSize
, size_t blockSize
, U32 lastBlock
) {
2996 U32
const cBlockHeader
= cSize
== 1 ?
2997 lastBlock
+ (((U32
)bt_rle
)<<1) + (U32
)(blockSize
<< 3) :
2998 lastBlock
+ (((U32
)bt_compressed
)<<1) + (U32
)(cSize
<< 3);
2999 MEM_writeLE24(op
, cBlockHeader
);
3000 DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize
, blockSize
, lastBlock
);
/** ZSTD_buildBlockEntropyStats_literals() :
 *  Builds entropy for the literals.
 *  Stores literals block type (raw, rle, compressed, repeat) and
 *  huffman description table to hufMetadata.
 *  Requires ENTROPY_WORKSPACE_SIZE workspace
 *  @return : size of huffman description table or error code
 *  (0 when hType is basic/rle/repeat: no table description is emitted). */
static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
                                            const ZSTD_hufCTables_t* prevHuf,
                                                  ZSTD_hufCTables_t* nextHuf,
                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
                                                  const int disableLiteralsCompression,
                                                  void* workspace, size_t wkspSize)
{
    /* workspace layout: [histogram counters][Huffman node workspace] */
    BYTE* const wkspStart = (BYTE*)workspace;
    BYTE* const wkspEnd = wkspStart + wkspSize;
    BYTE* const countWkspStart = wkspStart;
    unsigned* const countWksp = (unsigned*)workspace;
    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
    BYTE* const nodeWksp = countWkspStart + countWkspSize;
    const size_t nodeWkspSize = wkspEnd-nodeWksp;
    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    unsigned huffLog = HUF_TABLELOG_DEFAULT;
    HUF_repeat repeat = prevHuf->repeatMode;
    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);

    /* Prepare nextEntropy assuming reusing the existing table */
    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));

    if (disableLiteralsCompression) {
        DEBUGLOG(5, "set_basic - disabled");
        hufMetadata->hType = set_basic;
        return 0;
    }

    /* small ? don't even attempt compression (speed opt) */
#ifndef COMPRESS_LITERALS_SIZE_MIN
#define COMPRESS_LITERALS_SIZE_MIN 63
#endif
    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
        if (srcSize <= minLitSize) {
            DEBUGLOG(5, "set_basic - too small");
            hufMetadata->hType = set_basic;
            return 0;
        }
    }

    /* Scan input and build symbol stats */
    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
        if (largest == srcSize) {
            /* single-symbol input: RLE literals */
            DEBUGLOG(5, "set_rle");
            hufMetadata->hType = set_rle;
            return 0;
        }
        if (largest <= (srcSize >> 7)+4) {
            /* nearly flat distribution: Huffman would not gain */
            DEBUGLOG(5, "set_basic - no gain");
            hufMetadata->hType = set_basic;
            return 0;
        }
    }

    /* Validate the previous Huffman table */
    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
        repeat = HUF_repeat_none;
    }

    /* Build Huffman Tree */
    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
                                                    maxSymbolValue, huffLog,
                                                    nodeWksp, nodeWkspSize);
        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
        huffLog = (U32)maxBits;
        {   /* Build and write the CTable */
            size_t const newCSize = HUF_estimateCompressedSize(
                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
            size_t const hSize = HUF_writeCTable_wksp(
                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
                    nodeWksp, nodeWkspSize);
            /* Check against repeating the previous CTable */
            if (repeat != HUF_repeat_none) {
                size_t const oldCSize = HUF_estimateCompressedSize(
                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
                    DEBUGLOG(5, "set_repeat - smaller");
                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
                    hufMetadata->hType = set_repeat;
                    return 0;
                }
            }
            if (newCSize + hSize >= srcSize) {
                DEBUGLOG(5, "set_basic - no gains");
                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
                hufMetadata->hType = set_basic;
                return 0;
            }
            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
            hufMetadata->hType = set_compressed;
            /* new table may or may not match the next block: re-check there */
            nextHuf->repeatMode = HUF_repeat_check;
            return hSize;
        }
    }
}
/** ZSTD_buildBlockEntropyStats_sequences() :
 *  Builds entropy for the sequences.
 *  Stores symbol compression modes and fse table to fseMetadata.
 *  Requires ENTROPY_WORKSPACE_SIZE wksp.
 *  @return : size of fse tables or error code */
static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
                                              const ZSTD_fseCTables_t* prevEntropy,
                                                    ZSTD_fseCTables_t* nextEntropy,
                                              const ZSTD_CCtx_params* cctxParams,
                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
                                                    void* workspace, size_t wkspSize)
{
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    /* FSE table descriptions are written into fseMetadata's own buffer */
    BYTE* const ostart = fseMetadata->fseTablesBuffer;
    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
    BYTE* op = ostart;
    /* workspace layout: [MaxSeq+1 counters][entropy workspace] */
    unsigned* countWorkspace = (unsigned*)workspace;
    unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
    size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
    ZSTD_symbolEncodingTypeStats_t stats;

    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
    stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
                                          prevEntropy, nextEntropy, op, oend,
                                          strategy, countWorkspace,
                                          entropyWorkspace, entropyWorkspaceSize);
    FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
    fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
    fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
    fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
    fseMetadata->lastCountSize = stats.lastCountSize;
    return stats.size;
}
/** ZSTD_buildBlockEntropyStats() :
 *  Builds entropy for the block.
 *  Requires workspace size ENTROPY_WORKSPACE_SIZE
 *  Fills entropyMetadata with both literals (Huffman) and sequences (FSE) stats.
 *  @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize)
{
    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
    entropyMetadata->hufMetadata.hufDesSize =
        ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
                                            &prevEntropy->huf, &nextEntropy->huf,
                                            &entropyMetadata->hufMetadata,
                                            ZSTD_disableLiteralsCompression(cctxParams),
                                            workspace, wkspSize);
    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
    entropyMetadata->fseMetadata.fseTablesSize =
        ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
                                              &prevEntropy->fse, &nextEntropy->fse,
                                              cctxParams,
                                              &entropyMetadata->fseMetadata,
                                              workspace, wkspSize);
    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
    return 0;
}
/* Returns the size estimate for the literals section (header + content) of a block */
static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
                                             const ZSTD_hufCTables_t* huf,
                                             const ZSTD_hufCTablesMetadata_t* hufMetadata,
                                             void* workspace, size_t wkspSize,
                                             int writeEntropy)
{
    unsigned* const countWksp = (unsigned*)workspace;
    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    /* literals header is 3-5 bytes depending on regenerated size */
    size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
    U32 singleStream = litSize < 256;

    if (hufMetadata->hType == set_basic) return litSize;          /* raw literals */
    else if (hufMetadata->hType == set_rle) return 1;             /* single repeated byte */
    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
        if (ZSTD_isError(largest)) return litSize;   /* conservative fallback: raw size */
        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
            if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
            return cLitSizeEstimate + literalSectionHeaderSize;
    }   }
    assert(0); /* impossible */
    return 0;
}
/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
                        const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
                        const FSE_CTable* fseCTable,
                        const U32* additionalBits,
                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                        void* workspace, size_t wkspSize)
{
    unsigned* const countWksp = (unsigned*)workspace;
    const BYTE* ctp = codeTable;
    const BYTE* const ctStart = ctp;
    const BYTE* const ctEnd = ctStart + nbSeq;
    size_t cSymbolTypeSizeEstimateInBits = 0;
    unsigned max = maxCode;

    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
    if (type == set_basic) {
        /* We selected this encoding type, so it must be valid. */
        assert(max <= defaultMax);
        cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
    } else if (type == set_rle) {
        cSymbolTypeSizeEstimateInBits = 0;   /* single repeated code costs ~nothing */
    } else if (type == set_compressed || type == set_repeat) {
        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
    }
    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
        return nbSeq * 10;   /* pessimistic fallback estimate */
    }
    /* add the extra bits each code carries beyond the FSE symbol itself */
    while (ctp < ctEnd) {
        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
        ctp++;
    }
    return cSymbolTypeSizeEstimateInBits >> 3;   /* bits -> bytes */
}
/* Returns the size estimate for the sequences section (header + content) of a block */
static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
                                               const BYTE* llCodeTable,
                                               const BYTE* mlCodeTable,
                                               size_t nbSeq,
                                               const ZSTD_fseCTables_t* fseTables,
                                               const ZSTD_fseCTablesMetadata_t* fseMetadata,
                                               void* workspace, size_t wkspSize,
                                               int writeSeqEntropy)
{
    size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
    size_t cSeqSizeEstimate = 0;
    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
                                         fseTables->offcodeCTable, NULL,
                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                                         workspace, wkspSize);
    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
                                         fseTables->litlengthCTable, LL_bits,
                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
                                         workspace, wkspSize);
    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
                                         fseTables->matchlengthCTable, ML_bits,
                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
                                         workspace, wkspSize);
    if (writeSeqEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
    return cSeqSizeEstimate + sequencesSectionHeaderSize;
}
/* Returns the size estimate for a given stream of literals, of, ll, ml:
 * literals estimate + sequences estimate + the 3-byte block header. */
static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
                                     const BYTE* ofCodeTable,
                                     const BYTE* llCodeTable,
                                     const BYTE* mlCodeTable,
                                     size_t nbSeq,
                                     const ZSTD_entropyCTables_t* entropy,
                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                     void* workspace, size_t wkspSize,
                                     int writeLitEntropy, int writeSeqEntropy) {
    size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
                                                         &entropy->huf, &entropyMetadata->hufMetadata,
                                                         workspace, wkspSize, writeLitEntropy);
    size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
                                                         workspace, wkspSize, writeSeqEntropy);
    return seqSize + literalsSize + ZSTD_blockHeaderSize;
}
/* Builds entropy statistics and uses them for blocksize estimation.
 *
 * Returns the estimated compressed size of the seqStore, or a zstd error.
 */
static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) {
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
                    &zc->blockState.prevCBlock->entropy,
                    &zc->blockState.nextCBlock->entropy,
                    &zc->appliedParams,
                    &entropyMetadata,
                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
    /* literal entropy is only counted when a compressed Huffman table would be written */
    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
                    seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
                    (size_t)(seqStore->sequences - seqStore->sequencesStart),
                    &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
                    (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1);
}
3305 /* Returns literals bytes represented in a seqStore */
3306 static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t
* const seqStore
) {
3307 size_t literalsBytes
= 0;
3308 size_t const nbSeqs
= seqStore
->sequences
- seqStore
->sequencesStart
;
3310 for (i
= 0; i
< nbSeqs
; ++i
) {
3311 seqDef seq
= seqStore
->sequencesStart
[i
];
3312 literalsBytes
+= seq
.litLength
;
3313 if (i
== seqStore
->longLengthPos
&& seqStore
->longLengthType
== ZSTD_llt_literalLength
) {
3314 literalsBytes
+= 0x10000;
3317 return literalsBytes
;
3320 /* Returns match bytes represented in a seqStore */
3321 static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t
* const seqStore
) {
3322 size_t matchBytes
= 0;
3323 size_t const nbSeqs
= seqStore
->sequences
- seqStore
->sequencesStart
;
3325 for (i
= 0; i
< nbSeqs
; ++i
) {
3326 seqDef seq
= seqStore
->sequencesStart
[i
];
3327 matchBytes
+= seq
.matchLength
+ MINMATCH
;
3328 if (i
== seqStore
->longLengthPos
&& seqStore
->longLengthType
== ZSTD_llt_matchLength
) {
3329 matchBytes
+= 0x10000;
/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
 * Stores the result in resultSeqStore.
 */
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
                               const seqStore_t* originalSeqStore,
                                     size_t startIdx, size_t endIdx) {
    BYTE* const litEnd = originalSeqStore->lit;
    size_t literalsBytes;
    size_t literalsBytesPreceding = 0;

    *resultSeqStore = *originalSeqStore;
    if (startIdx > 0) {
        /* temporarily limit to [0, startIdx) to measure preceding literal bytes */
        resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
    }

    /* Move longLengthPos into the correct position if necessary */
    if (originalSeqStore->longLengthType != ZSTD_llt_none) {
        if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
            resultSeqStore->longLengthType = ZSTD_llt_none;
        } else {
            resultSeqStore->longLengthPos -= (U32)startIdx;
        }
    }
    resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
    resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
    resultSeqStore->litStart += literalsBytesPreceding;
    if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
        /* This accounts for possible last literals if the derived chunk reaches the end of the block */
        resultSeqStore->lit = litEnd;
    } else {
        resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
    }
    /* code tables are parallel arrays indexed like sequencesStart */
    resultSeqStore->llCode += startIdx;
    resultSeqStore->mlCode += startIdx;
    resultSeqStore->ofCode += startIdx;
}
/*
 * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
 * offCode must be an offCode representing a repcode, therefore in the range of [0, 2].
 */
static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) {
    /* ll0 shifts the repcode index by one (litLength==0 re-maps repcodes) */
    U32 const adjustedOffCode = offCode + ll0;
    assert(offCode < ZSTD_REP_NUM);
    if (adjustedOffCode == ZSTD_REP_NUM) {
        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
        assert(rep[0] > 0);
        return rep[0] - 1;
    }
    return rep[adjustedOffCode];
}
/**
 * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
 * due to emission of RLE/raw blocks that disturb the offset history, and replaces any repcodes within
 * the seqStore that may be invalid.
 *
 * dRepcodes are updated as would be on the decompression side. cRepcodes are updated exactly in
 * accordance with the seqStore.
 */
static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
                                          seqStore_t* const seqStore, U32 const nbSeq) {
    U32 idx = 0;
    for (; idx < nbSeq; ++idx) {
        seqDef* const seq = seqStore->sequencesStart + idx;
        U32 const ll0 = (seq->litLength == 0);
        U32 offCode = seq->offset - 1;              /* seqStore stores offset+1; 0..ZSTD_REP_MOVE are repcodes */
        assert(seq->offset > 0);
        if (offCode <= ZSTD_REP_MOVE) {
            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
            /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
             * the repcode with the offset it actually references, determined by the compression
             * repcode history.
             */
            if (dRawOffset != cRawOffset) {
                seq->offset = cRawOffset + ZSTD_REP_NUM;  /* convert to a raw-offset encoding */
            }
        }
        /* Compression repcode history is always updated with values directly from the unmodified seqStore.
         * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
         */
        *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offset - 1, ll0);
        *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
    }
}
/* ZSTD_compressSeqStore_singleBlock():
 * Compresses a seqStore into a block with a block header, into the buffer dst.
 *
 * Returns the total size of that block (including header) or a ZSTD error code.
 *
 * dRep/cRep are the simulated decompression-side and compression-side repcode
 * histories; they are only reconciled when isPartition != 0 (i.e. when this
 * seqStore is a chunk of a split block).
 */
static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
                                                repcodes_t* const dRep, repcodes_t* const cRep,
                                                void* dst, size_t dstCapacity,
                                                const void* src, size_t srcSize,
                                                U32 lastBlock, U32 isPartition) {
    const U32 rleMaxLength = 25;   /* heuristic threshold below which an RLE check is worthwhile */
    BYTE* op = (BYTE*)dst;
    const BYTE* ip = (const BYTE*)src;
    size_t cSize;
    size_t cSeqsSize;

    /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
    repcodes_t const dRepOriginal = *dRep;
    /* NOTE(review): isPartition gate reconstructed from context — verify against upstream */
    if (isPartition)
        ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));

    cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
                &zc->appliedParams,
                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
                srcSize,
                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
                zc->bmi2);
    FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");

    if (!zc->isFirstBlock &&
        cSeqsSize < rleMaxLength &&
        ZSTD_isRLE((BYTE const*)src, srcSize)) {
        /* We don't want to emit our first block as a RLE even if it qualifies because
         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        cSeqsSize = 1;   /* force the RLE emission path below */
    }

    if (zc->seqCollector.collectSequences) {
        /* sequence-collection mode: record sequences, emit nothing */
        ZSTD_copyBlockSequences(zc);
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        return 0;
    }

    /* after the first block the offcode table may lack large enough codes: downgrade to "check" */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    if (cSeqsSize == 0) {
        cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
        FORWARD_IF_ERROR(cSize, "Nocompress block failed");
        DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
    } else if (cSeqsSize == 1) {
        cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
        FORWARD_IF_ERROR(cSize, "RLE compress block failed");
        DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
    } else {
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
        cSize = ZSTD_blockHeaderSize + cSeqsSize;
        DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
    }
    return cSize;
}
/* Struct to keep track of where we are in our recursive calls. */
typedef struct {
    U32* splitLocations;    /* Array of split indices */
    size_t idx;             /* The current index within splitLocations being worked on */
} seqStoreSplits;

/* Minimum number of sequences a chunk must have before attempting a further split */
#define MIN_SEQUENCES_BLOCK_SPLITTING 300
/* Hard cap on split count; unreachable in practice with a 128 KB block (see helper's comment) */
#define MAX_NB_SPLITS 196
/* Helper function to perform the recursive search for block splits.
 * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
 * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
 * we do not recurse.
 *
 * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
 * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
 * In practice, recursion depth usually doesn't go beyond 4.
 *
 * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize
 * maximum of 128 KB, this value is actually impossible to reach.
 */
static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
                                         const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
    seqStore_t fullSeqStoreChunk;
    seqStore_t firstHalfSeqStore;
    seqStore_t secondHalfSeqStore;
    size_t estimatedOriginalSize;
    size_t estimatedFirstHalfSize;
    size_t estimatedSecondHalfSize;
    size_t midIdx = (startIdx + endIdx)/2;

    /* stop recursing on small chunks or when the split table is full */
    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) {
        return;
    }
    ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
    ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx);
    ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx);
    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc);
    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc);
    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc);
    DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
             estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
    if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
        return;   /* estimation failed: do not split */
    }
    if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
        /* splitting wins: record midIdx (in-order) between the two recursive halves */
        ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
        splits->splitLocations[splits->idx] = (U32)midIdx;
        splits->idx++;
        ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
    }
}
/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
 *
 * Returns the number of splits made (which equals the size of the partition table - 1).
 */
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
    seqStoreSplits splits = {partitions, 0};
    /* NOTE(review): threshold reconstructed from context — verify against upstream */
    if (nbSeq <= 4) {
        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
        /* Refuse to try and split anything with less than 4 sequences */
        return 0;
    }
    ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
    /* terminate the partition table with the total sequence count */
    splits.splitLocations[splits.idx] = nbSeq;
    DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
    return splits.idx;
}
/* ZSTD_compressBlock_splitBlock():
 * Attempts to split a given block into multiple blocks to improve compression ratio.
 *
 * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
 */
static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
                                                     const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) {
    size_t cSize = 0;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    U32 partitions[MAX_NB_SPLITS];
    size_t i = 0;
    size_t srcBytesTotal = 0;
    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
    seqStore_t nextSeqStore;
    seqStore_t currSeqStore;

    /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
     * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
     * separate repcode histories that simulate repcode history on compression and decompression side,
     * and use the histories to determine whether we must replace a particular repcode with its raw offset.
     *
     * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
     *    or RLE. This allows us to retrieve the offset value that an invalid repcode references within
     *    a nocompress/RLE block.
     * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
     *    the replacement offset value rather than the original repcode to update the repcode history.
     *    dRep also will be the final repcode history sent to the next block.
     *
     * See ZSTD_seqStore_resolveOffCodes() for more details.
     */
    repcodes_t dRep;
    repcodes_t cRep;
    ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));

    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    if (numSplits == 0) {
        /* splitting not advantageous: compress the whole seqStore as one block */
        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
                                                                    &dRep, &cRep,
                                                                    op, dstCapacity,
                                                                    ip, blockSize,
                                                                    lastBlock, 0 /* isPartition */);
        FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
        DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
        return cSizeSingleBlock;
    }

    ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]);
    for (i = 0; i <= numSplits; ++i) {
        size_t srcBytes;
        size_t cSizeChunk;
        U32 const lastPartition = (i == numSplits);
        U32 lastBlockEntireSrc = 0;

        srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore);
        srcBytesTotal += srcBytes;
        if (lastPartition) {
            /* This is the final partition, need to account for possible last literals */
            srcBytes += blockSize - srcBytesTotal;
            lastBlockEntireSrc = lastBlock;
        } else {
            ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
        }

        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore,
                                                       &dRep, &cRep,
                                                       op, dstCapacity,
                                                       ip, srcBytes,
                                                       lastBlockEntireSrc, 1 /* isPartition */);
        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk);
        FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");

        ip += srcBytes;
        op += cSizeChunk;
        dstCapacity -= cSizeChunk;
        cSize += cSizeChunk;
        currSeqStore = nextSeqStore;
        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
    }
    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
     * for the next block.
     */
    ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
    return cSize;
}
/* Entry point for block splitting: builds the seqStore for src, falls back to a raw
 * block when the input is not compressible, otherwise delegates to
 * ZSTD_compressBlock_splitBlock_internal(). Returns total compressed size or an error. */
static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
                                            void* dst, size_t dstCapacity,
                                            const void* src, size_t srcSize, U32 lastBlock) {
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    U32 nbSeq;
    size_t cSize;
    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
        if (bss == ZSTDbss_noCompress) {
            /* keep entropy-table state consistent even when emitting a raw block */
            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
            FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
            DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
            return cSize;
        }
        nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
    }

    assert(zc->appliedParams.splitBlocks == 1);
    cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
    FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
    return cSize;
}
/* Compresses one block: builds the seqStore, entropy-compresses it, and applies the
 * RLE shortcut in frame mode. Returns the compressed size (0 => not compressible,
 * 1 => RLE, caller writes the header) or an error. */
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize, U32 frame)
{
    /* This the upper bound for the length of an rle block.
     * This isn't the actual upper bound. Finding the real threshold
     * needs further investigation.
     */
    const U32 rleMaxLength = 25;
    size_t cSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
    }

    if (zc->seqCollector.collectSequences) {
        /* sequence-collection mode: record sequences, emit nothing */
        ZSTD_copyBlockSequences(zc);
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        return 0;
    }

    /* encode sequences and literals */
    cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
            &zc->appliedParams,
            dst, dstCapacity,
            srcSize,
            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
            zc->bmi2);

    if (zc->seqCollector.collectSequences) {
        ZSTD_copyBlockSequences(zc);
        return 0;
    }

    if (frame &&
        /* We don't want to emit our first block as a RLE even if it qualifies because
         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        !zc->isFirstBlock &&
        cSize < rleMaxLength &&
        ZSTD_isRLE(ip, srcSize))
    {
        /* NOTE(review): RLE emission restored from context — verify against upstream */
        cSize = 1;
        op[0] = ip[0];
    }

out:
    if (!ZSTD_isError(cSize) && cSize > 1) {
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
    }
    /* We check that dictionaries have offset codes available for the first
     * block. After the first block, the offcode table might not have large
     * enough codes to represent the offsets in the data.
     */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}
/* Core of the targetCBlockSize path: try RLE, then superblock compression, and
 * finally fall back to an uncompressed block so ZSTD_compressBound() is respected. */
static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const size_t bss, U32 lastBlock)
{
    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
    if (bss == ZSTDbss_compress) {
        if (/* We don't want to emit our first block as a RLE even if it qualifies because
             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
             * This is only an issue for zstd <= v1.4.3
             */
            !zc->isFirstBlock &&
            ZSTD_maybeRLE(&zc->seqStore) &&
            ZSTD_isRLE((BYTE const*)src, srcSize))
        {
            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
        }
        /* Attempt superblock compression.
         *
         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
         * standard ZSTD_compressBound(). This is a problem, because even if we have
         * space now, taking an extra byte now could cause us to run out of space later
         * and violate ZSTD_compressBound().
         *
         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
         *
         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
         * uncompressed block in these cases:
         *   * cSize == 0: Return code for an uncompressed block.
         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
         *     output space.
         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
         *     emit an uncompressed block.
         */
        {   size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
            if (cSize != ERROR(dstSize_tooSmall)) {
                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
                    ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
                    return cSize;
                }
            }
        }
    }

    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
    /* Superblock compression failed, attempt to emit a single no compress block.
     * The decoder will be able to stream this block since it is uncompressed.
     */
    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
}
/* Wrapper for the targetCBlockSize path: builds the seqStore, delegates to the
 * body, and maintains the offcode repeat-mode invariant after the first block. */
static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               U32 lastBlock)
{
    size_t cSize = 0;
    const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");

    cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");

    /* see ZSTD_compressBlock_internal(): downgrade offcode table trust after first block */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}
/* Checks whether the match-state window indices risk overflowing for input [ip, iend),
 * and if so rescales them: reduces all table indices by `correction`, marks workspace
 * tables dirty/clean around the reduction, and invalidates any attached dictionary. */
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         void const* ip,
                                         void const* iend)
{
    U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
    U32 const maxDist = (U32)1 << params->cParams.windowLog;
    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
        /* correction is guaranteed to fit: all index-producing logs are bounded below 32 bits */
        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
        ZSTD_cwksp_mark_tables_dirty(ws);
        ZSTD_reduceIndex(ms, params, correction);
        ZSTD_cwksp_mark_tables_clean(ws);
        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
        else ms->nextToUpdate -= correction;
        /* invalidate dictionaries on overflow correction */
        ms->loadedDictEnd = 0;
        ms->dictMatchState = NULL;
    }
}
/*! ZSTD_compress_frameChunk() :
 *  Compress a chunk of data into one or multiple blocks.
 *  All blocks will be terminated, all input will be consumed.
 *  Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 *  Frame is supposed already started (header already produced)
 * @return : compressed size, or an error code
 */
static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                                     U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;

    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);

    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
        XXH64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);

        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
                        dstSize_tooSmall,
                        "not enough space to store compressed block");
        if (remaining < blockSize) blockSize = remaining;

        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;

        {   size_t cSize;
            if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
                cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
                assert(cSize > 0);
                assert(cSize <= blockSize + ZSTD_blockHeaderSize);
            } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
                cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
                assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
            } else {
                cSize = ZSTD_compressBlock_internal(cctx,
                                        op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                        ip, blockSize, 1 /* frame */);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");

                if (cSize == 0) {  /* block is not compressible */
                    cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
                    FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
                } else {
                    /* block header: bit0 = lastBlock, bits1-2 = block type, bits3+ = size */
                    U32 const cBlockHeader = cSize == 1 ?
                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
                    MEM_writeLE24(op, cBlockHeader);
                    cSize += ZSTD_blockHeaderSize;
                }
            }

            ip += blockSize;
            assert(remaining >= blockSize);
            remaining -= blockSize;
            op += cSize;
            assert(dstCapacity >= cSize);
            dstCapacity -= cSize;
            cctx->isFirstBlock = 0;
            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
                        (unsigned)cSize);
    }   }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return (size_t)(op-ostart);
}
/* Writes the zstd frame header (magic number, frame header descriptor, optional
 * window descriptor, dictID and frame content size fields) into dst.
 * @return : number of bytes written, or dstSize_tooSmall. */
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{   BYTE* const op = (BYTE*)dst;
    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
    U32   const checksumFlag = params->fParams.checksumFlag>0;
    U32   const windowSize = (U32)1 << params->cParams.windowLog;
    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32   const fcsCode = params->fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
    size_t pos=0;

    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
                    "dst buf is too small to fit worst-case frame header size.");
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
    if (params->format == ZSTD_f_zstd1) {
        /* magicless format skips the 4-byte magic number */
        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
        pos = 4;
    }
    op[pos++] = frameHeaderDescriptionByte;
    if (!singleSegment) op[pos++] = windowLogByte;
    /* dictID field: 0, 1, 2, or 4 bytes, little-endian */
    switch(dictIDSizeCode)
    {
        default:
            assert(0); /* impossible */
        case 0 : break;
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
    }
    /* frame content size field: 0/1/2/4/8 bytes, little-endian */
    switch(fcsCode)
    {
        default:
            assert(0); /* impossible */
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
    }
    return pos;
}
3979 /* ZSTD_writeSkippableFrame_advanced() :
3980 * Writes out a skippable frame with the specified magic number variant (16 are supported),
3981 * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
3983 * Returns the total number of bytes written, or a ZSTD error code.
3985 size_t ZSTD_writeSkippableFrame(void* dst
, size_t dstCapacity
,
3986 const void* src
, size_t srcSize
, unsigned magicVariant
) {
3987 BYTE
* op
= (BYTE
*)dst
;
3988 RETURN_ERROR_IF(dstCapacity
< srcSize
+ ZSTD_SKIPPABLEHEADERSIZE
/* Skippable frame overhead */,
3989 dstSize_tooSmall
, "Not enough room for skippable frame");
3990 RETURN_ERROR_IF(srcSize
> (unsigned)0xFFFFFFFF, srcSize_wrong
, "Src size too large for skippable frame");
3991 RETURN_ERROR_IF(magicVariant
> 15, parameter_outOfBound
, "Skippable frame magic number variant not supported");
3993 MEM_writeLE32(op
, (U32
)(ZSTD_MAGIC_SKIPPABLE_START
+ magicVariant
));
3994 MEM_writeLE32(op
+4, (U32
)srcSize
);
3995 ZSTD_memcpy(op
+8, src
, srcSize
);
3996 return srcSize
+ ZSTD_SKIPPABLEHEADERSIZE
;
3999 /* ZSTD_writeLastEmptyBlock() :
4000 * output an empty Block with end-of-frame mark to complete a frame
4001 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
4002 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
4004 size_t ZSTD_writeLastEmptyBlock(void* dst
, size_t dstCapacity
)
4006 RETURN_ERROR_IF(dstCapacity
< ZSTD_blockHeaderSize
, dstSize_tooSmall
,
4007 "dst buf is too small to write frame trailer empty block.");
4008 { U32
const cBlockHeader24
= 1 /*lastBlock*/ + (((U32
)bt_raw
)<<1); /* 0 size */
4009 MEM_writeLE24(dst
, cBlockHeader24
);
4010 return ZSTD_blockHeaderSize
;
/* Attaches an externally-provided sequence array to the cctx, to be consumed in
 * place of internally-generated sequences. Only legal before compression starts
 * (ZSTDcs_init) and incompatible with long-distance matching. Returns 0 or an error. */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
                    "wrong cctx stage");
    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
                    parameter_unsupported,
                    "incompatible with ldm");
    /* caller retains ownership of `seq`; it must outlive the compression — no copy is made */
    cctx->externSeqStore.seq = seq;
    cctx->externSeqStore.size = nbSeq;
    cctx->externSeqStore.capacity = nbSeq;
    cctx->externSeqStore.pos = 0;
    cctx->externSeqStore.posInSequence = 0;
    return 0;
}
/* Common driver for streaming/one-shot compression: writes the frame header on the
 * first call (frame mode), updates the sliding window(s), applies overflow correction
 * (block mode), then compresses src as a frame chunk or a single block.
 * @return : number of bytes written into dst (header + compressed data), or an error. */
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
    size_t fhSize = 0;

    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                cctx->stage, (unsigned)srcSize);
    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
                    "missing init (ZSTD_compressBegin)");

    if (frame && (cctx->stage==ZSTDcs_init)) {
        /* first call in frame mode: emit the frame header before any block */
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        assert(fhSize <= dstCapacity);
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */

    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
        /* non-contiguous input: restart table insertion from the new segment */
        ms->nextToUpdate = ms->window.dictLimit;
    }
    if (cctx->appliedParams.ldmParams.enableLdm) {
        /* LDM maintains its own window over the same input */
        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
    }

    if (!frame) {
        /* overflow check and correction for block mode */
        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams,
            src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    {   size_t const cSize = frame ?
                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
        cctx->consumedSrcSize += srcSize;
        cctx->producedCSize += (cSize + fhSize);
        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
            RETURN_ERROR_IF(
                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
                srcSize_wrong,
                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
                (unsigned)cctx->pledgedSrcSizePlusOne-1,
                (unsigned)cctx->consumedSrcSize);
        }
        return cSize + fhSize;
    }
}
/* Public streaming entry point: compresses src as frame content (writing the frame
 * header on the first call) without marking the frame as finished. */
size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}
4099 size_t ZSTD_getBlockSize(const ZSTD_CCtx
* cctx
)
4101 ZSTD_compressionParameters
const cParams
= cctx
->appliedParams
.cParams
;
4102 assert(!ZSTD_checkCParams(cParams
));
4103 return MIN (ZSTD_BLOCKSIZE_MAX
, (U32
)1 << cParams
.windowLog
);
/* Public block-level entry point: compresses one raw block (no frame header,
 * no block framing written by the frame layer). Rejects inputs larger than the
 * cctx's block size. */
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
      RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }

    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
/*! ZSTD_loadDictionaryContent() :
 *  Load the dictionary's raw content into the match state tables, so that
 *  future matches can reference the dictionary.
 * @ms   : match state to fill (its cParams must equal params->cParams)
 * @ls   : long-distance-matching state; may be NULL when LDM is disabled
 * @ws   : workspace, used for overflow correction
 * @return : 0, or an error code
 */
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                         ldmState_t* ls,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         const void* src, size_t srcSize,
                                         ZSTD_dictTableLoadMethod_e dtlm)
{
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    /* Only touch the LDM state when LDM is enabled AND an LDM state was provided. */
    int const loadLdmDict = params->ldmParams.enableLdm && ls != NULL;

    /* Assert that the ms params match the params we're being given */
    ZSTD_assertEqualCParams(params->cParams, ms->cParams);

    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
        /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
         * Dictionaries right at the edge will immediately trigger overflow
         * correction, but I don't want to insert extra constraints here.
         */
        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
        /* We must have cleared our windows when our source is this large. */
        assert(ZSTD_window_isEmpty(ms->window));
        if (loadLdmDict)
            assert(ZSTD_window_isEmpty(ls->window));
        /* If the dictionary is too large, only load the suffix of the dictionary. */
        if (srcSize > maxDictSize) {
            ip = iend - maxDictSize;
            src = ip;
            srcSize = maxDictSize;
        }
    }

    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
    /* Extend the validity window over the dictionary content. */
    ZSTD_window_update(&ms->window, src, srcSize);
    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);

    if (loadLdmDict) {
        ZSTD_window_update(&ls->window, src, srcSize);
        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
    }

    /* Content too small to produce any match : nothing to index. */
    if (srcSize <= HASH_READ_SIZE) return 0;

    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);

    if (loadLdmDict)
        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);

    /* Fill the tables appropriate for the configured search strategy. */
    switch(params->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, dtlm);
        break;
    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
        assert(srcSize >= HASH_READ_SIZE);
        if (ms->dedicatedDictSearch) {
            assert(ms->chainTable != NULL);
            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
        } else {
            assert(params->useRowMatchFinder != ZSTD_urm_auto);
            if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
                /* Row match finder keeps a per-entry tag table; clear it before indexing. */
                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
                ZSTD_memset(ms->tagTable, 0, tagTableSize);
                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
                DEBUGLOG(4, "Using row-based hash table for lazy dict");
            } else {
                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
            }
        }
        break;

    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        assert(srcSize >= HASH_READ_SIZE);
        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
        break;

    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    ms->nextToUpdate = (U32)(iend - ms->window.base);
    return 0;
}
4213 /* Dictionaries that assign zero probability to symbols that show up causes problems
4214 * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
4215 * and only dictionaries with 100% valid symbols can be assumed valid.
4217 static FSE_repeat
ZSTD_dictNCountRepeat(short* normalizedCounter
, unsigned dictMaxSymbolValue
, unsigned maxSymbolValue
)
4220 if (dictMaxSymbolValue
< maxSymbolValue
) {
4221 return FSE_repeat_check
;
4223 for (s
= 0; s
<= maxSymbolValue
; ++s
) {
4224 if (normalizedCounter
[s
] == 0) {
4225 return FSE_repeat_check
;
4228 return FSE_repeat_valid
;
/* ZSTD_loadCEntropy() :
 * Parse the entropy section of a zstd dictionary (Huffman literal table,
 * then offset / match-length / literal-length FSE tables, then the 3
 * repcodes) into @bs, validating each table along the way.
 * @workspace : scratch area of at least HUF_WORKSPACE_SIZE bytes
 * @return : number of bytes consumed from @dict, or an error code.
 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize)
{
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
    const BYTE* const dictEnd = dictPtr + dictSize;
    dictPtr += 8;
    bs->entropy.huf.repeatMode = HUF_repeat_check;

    {   unsigned maxSymbolValue = 255;
        unsigned hasZeroWeights = 1;
        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
            dictEnd-dictPtr, &hasZeroWeights);

        /* We only set the loaded table as valid if it contains all non-zero
         * weights. Otherwise, we set it to check */
        if (!hasZeroWeights)
            bs->entropy.huf.repeatMode = HUF_repeat_valid;

        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
        dictPtr += hufHeaderSize;
    }

    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
        /* fill all offset symbols to avoid garbage at end of table */
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.offcodeCTable,
                offcodeNCount, MaxOff, offcodeLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
        dictPtr += offcodeHeaderSize;
    }

    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.matchlengthCTable,
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
        dictPtr += matchlengthHeaderSize;
    }

    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.litlengthCTable,
                litlengthNCount, litlengthMaxValue, litlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
        dictPtr += litlengthHeaderSize;
    }

    /* The 12 bytes after the entropy tables are the 3 initial repcodes. */
    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
    bs->rep[0] = MEM_readLE32(dictPtr+0);
    bs->rep[1] = MEM_readLE32(dictPtr+4);
    bs->rep[2] = MEM_readLE32(dictPtr+8);
    dictPtr += 12;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        U32 offcodeMax = MaxOff;
        if (dictContentSize <= ((U32)-1) - 128 KB) {
            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
        }
        /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
        bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));

        /* All repCodes must be <= dictContentSize and != 0 */
        {   U32 u;
            for (u=0; u<3; u++) {
                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
    }   }   }

    return dictPtr - (const BYTE*)dict;
}
/* Dictionary format :
 * See :
 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
 */
/*! ZSTD_loadZstdDictionary() :
 * @return : dictID, or an error code
 *  assumptions : magic number supposed already checked
 *                dictSize supposed >= 8
 */
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
                                      ZSTD_matchState_t* ms,
                                      ZSTD_cwksp* ws,
                                      ZSTD_CCtx_params const* params,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictTableLoadMethod_e dtlm,
                                      void* workspace)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    size_t dictID;
    size_t eSize;
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(dictSize >= 8);
    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);

    /* dictID may be zeroed out by the frame parameters (noDictIDFlag). */
    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
    /* Load the entropy tables first; eSize tells where the raw content begins. */
    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
    dictPtr += eSize;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        /* ls (LDM state) intentionally NULL : LDM never indexes zstd dictionaries here. */
        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
    }
    return dictID;
}
/** ZSTD_compress_insertDictionary() :
 *  Dispatch on dictContentType : raw content is indexed directly, a full
 *  zstd dictionary (detected by magic number) goes through
 *  ZSTD_loadZstdDictionary(), and ZSTD_dct_auto picks based on the magic.
 * @return : dictID, or an error code */
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
                               ZSTD_matchState_t* ms,
                               ldmState_t* ls,
                               ZSTD_cwksp* ws,
                         const ZSTD_CCtx_params* params,
                         const void* dict, size_t dictSize,
                               ZSTD_dictContentType_e dictContentType,
                               ZSTD_dictTableLoadMethod_e dtlm,
                               void* workspace)
{
    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
    if ((dict==NULL) || (dictSize<8)) {
        /* Too small to be a usable dictionary; only an error when a full dict was demanded. */
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
        return 0;
    }

    ZSTD_reset_compressedBlockState(bs);

    /* dict restricted modes */
    if (dictContentType == ZSTD_dct_rawContent)
        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);

    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
        if (dictContentType == ZSTD_dct_auto) {
            DEBUGLOG(4, "raw content dictionary detected");
            return ZSTD_loadDictionaryContent(
                ms, ls, ws, params, dict, dictSize, dtlm);
        }
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
        assert(0);   /* impossible */
    }

    /* dict as full zstd dictionary */
    return ZSTD_loadZstdDictionary(
        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
}
4401 #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
4402 #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
/*! ZSTD_compressBegin_internal() :
 *  Common initialization for all compression entry points :
 *  either attach/copy a prepared CDict, or reset the context and load
 *  the provided dict buffer. Exactly one of (dict, cdict) may be non-NULL.
 * @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                    ZSTD_buffered_policy_e zbuff)
{
    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
#if ZSTD_TRACE
    cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
#endif
    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    /* Use the CDict's prepared tables directly when the source is small
     * enough relative to the dictionary (or unknown), unless the caller
     * forces a fresh load. */
    if ( (cdict)
      && (cdict->dictContentSize > 0)
      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
        || cdict->compressionLevel == 0)
      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
    }

    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                     dictContentSize,
                                     ZSTDcrp_makeClean, zbuff) , "");
    {   size_t const dictID = cdict ?
                ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
                        cdict->dictContentSize, cdict->dictContentType, dtlm,
                        cctx->entropyWorkspace)
              : ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
                        dictContentType, dtlm, cctx->entropyWorkspace);
        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
        assert(dictID <= UINT_MAX);
        cctx->dictID = (U32)dictID;
        cctx->dictContentSize = dictContentSize;
    }
    return 0;
}
/* ZSTD_compressBegin_advanced_internal() :
 * Validates compression parameters, then forwards to the common
 * initializer in non-buffered mode.
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
    /* compression parameters verification and optimization */
    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, dictContentType, dtlm,
                                       cdict,
                                       params, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}
/*! ZSTD_compressBegin_advanced() :
 *  Public advanced entry point : wraps the caller-provided ZSTD_parameters
 *  into ZSTD_CCtx_params (no compression level) and initializes the context.
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
                             const void* dict, size_t dictSize,
                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compressBegin_advanced_internal(cctx,
                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                            NULL /*cdict*/,
                                            &cctxParams, pledgedSrcSize);
}
/* ZSTD_compressBegin_usingDict() :
 * Initialize a compression context from a compression level and an optional
 * raw dict buffer; source size is treated as unknown.
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_CCtx_params cctxParams;
    {
        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
        /* level 0 means "default level" */
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}
4497 size_t ZSTD_compressBegin(ZSTD_CCtx
* cctx
, int compressionLevel
)
4499 return ZSTD_compressBegin_usingDict(cctx
, NULL
, 0, compressionLevel
);
/*! ZSTD_writeEpilogue() :
 *  Finalize a frame : write the frame header if the frame is still empty,
 *  append a final empty raw block when needed, and append the xxhash
 *  checksum when the frame parameters request one.
 * @return : nb of bytes written into dst (or an error code) */
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    size_t fhSize = 0;

    DEBUGLOG(4, "ZSTD_writeEpilogue");
    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");

    /* special case : empty frame */
    if (cctx->stage == ZSTDcs_init) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        dstCapacity -= fhSize;
        op += fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (cctx->stage != ZSTDcs_ending) {
        /* write one last empty block, make it the "last" block */
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
    }

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32(op, checksum);
        op += 4;
    }

    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
    return op-ostart;
}
/* ZSTD_CCtx_trace() :
 * Emit an end-of-compression trace event when tracing was begun for this
 * context; a no-op otherwise. extraCSize accounts for bytes (e.g. the
 * epilogue) not yet counted in producedCSize. */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
{
#if ZSTD_TRACE
    if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {
        /* Heuristic : any buffering or worker threads implies streaming usage. */
        int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0;
        ZSTD_Trace trace;
        ZSTD_memset(&trace, 0, sizeof(trace));
        trace.version = ZSTD_VERSION_NUMBER;
        trace.streaming = streaming;
        trace.dictionaryID = cctx->dictID;
        trace.dictionarySize = cctx->dictContentSize;
        trace.uncompressedSize = cctx->consumedSrcSize;
        trace.compressedSize = cctx->producedCSize + extraCSize;
        trace.params = &cctx->appliedParams;
        trace.cctx = cctx;
        ZSTD_trace_compress_end(cctx->traceCtx, &trace);
    }
    cctx->traceCtx = 0;
#else
    (void)cctx;
    (void)extraCSize;
#endif
}
/* ZSTD_compressEnd() :
 * Compress the final chunk and write the frame epilogue.
 * Verifies that the pledged source size (when set) matches what was
 * actually consumed.
 * @return : total bytes written (chunk + epilogue), or an error code */
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t endResult;
    size_t const cSize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
        DEBUGLOG(4, "end of frame : controlling src size");
        RETURN_ERROR_IF(
            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
            srcSize_wrong,
            "error : pledgedSrcSize = %u, while realSrcSize = %u",
            (unsigned)cctx->pledgedSrcSizePlusOne-1,
            (unsigned)cctx->consumedSrcSize);
    }
    ZSTD_CCtx_trace(cctx, endResult);
    return cSize + endResult;
}
/* ZSTD_compress_advanced() :
 * One-shot compression with explicit ZSTD_parameters (validated here)
 * and an optional raw dict buffer.
 * @return : compressed size, or an error code */
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced");
    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compress_advanced_internal(cctx,
                                           dst, dstCapacity,
                                           src, srcSize,
                                           dict, dictSize,
                                           &cctx->simpleApiParams);
}
/* Internal code-path of one-shot compression :
 * assumes params have already been validated by the caller.
 * @return : compressed size, or an error code */
size_t ZSTD_compress_advanced_internal(
        ZSTD_CCtx* cctx,
        void* dst, size_t dstCapacity,
        const void* src, size_t srcSize,
        const void* dict,size_t dictSize,
        const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
    /* srcSize is pledged up-front, enabling content-size in the frame header. */
    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                         params, srcSize, ZSTDb_not_buffered) , "");
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
/* ZSTD_compress_usingDict() :
 * One-shot compression from a compression level and an optional raw dict.
 * @return : compressed size, or an error code */
size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               int compressionLevel)
{
    {
        /* dictSize only matters for parameter tuning when a dict is actually provided. */
        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
        assert(params.fParams.contentSizeFlag == 1);
        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
}
4641 size_t ZSTD_compressCCtx(ZSTD_CCtx
* cctx
,
4642 void* dst
, size_t dstCapacity
,
4643 const void* src
, size_t srcSize
,
4644 int compressionLevel
)
4646 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize
);
4647 assert(cctx
!= NULL
);
4648 return ZSTD_compress_usingDict(cctx
, dst
, dstCapacity
, src
, srcSize
, NULL
, 0, compressionLevel
);
/* ZSTD_compress() :
 * Simplest one-shot API. The working context lives on the heap or the
 * stack depending on ZSTD_COMPRESS_HEAPMODE (see top of file).
 * @return : compressed size, or an error code */
size_t ZSTD_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize,
                     int compressionLevel)
{
    size_t result;
#if ZSTD_COMPRESS_HEAPMODE
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtx(cctx);
#else
    ZSTD_CCtx ctxBody;
    ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
    result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtxContent(&ctxBody);   /* can't free ctxBody itself, as it's on stack; free only heap content */
#endif
    return result;
}
4671 /* ===== Dictionary API ===== */
/*! ZSTD_estimateCDictSize_advanced() :
 *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
size_t ZSTD_estimateCDictSize_advanced(
        size_t dictSize, ZSTD_compressionParameters cParams,
        ZSTD_dictLoadMethod_e dictLoadMethod)
{
    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
    /* Sum of : CDict struct, entropy workspace, match state tables,
     * plus a copy of the dict content unless loaded by reference. */
    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
          * in case we are using DDS with row-hash. */
         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams),
                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
}
4690 size_t ZSTD_estimateCDictSize(size_t dictSize
, int compressionLevel
)
4692 ZSTD_compressionParameters
const cParams
= ZSTD_getCParams_internal(compressionLevel
, ZSTD_CONTENTSIZE_UNKNOWN
, dictSize
, ZSTD_cpm_createCDict
);
4693 return ZSTD_estimateCDictSize_advanced(dictSize
, cParams
, ZSTD_dlm_byCopy
);
/* ZSTD_sizeof_CDict() :
 * In-memory footprint of a CDict, including its workspace.
 * Supports NULL input (returns 0). */
size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support sizeof on NULL */
    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
    /* cdict may be in the workspace : don't count the struct twice in that case */
    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
        + ZSTD_cwksp_sizeof(&cdict->workspace);
}
/* ZSTD_initCDict_internal() :
 * Fill an allocated CDict : copy or reference the dict content, reserve
 * the entropy workspace, reset the match state, and digest the dictionary.
 * @params is taken by value because it is locally adjusted before loading.
 * @return : 0, or an error code */
static size_t ZSTD_initCDict_internal(
                    ZSTD_CDict* cdict,
              const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_CCtx_params params)
{
    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    assert(!ZSTD_checkCParams(params.cParams));
    cdict->matchState.cParams = params.cParams;
    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
        /* reference the caller's buffer directly; caller keeps it alive */
        cdict->dictContent = dictBuffer;
    } else {
        /* copy the dict content into the CDict's own workspace */
        void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
        cdict->dictContent = internalBuffer;
        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
    }
    cdict->dictContentSize = dictSize;
    cdict->dictContentType = dictContentType;

    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);

    /* Reset the state to no dictionary */
    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
    /* NOTE(review): some reset_matchState argument lines were lost in this
     * file's extraction; this call list follows the surviving lines
     * (params.useRowMatchFinder, ZSTD_resetTarget_CDict) — confirm against
     * the function's declaration. */
    FORWARD_IF_ERROR(ZSTD_reset_matchState(
        &cdict->matchState,
        &cdict->workspace,
        &params.cParams,
        params.useRowMatchFinder,
        ZSTDcrp_makeClean,
        ZSTDirp_reset,
        ZSTD_resetTarget_CDict), "");
    /* (Maybe) load the dictionary
     * Skips loading the dictionary if it is < 8 bytes.
     */
    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
        params.fParams.contentSizeFlag = 1;
        {   size_t const dictID = ZSTD_compress_insertDictionary(
                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
                    &params, cdict->dictContent, cdict->dictContentSize,
                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
            assert(dictID <= (size_t)(U32)-1);
            cdict->dictID = (U32)dictID;
        }
    }

    return 0;
}
/* ZSTD_createCDict_advanced_internal() :
 * Allocate one contiguous workspace holding the CDict struct, its entropy
 * workspace, match state, and (optionally) a copy of the dict content.
 * Content is NOT loaded here — see ZSTD_initCDict_internal().
 * @return : the allocated CDict, or NULL on allocation failure / bad customMem */
static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
                                      U32 enableDedicatedDictSearch,
                                      ZSTD_customMem customMem)
{
    /* customAlloc and customFree must be provided together (or neither). */
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;

    {   size_t const workspaceSize =
            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
            (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
        void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
        ZSTD_cwksp ws;
        ZSTD_CDict* cdict;

        if (!workspace) {
            ZSTD_customFree(workspace, customMem);
            return NULL;
        }

        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);

        /* The CDict struct itself is the first object carved from the workspace. */
        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        assert(cdict != NULL);
        ZSTD_cwksp_move(&cdict->workspace, &ws);
        cdict->customMem = customMem;
        cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
        cdict->useRowMatchFinder = useRowMatchFinder;
        return cdict;
    }
}
/* ZSTD_createCDict_advanced() :
 * Legacy advanced constructor : wraps the explicit cParams into
 * ZSTD_CCtx_params and forwards to ZSTD_createCDict_advanced2(). */
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_customMem customMem)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
    ZSTD_CCtxParams_init(&cctxParams, 0);
    cctxParams.cParams = cParams;
    cctxParams.customMem = customMem;
    return ZSTD_createCDict_advanced2(
        dictBuffer, dictSize,
        dictLoadMethod, dictContentType,
        &cctxParams, customMem);
}
4811 ZSTDLIB_API ZSTD_CDict
* ZSTD_createCDict_advanced2(
4812 const void* dict
, size_t dictSize
,
4813 ZSTD_dictLoadMethod_e dictLoadMethod
,
4814 ZSTD_dictContentType_e dictContentType
,
4815 const ZSTD_CCtx_params
* originalCctxParams
,
4816 ZSTD_customMem customMem
)
4818 ZSTD_CCtx_params cctxParams
= *originalCctxParams
;
4819 ZSTD_compressionParameters cParams
;
4822 DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType
);
4823 if (!customMem
.customAlloc
^ !customMem
.customFree
) return NULL
;
4825 if (cctxParams
.enableDedicatedDictSearch
) {
4826 cParams
= ZSTD_dedicatedDictSearch_getCParams(
4827 cctxParams
.compressionLevel
, dictSize
);
4828 ZSTD_overrideCParams(&cParams
, &cctxParams
.cParams
);
4830 cParams
= ZSTD_getCParamsFromCCtxParams(
4831 &cctxParams
, ZSTD_CONTENTSIZE_UNKNOWN
, dictSize
, ZSTD_cpm_createCDict
);
4834 if (!ZSTD_dedicatedDictSearch_isSupported(&cParams
)) {
4835 /* Fall back to non-DDSS params */
4836 cctxParams
.enableDedicatedDictSearch
= 0;
4837 cParams
= ZSTD_getCParamsFromCCtxParams(
4838 &cctxParams
, ZSTD_CONTENTSIZE_UNKNOWN
, dictSize
, ZSTD_cpm_createCDict
);
4841 DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams
.enableDedicatedDictSearch
);
4842 cctxParams
.cParams
= cParams
;
4843 cctxParams
.useRowMatchFinder
= ZSTD_resolveRowMatchFinderMode(cctxParams
.useRowMatchFinder
, &cParams
);
4845 cdict
= ZSTD_createCDict_advanced_internal(dictSize
,
4846 dictLoadMethod
, cctxParams
.cParams
,
4847 cctxParams
.useRowMatchFinder
, cctxParams
.enableDedicatedDictSearch
,
4850 if (ZSTD_isError( ZSTD_initCDict_internal(cdict
,
4852 dictLoadMethod
, dictContentType
,
4854 ZSTD_freeCDict(cdict
);
/* ZSTD_createCDict() :
 * Simple constructor : by-copy loading, auto dict-type detection,
 * default allocator. Records the compression level for later reuse. */
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                                  ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                                  cParams, ZSTD_defaultCMem);
    if (cdict)
        /* level 0 means "default level" */
        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
    return cdict;
}
/* ZSTD_createCDict_byReference() :
 * Same as ZSTD_createCDict(), but references the caller's buffer instead
 * of copying it; the buffer must outlive the CDict. */
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                                  ZSTD_dlm_byRef, ZSTD_dct_auto,
                                                  cParams, ZSTD_defaultCMem);
    if (cdict)
        /* level 0 means "default level" */
        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
    return cdict;
}
/* ZSTD_freeCDict() :
 * Release a CDict and its workspace. When the CDict struct itself lives
 * inside the workspace (single-allocation layout), freeing the workspace
 * already releases it. Supports NULL input. */
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support free on NULL */
    {   ZSTD_customMem const cMem = cdict->customMem;
        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
        ZSTD_cwksp_free(&cdict->workspace, cMem);
        if (!cdictInWorkspace) {
            ZSTD_customFree(cdict, cMem);
        }
        return 0;
    }
}
/*! ZSTD_initStaticCDict_advanced() :
 *  Generate a digested dictionary in provided memory area.
 *  workspace: The memory area to emplace the dictionary into.
 *             Provided pointer must 8-bytes aligned.
 *             It must outlive dictionary usage.
 *  workspaceSize: Use ZSTD_estimateCDictSize()
 *                 to determine how large workspace must be.
 *  cParams : use ZSTD_getCParams() to transform a compression level
 *            into its relevants cParams.
 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
 *  Note : there is no corresponding "free" function.
 *         Since workspace was allocated externally, it must be freed externally.
 */
const ZSTD_CDict* ZSTD_initStaticCDict(
                                 void* workspace, size_t workspaceSize,
                           const void* dict, size_t dictSize,
                                 ZSTD_dictLoadMethod_e dictLoadMethod,
                                 ZSTD_dictContentType_e dictContentType,
                                 ZSTD_compressionParameters cParams)
{
    ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams);
    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
                            + matchStateSize;
    ZSTD_CDict* cdict;
    ZSTD_CCtx_params params;

    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */

    {   ZSTD_cwksp ws;
        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        if (cdict == NULL) return NULL;
        ZSTD_cwksp_move(&cdict->workspace, &ws);
    }

    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
    if (workspaceSize < neededSize) return NULL;

    ZSTD_CCtxParams_init(&params, 0);
    params.cParams = cParams;
    params.useRowMatchFinder = useRowMatchFinder;
    cdict->useRowMatchFinder = useRowMatchFinder;

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                              dict, dictSize,
                                              dictLoadMethod, dictContentType,
                                              params) ))
        return NULL;

    return cdict;
}
4955 ZSTD_compressionParameters
ZSTD_getCParamsFromCDict(const ZSTD_CDict
* cdict
)
4957 assert(cdict
!= NULL
);
4958 return cdict
->matchState
.cParams
;
4961 /*! ZSTD_getDictID_fromCDict() :
4962 * Provides the dictID of the dictionary loaded into `cdict`.
4963 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
4964 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
4965 unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict
* cdict
)
4967 if (cdict
==NULL
) return 0;
4968 return cdict
->dictID
;
/* ZSTD_compressBegin_usingCDict_advanced() :
 * cdict must be != NULL.
 * Initializes a compression context from a CDict plus explicit frame
 * parameters and a pledged source size. */
size_t ZSTD_compressBegin_usingCDict_advanced(
    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
    /* Initialize the cctxParams from the cdict */
    {
        ZSTD_parameters params;
        params.fParams = fParams;
        /* Reuse the CDict's cParams when the source is small relative to the
         * dictionary (or unknown); otherwise re-derive from its level. */
        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
                        || cdict->compressionLevel == 0 ) ?
                ZSTD_getCParamsFromCDict(cdict)
              : ZSTD_getCParams(cdict->compressionLevel,
                                pledgedSrcSize,
                                cdict->dictContentSize);
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
    }
    /* Increase window log to fit the entire dictionary and source if the
     * source size is known. Limit the increase to 19, which is the
     * window log for compression level 1 with the largest source size.
     */
    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
    }
    return ZSTD_compressBegin_internal(cctx,
                                       NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                       cdict,
                                       &cctxParams, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}
5011 /* ZSTD_compressBegin_usingCDict() :
5012 * pledgedSrcSize=0 means "unknown"
5013 * if pledgedSrcSize>0, it will enable contentSizeFlag */
5014 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx
* cctx
, const ZSTD_CDict
* cdict
)
5016 ZSTD_frameParameters
const fParams
= { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
5017 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams
.noDictIDFlag
);
5018 return ZSTD_compressBegin_usingCDict_advanced(cctx
, cdict
, fParams
, ZSTD_CONTENTSIZE_UNKNOWN
);
5021 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx
* cctx
,
5022 void* dst
, size_t dstCapacity
,
5023 const void* src
, size_t srcSize
,
5024 const ZSTD_CDict
* cdict
, ZSTD_frameParameters fParams
)
5026 FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx
, cdict
, fParams
, srcSize
), ""); /* will check if cdict != NULL */
5027 return ZSTD_compressEnd(cctx
, dst
, dstCapacity
, src
, srcSize
);
5030 /*! ZSTD_compress_usingCDict() :
5031 * Compression using a digested Dictionary.
5032 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
5033 * Note that compression parameters are decided at CDict creation time
5034 * while frame parameters are hardcoded */
5035 size_t ZSTD_compress_usingCDict(ZSTD_CCtx
* cctx
,
5036 void* dst
, size_t dstCapacity
,
5037 const void* src
, size_t srcSize
,
5038 const ZSTD_CDict
* cdict
)
5040 ZSTD_frameParameters
const fParams
= { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
5041 return ZSTD_compress_usingCDict_advanced(cctx
, dst
, dstCapacity
, src
, srcSize
, cdict
, fParams
);
5046 /* ******************************************************************
5048 ********************************************************************/
5050 ZSTD_CStream
* ZSTD_createCStream(void)
5052 DEBUGLOG(3, "ZSTD_createCStream");
5053 return ZSTD_createCStream_advanced(ZSTD_defaultCMem
);
5056 ZSTD_CStream
* ZSTD_initStaticCStream(void *workspace
, size_t workspaceSize
)
5058 return ZSTD_initStaticCCtx(workspace
, workspaceSize
);
5061 ZSTD_CStream
* ZSTD_createCStream_advanced(ZSTD_customMem customMem
)
5062 { /* CStream and CCtx are now same object */
5063 return ZSTD_createCCtx_advanced(customMem
);
5066 size_t ZSTD_freeCStream(ZSTD_CStream
* zcs
)
5068 return ZSTD_freeCCtx(zcs
); /* same object */
5073 /*====== Initialization ======*/
5075 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX
; }
5077 size_t ZSTD_CStreamOutSize(void)
5079 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX
) + ZSTD_blockHeaderSize
+ 4 /* 32-bits hash */ ;
5082 static ZSTD_cParamMode_e
ZSTD_getCParamMode(ZSTD_CDict
const* cdict
, ZSTD_CCtx_params
const* params
, U64 pledgedSrcSize
)
5084 if (cdict
!= NULL
&& ZSTD_shouldAttachDict(cdict
, params
, pledgedSrcSize
))
5085 return ZSTD_cpm_attachDict
;
5087 return ZSTD_cpm_noAttachDict
;
5090 /* ZSTD_resetCStream():
5091 * pledgedSrcSize == 0 means "unknown" */
5092 size_t ZSTD_resetCStream(ZSTD_CStream
* zcs
, unsigned long long pss
)
5094 /* temporary : 0 interpreted as "unknown" during transition period.
5095 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
5096 * 0 will be interpreted as "empty" in the future.
5098 U64
const pledgedSrcSize
= (pss
==0) ? ZSTD_CONTENTSIZE_UNKNOWN
: pss
;
5099 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize
);
5100 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
) , "");
5101 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs
, pledgedSrcSize
) , "");
5105 /*! ZSTD_initCStream_internal() :
5106 * Note : for lib/compress only. Used by zstdmt_compress.c.
5107 * Assumption 1 : params are valid
5108 * Assumption 2 : either dict, or cdict, is defined, not both */
5109 size_t ZSTD_initCStream_internal(ZSTD_CStream
* zcs
,
5110 const void* dict
, size_t dictSize
, const ZSTD_CDict
* cdict
,
5111 const ZSTD_CCtx_params
* params
,
5112 unsigned long long pledgedSrcSize
)
5114 DEBUGLOG(4, "ZSTD_initCStream_internal");
5115 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
) , "");
5116 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs
, pledgedSrcSize
) , "");
5117 assert(!ZSTD_isError(ZSTD_checkCParams(params
->cParams
)));
5118 zcs
->requestedParams
= *params
;
5119 assert(!((dict
) && (cdict
))); /* either dict or cdict, not both */
5121 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs
, dict
, dictSize
) , "");
5123 /* Dictionary is cleared if !cdict */
5124 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs
, cdict
) , "");
5129 /* ZSTD_initCStream_usingCDict_advanced() :
5130 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
5131 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream
* zcs
,
5132 const ZSTD_CDict
* cdict
,
5133 ZSTD_frameParameters fParams
,
5134 unsigned long long pledgedSrcSize
)
5136 DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
5137 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
) , "");
5138 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs
, pledgedSrcSize
) , "");
5139 zcs
->requestedParams
.fParams
= fParams
;
5140 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs
, cdict
) , "");
5144 /* note : cdict must outlive compression session */
5145 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream
* zcs
, const ZSTD_CDict
* cdict
)
5147 DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
5148 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
) , "");
5149 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs
, cdict
) , "");
5154 /* ZSTD_initCStream_advanced() :
5155 * pledgedSrcSize must be exact.
5156 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
5157 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
5158 size_t ZSTD_initCStream_advanced(ZSTD_CStream
* zcs
,
5159 const void* dict
, size_t dictSize
,
5160 ZSTD_parameters params
, unsigned long long pss
)
5162 /* for compatibility with older programs relying on this behavior.
5163 * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
5164 * This line will be removed in the future.
5166 U64
const pledgedSrcSize
= (pss
==0 && params
.fParams
.contentSizeFlag
==0) ? ZSTD_CONTENTSIZE_UNKNOWN
: pss
;
5167 DEBUGLOG(4, "ZSTD_initCStream_advanced");
5168 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
) , "");
5169 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs
, pledgedSrcSize
) , "");
5170 FORWARD_IF_ERROR( ZSTD_checkCParams(params
.cParams
) , "");
5171 ZSTD_CCtxParams_setZstdParams(&zcs
->requestedParams
, ¶ms
);
5172 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs
, dict
, dictSize
) , "");
5176 size_t ZSTD_initCStream_usingDict(ZSTD_CStream
* zcs
, const void* dict
, size_t dictSize
, int compressionLevel
)
5178 DEBUGLOG(4, "ZSTD_initCStream_usingDict");
5179 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
) , "");
5180 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs
, ZSTD_c_compressionLevel
, compressionLevel
) , "");
5181 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs
, dict
, dictSize
) , "");
5185 size_t ZSTD_initCStream_srcSize(ZSTD_CStream
* zcs
, int compressionLevel
, unsigned long long pss
)
5187 /* temporary : 0 interpreted as "unknown" during transition period.
5188 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
5189 * 0 will be interpreted as "empty" in the future.
5191 U64
const pledgedSrcSize
= (pss
==0) ? ZSTD_CONTENTSIZE_UNKNOWN
: pss
;
5192 DEBUGLOG(4, "ZSTD_initCStream_srcSize");
5193 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
) , "");
5194 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs
, NULL
) , "");
5195 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs
, ZSTD_c_compressionLevel
, compressionLevel
) , "");
5196 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs
, pledgedSrcSize
) , "");
5200 size_t ZSTD_initCStream(ZSTD_CStream
* zcs
, int compressionLevel
)
5202 DEBUGLOG(4, "ZSTD_initCStream");
5203 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
) , "");
5204 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs
, NULL
) , "");
5205 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs
, ZSTD_c_compressionLevel
, compressionLevel
) , "");
5209 /*====== Compression ======*/
5211 static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx
* cctx
)
5213 size_t hintInSize
= cctx
->inBuffTarget
- cctx
->inBuffPos
;
5214 if (hintInSize
==0) hintInSize
= cctx
->blockSize
;
5218 /** ZSTD_compressStream_generic():
5219 * internal function for all *compressStream*() variants
5220 * non-static, because can be called from zstdmt_compress.c
5221 * @return : hint size for next input */
5222 static size_t ZSTD_compressStream_generic(ZSTD_CStream
* zcs
,
5223 ZSTD_outBuffer
* output
,
5224 ZSTD_inBuffer
* input
,
5225 ZSTD_EndDirective
const flushMode
)
5227 const char* const istart
= (const char*)input
->src
;
5228 const char* const iend
= input
->size
!= 0 ? istart
+ input
->size
: istart
;
5229 const char* ip
= input
->pos
!= 0 ? istart
+ input
->pos
: istart
;
5230 char* const ostart
= (char*)output
->dst
;
5231 char* const oend
= output
->size
!= 0 ? ostart
+ output
->size
: ostart
;
5232 char* op
= output
->pos
!= 0 ? ostart
+ output
->pos
: ostart
;
5233 U32 someMoreWork
= 1;
5235 /* check expectations */
5236 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode
);
5237 if (zcs
->appliedParams
.inBufferMode
== ZSTD_bm_buffered
) {
5238 assert(zcs
->inBuff
!= NULL
);
5239 assert(zcs
->inBuffSize
> 0);
5241 if (zcs
->appliedParams
.outBufferMode
== ZSTD_bm_buffered
) {
5242 assert(zcs
->outBuff
!= NULL
);
5243 assert(zcs
->outBuffSize
> 0);
5245 assert(output
->pos
<= output
->size
);
5246 assert(input
->pos
<= input
->size
);
5247 assert((U32
)flushMode
<= (U32
)ZSTD_e_end
);
5249 while (someMoreWork
) {
5250 switch(zcs
->streamStage
)
5253 RETURN_ERROR(init_missing
, "call ZSTD_initCStream() first!");
5256 if ( (flushMode
== ZSTD_e_end
)
5257 && ( (size_t)(oend
-op
) >= ZSTD_compressBound(iend
-ip
) /* Enough output space */
5258 || zcs
->appliedParams
.outBufferMode
== ZSTD_bm_stable
) /* OR we are allowed to return dstSizeTooSmall */
5259 && (zcs
->inBuffPos
== 0) ) {
5260 /* shortcut to compression pass directly into output buffer */
5261 size_t const cSize
= ZSTD_compressEnd(zcs
,
5262 op
, oend
-op
, ip
, iend
-ip
);
5263 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize
);
5264 FORWARD_IF_ERROR(cSize
, "ZSTD_compressEnd failed");
5267 zcs
->frameEnded
= 1;
5268 ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
);
5269 someMoreWork
= 0; break;
5271 /* complete loading into inBuffer in buffered mode */
5272 if (zcs
->appliedParams
.inBufferMode
== ZSTD_bm_buffered
) {
5273 size_t const toLoad
= zcs
->inBuffTarget
- zcs
->inBuffPos
;
5274 size_t const loaded
= ZSTD_limitCopy(
5275 zcs
->inBuff
+ zcs
->inBuffPos
, toLoad
,
5277 zcs
->inBuffPos
+= loaded
;
5280 if ( (flushMode
== ZSTD_e_continue
)
5281 && (zcs
->inBuffPos
< zcs
->inBuffTarget
) ) {
5282 /* not enough input to fill full block : stop here */
5283 someMoreWork
= 0; break;
5285 if ( (flushMode
== ZSTD_e_flush
)
5286 && (zcs
->inBuffPos
== zcs
->inToCompress
) ) {
5288 someMoreWork
= 0; break;
5291 /* compress current block (note : this stage cannot be stopped in the middle) */
5292 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode
);
5293 { int const inputBuffered
= (zcs
->appliedParams
.inBufferMode
== ZSTD_bm_buffered
);
5296 size_t oSize
= oend
-op
;
5297 size_t const iSize
= inputBuffered
5298 ? zcs
->inBuffPos
- zcs
->inToCompress
5299 : MIN((size_t)(iend
- ip
), zcs
->blockSize
);
5300 if (oSize
>= ZSTD_compressBound(iSize
) || zcs
->appliedParams
.outBufferMode
== ZSTD_bm_stable
)
5301 cDst
= op
; /* compress into output buffer, to skip flush stage */
5303 cDst
= zcs
->outBuff
, oSize
= zcs
->outBuffSize
;
5304 if (inputBuffered
) {
5305 unsigned const lastBlock
= (flushMode
== ZSTD_e_end
) && (ip
==iend
);
5307 ZSTD_compressEnd(zcs
, cDst
, oSize
,
5308 zcs
->inBuff
+ zcs
->inToCompress
, iSize
) :
5309 ZSTD_compressContinue(zcs
, cDst
, oSize
,
5310 zcs
->inBuff
+ zcs
->inToCompress
, iSize
);
5311 FORWARD_IF_ERROR(cSize
, "%s", lastBlock
? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
5312 zcs
->frameEnded
= lastBlock
;
5313 /* prepare next block */
5314 zcs
->inBuffTarget
= zcs
->inBuffPos
+ zcs
->blockSize
;
5315 if (zcs
->inBuffTarget
> zcs
->inBuffSize
)
5316 zcs
->inBuffPos
= 0, zcs
->inBuffTarget
= zcs
->blockSize
;
5317 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
5318 (unsigned)zcs
->inBuffTarget
, (unsigned)zcs
->inBuffSize
);
5320 assert(zcs
->inBuffTarget
<= zcs
->inBuffSize
);
5321 zcs
->inToCompress
= zcs
->inBuffPos
;
5323 unsigned const lastBlock
= (ip
+ iSize
== iend
);
5324 assert(flushMode
== ZSTD_e_end
/* Already validated */);
5326 ZSTD_compressEnd(zcs
, cDst
, oSize
, ip
, iSize
) :
5327 ZSTD_compressContinue(zcs
, cDst
, oSize
, ip
, iSize
);
5328 /* Consume the input prior to error checking to mirror buffered mode. */
5331 FORWARD_IF_ERROR(cSize
, "%s", lastBlock
? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
5332 zcs
->frameEnded
= lastBlock
;
5336 if (cDst
== op
) { /* no need to flush */
5338 if (zcs
->frameEnded
) {
5339 DEBUGLOG(5, "Frame completed directly in outBuffer");
5341 ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
);
5345 zcs
->outBuffContentSize
= cSize
;
5346 zcs
->outBuffFlushedSize
= 0;
5347 zcs
->streamStage
= zcss_flush
; /* pass-through to flush stage */
5351 DEBUGLOG(5, "flush stage");
5352 assert(zcs
->appliedParams
.outBufferMode
== ZSTD_bm_buffered
);
5353 { size_t const toFlush
= zcs
->outBuffContentSize
- zcs
->outBuffFlushedSize
;
5354 size_t const flushed
= ZSTD_limitCopy(op
, (size_t)(oend
-op
),
5355 zcs
->outBuff
+ zcs
->outBuffFlushedSize
, toFlush
);
5356 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
5357 (unsigned)toFlush
, (unsigned)(oend
-op
), (unsigned)flushed
);
5360 zcs
->outBuffFlushedSize
+= flushed
;
5361 if (toFlush
!=flushed
) {
5362 /* flush not fully completed, presumably because dst is too small */
5367 zcs
->outBuffContentSize
= zcs
->outBuffFlushedSize
= 0;
5368 if (zcs
->frameEnded
) {
5369 DEBUGLOG(5, "Frame completed on flush");
5371 ZSTD_CCtx_reset(zcs
, ZSTD_reset_session_only
);
5374 zcs
->streamStage
= zcss_load
;
5378 default: /* impossible */
5383 input
->pos
= ip
- istart
;
5384 output
->pos
= op
- ostart
;
5385 if (zcs
->frameEnded
) return 0;
5386 return ZSTD_nextInputSizeHint(zcs
);
5389 static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx
* cctx
)
5391 #ifdef ZSTD_MULTITHREAD
5392 if (cctx
->appliedParams
.nbWorkers
>= 1) {
5393 assert(cctx
->mtctx
!= NULL
);
5394 return ZSTDMT_nextInputSizeHint(cctx
->mtctx
);
5397 return ZSTD_nextInputSizeHint(cctx
);
5401 size_t ZSTD_compressStream(ZSTD_CStream
* zcs
, ZSTD_outBuffer
* output
, ZSTD_inBuffer
* input
)
5403 FORWARD_IF_ERROR( ZSTD_compressStream2(zcs
, output
, input
, ZSTD_e_continue
) , "");
5404 return ZSTD_nextInputSizeHint_MTorST(zcs
);
5407 /* After a compression call set the expected input/output buffer.
5408 * This is validated at the start of the next compression call.
5410 static void ZSTD_setBufferExpectations(ZSTD_CCtx
* cctx
, ZSTD_outBuffer
const* output
, ZSTD_inBuffer
const* input
)
5412 if (cctx
->appliedParams
.inBufferMode
== ZSTD_bm_stable
) {
5413 cctx
->expectedInBuffer
= *input
;
5415 if (cctx
->appliedParams
.outBufferMode
== ZSTD_bm_stable
) {
5416 cctx
->expectedOutBufferSize
= output
->size
- output
->pos
;
5420 /* Validate that the input/output buffers match the expectations set by
5421 * ZSTD_setBufferExpectations.
5423 static size_t ZSTD_checkBufferStability(ZSTD_CCtx
const* cctx
,
5424 ZSTD_outBuffer
const* output
,
5425 ZSTD_inBuffer
const* input
,
5426 ZSTD_EndDirective endOp
)
5428 if (cctx
->appliedParams
.inBufferMode
== ZSTD_bm_stable
) {
5429 ZSTD_inBuffer
const expect
= cctx
->expectedInBuffer
;
5430 if (expect
.src
!= input
->src
|| expect
.pos
!= input
->pos
|| expect
.size
!= input
->size
)
5431 RETURN_ERROR(srcBuffer_wrong
, "ZSTD_c_stableInBuffer enabled but input differs!");
5432 if (endOp
!= ZSTD_e_end
)
5433 RETURN_ERROR(srcBuffer_wrong
, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
5435 if (cctx
->appliedParams
.outBufferMode
== ZSTD_bm_stable
) {
5436 size_t const outBufferSize
= output
->size
- output
->pos
;
5437 if (cctx
->expectedOutBufferSize
!= outBufferSize
)
5438 RETURN_ERROR(dstBuffer_wrong
, "ZSTD_c_stableOutBuffer enabled but output size differs!");
5443 static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx
* cctx
,
5444 ZSTD_EndDirective endOp
,
5446 ZSTD_CCtx_params params
= cctx
->requestedParams
;
5447 ZSTD_prefixDict
const prefixDict
= cctx
->prefixDict
;
5448 FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx
) , ""); /* Init the local dict if present. */
5449 ZSTD_memset(&cctx
->prefixDict
, 0, sizeof(cctx
->prefixDict
)); /* single usage */
5450 assert(prefixDict
.dict
==NULL
|| cctx
->cdict
==NULL
); /* only one can be set */
5451 if (cctx
->cdict
&& !cctx
->localDict
.cdict
) {
5452 /* Let the cdict's compression level take priority over the requested params.
5453 * But do not take the cdict's compression level if the "cdict" is actually a localDict
5454 * generated from ZSTD_initLocalDict().
5456 params
.compressionLevel
= cctx
->cdict
->compressionLevel
;
5458 DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
5459 if (endOp
== ZSTD_e_end
) cctx
->pledgedSrcSizePlusOne
= inSize
+ 1; /* auto-fix pledgedSrcSize */
5461 size_t const dictSize
= prefixDict
.dict
5462 ? prefixDict
.dictSize
5463 : (cctx
->cdict
? cctx
->cdict
->dictContentSize
: 0);
5464 ZSTD_cParamMode_e
const mode
= ZSTD_getCParamMode(cctx
->cdict
, ¶ms
, cctx
->pledgedSrcSizePlusOne
- 1);
5465 params
.cParams
= ZSTD_getCParamsFromCCtxParams(
5466 ¶ms
, cctx
->pledgedSrcSizePlusOne
-1,
5470 if (ZSTD_CParams_shouldEnableLdm(¶ms
.cParams
)) {
5471 /* Enable LDM by default for optimal parser and window size >= 128MB */
5472 DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
5473 params
.ldmParams
.enableLdm
= 1;
5476 if (ZSTD_CParams_useBlockSplitter(¶ms
.cParams
)) {
5477 DEBUGLOG(4, "Block splitter enabled by default (window size >= 128K, strategy >= btopt)");
5478 params
.splitBlocks
= 1;
5481 params
.useRowMatchFinder
= ZSTD_resolveRowMatchFinderMode(params
.useRowMatchFinder
, ¶ms
.cParams
);
5483 #ifdef ZSTD_MULTITHREAD
5484 if ((cctx
->pledgedSrcSizePlusOne
-1) <= ZSTDMT_JOBSIZE_MIN
) {
5485 params
.nbWorkers
= 0; /* do not invoke multi-threading when src size is too small */
5487 if (params
.nbWorkers
> 0) {
5489 cctx
->traceCtx
= (ZSTD_trace_compress_begin
!= NULL
) ? ZSTD_trace_compress_begin(cctx
) : 0;
5491 /* mt context creation */
5492 if (cctx
->mtctx
== NULL
) {
5493 DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
5495 cctx
->mtctx
= ZSTDMT_createCCtx_advanced((U32
)params
.nbWorkers
, cctx
->customMem
, cctx
->pool
);
5496 RETURN_ERROR_IF(cctx
->mtctx
== NULL
, memory_allocation
, "NULL pointer!");
5498 /* mt compression */
5499 DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params
.nbWorkers
);
5500 FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
5502 prefixDict
.dict
, prefixDict
.dictSize
, prefixDict
.dictContentType
,
5503 cctx
->cdict
, params
, cctx
->pledgedSrcSizePlusOne
-1) , "");
5504 cctx
->dictID
= cctx
->cdict
? cctx
->cdict
->dictID
: 0;
5505 cctx
->dictContentSize
= cctx
->cdict
? cctx
->cdict
->dictContentSize
: prefixDict
.dictSize
;
5506 cctx
->consumedSrcSize
= 0;
5507 cctx
->producedCSize
= 0;
5508 cctx
->streamStage
= zcss_load
;
5509 cctx
->appliedParams
= params
;
5512 { U64
const pledgedSrcSize
= cctx
->pledgedSrcSizePlusOne
- 1;
5513 assert(!ZSTD_isError(ZSTD_checkCParams(params
.cParams
)));
5514 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx
,
5515 prefixDict
.dict
, prefixDict
.dictSize
, prefixDict
.dictContentType
, ZSTD_dtlm_fast
,
5517 ¶ms
, pledgedSrcSize
,
5518 ZSTDb_buffered
) , "");
5519 assert(cctx
->appliedParams
.nbWorkers
== 0);
5520 cctx
->inToCompress
= 0;
5521 cctx
->inBuffPos
= 0;
5522 if (cctx
->appliedParams
.inBufferMode
== ZSTD_bm_buffered
) {
5523 /* for small input: avoid automatic flush on reaching end of block, since
5524 * it would require to add a 3-bytes null block to end frame
5526 cctx
->inBuffTarget
= cctx
->blockSize
+ (cctx
->blockSize
== pledgedSrcSize
);
5528 cctx
->inBuffTarget
= 0;
5530 cctx
->outBuffContentSize
= cctx
->outBuffFlushedSize
= 0;
5531 cctx
->streamStage
= zcss_load
;
5532 cctx
->frameEnded
= 0;
5537 size_t ZSTD_compressStream2( ZSTD_CCtx
* cctx
,
5538 ZSTD_outBuffer
* output
,
5539 ZSTD_inBuffer
* input
,
5540 ZSTD_EndDirective endOp
)
5542 DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp
);
5543 /* check conditions */
5544 RETURN_ERROR_IF(output
->pos
> output
->size
, dstSize_tooSmall
, "invalid output buffer");
5545 RETURN_ERROR_IF(input
->pos
> input
->size
, srcSize_wrong
, "invalid input buffer");
5546 RETURN_ERROR_IF((U32
)endOp
> (U32
)ZSTD_e_end
, parameter_outOfBound
, "invalid endDirective");
5547 assert(cctx
!= NULL
);
5549 /* transparent initialization stage */
5550 if (cctx
->streamStage
== zcss_init
) {
5551 FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx
, endOp
, input
->size
), "CompressStream2 initialization failed");
5552 ZSTD_setBufferExpectations(cctx
, output
, input
); /* Set initial buffer expectations now that we've initialized */
5554 /* end of transparent initialization stage */
5556 FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx
, output
, input
, endOp
), "invalid buffers");
5557 /* compression stage */
5558 #ifdef ZSTD_MULTITHREAD
5559 if (cctx
->appliedParams
.nbWorkers
> 0) {
5561 if (cctx
->cParamsChanged
) {
5562 ZSTDMT_updateCParams_whileCompressing(cctx
->mtctx
, &cctx
->requestedParams
);
5563 cctx
->cParamsChanged
= 0;
5566 size_t const ipos
= input
->pos
;
5567 size_t const opos
= output
->pos
;
5568 flushMin
= ZSTDMT_compressStream_generic(cctx
->mtctx
, output
, input
, endOp
);
5569 cctx
->consumedSrcSize
+= (U64
)(input
->pos
- ipos
);
5570 cctx
->producedCSize
+= (U64
)(output
->pos
- opos
);
5571 if ( ZSTD_isError(flushMin
)
5572 || (endOp
== ZSTD_e_end
&& flushMin
== 0) ) { /* compression completed */
5574 ZSTD_CCtx_trace(cctx
, 0);
5575 ZSTD_CCtx_reset(cctx
, ZSTD_reset_session_only
);
5577 FORWARD_IF_ERROR(flushMin
, "ZSTDMT_compressStream_generic failed");
5579 if (endOp
== ZSTD_e_continue
) {
5580 /* We only require some progress with ZSTD_e_continue, not maximal progress.
5581 * We're done if we've consumed or produced any bytes, or either buffer is
5584 if (input
->pos
!= ipos
|| output
->pos
!= opos
|| input
->pos
== input
->size
|| output
->pos
== output
->size
)
5587 assert(endOp
== ZSTD_e_flush
|| endOp
== ZSTD_e_end
);
5588 /* We require maximal progress. We're done when the flush is complete or the
5589 * output buffer is full.
5591 if (flushMin
== 0 || output
->pos
== output
->size
)
5595 DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
5596 /* Either we don't require maximum forward progress, we've finished the
5597 * flush, or we are out of output space.
5599 assert(endOp
== ZSTD_e_continue
|| flushMin
== 0 || output
->pos
== output
->size
);
5600 ZSTD_setBufferExpectations(cctx
, output
, input
);
5604 FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx
, output
, input
, endOp
) , "");
5605 DEBUGLOG(5, "completed ZSTD_compressStream2");
5606 ZSTD_setBufferExpectations(cctx
, output
, input
);
5607 return cctx
->outBuffContentSize
- cctx
->outBuffFlushedSize
; /* remaining to flush */
5610 size_t ZSTD_compressStream2_simpleArgs (
5612 void* dst
, size_t dstCapacity
, size_t* dstPos
,
5613 const void* src
, size_t srcSize
, size_t* srcPos
,
5614 ZSTD_EndDirective endOp
)
5616 ZSTD_outBuffer output
= { dst
, dstCapacity
, *dstPos
};
5617 ZSTD_inBuffer input
= { src
, srcSize
, *srcPos
};
5618 /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
5619 size_t const cErr
= ZSTD_compressStream2(cctx
, &output
, &input
, endOp
);
5620 *dstPos
= output
.pos
;
5621 *srcPos
= input
.pos
;
5625 size_t ZSTD_compress2(ZSTD_CCtx
* cctx
,
5626 void* dst
, size_t dstCapacity
,
5627 const void* src
, size_t srcSize
)
5629 ZSTD_bufferMode_e
const originalInBufferMode
= cctx
->requestedParams
.inBufferMode
;
5630 ZSTD_bufferMode_e
const originalOutBufferMode
= cctx
->requestedParams
.outBufferMode
;
5631 DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize
);
5632 ZSTD_CCtx_reset(cctx
, ZSTD_reset_session_only
);
5633 /* Enable stable input/output buffers. */
5634 cctx
->requestedParams
.inBufferMode
= ZSTD_bm_stable
;
5635 cctx
->requestedParams
.outBufferMode
= ZSTD_bm_stable
;
5638 size_t const result
= ZSTD_compressStream2_simpleArgs(cctx
,
5639 dst
, dstCapacity
, &oPos
,
5640 src
, srcSize
, &iPos
,
5642 /* Reset to the original values. */
5643 cctx
->requestedParams
.inBufferMode
= originalInBufferMode
;
5644 cctx
->requestedParams
.outBufferMode
= originalOutBufferMode
;
5645 FORWARD_IF_ERROR(result
, "ZSTD_compressStream2_simpleArgs failed");
5646 if (result
!= 0) { /* compression not completed, due to lack of output space */
5647 assert(oPos
== dstCapacity
);
5648 RETURN_ERROR(dstSize_tooSmall
, "");
5650 assert(iPos
== srcSize
); /* all input is expected consumed */
5656 U32 idx
; /* Index in array of ZSTD_Sequence */
5657 U32 posInSequence
; /* Position within sequence at idx */
5658 size_t posInSrc
; /* Number of bytes given by sequences provided so far */
5659 } ZSTD_sequencePosition
;
5661 /* Returns a ZSTD error code if sequence is not valid */
5662 static size_t ZSTD_validateSequence(U32 offCode
, U32 matchLength
,
5663 size_t posInSrc
, U32 windowLog
, size_t dictSize
, U32 minMatch
) {
5665 U32 windowSize
= 1 << windowLog
;
5666 /* posInSrc represents the amount of data the the decoder would decode up to this point.
5667 * As long as the amount of data decoded is less than or equal to window size, offsets may be
5668 * larger than the total length of output decoded in order to reference the dict, even larger than
5669 * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
5671 offsetBound
= posInSrc
> windowSize
? (size_t)windowSize
: posInSrc
+ (size_t)dictSize
;
5672 RETURN_ERROR_IF(offCode
> offsetBound
+ ZSTD_REP_MOVE
, corruption_detected
, "Offset too large!");
5673 RETURN_ERROR_IF(matchLength
< minMatch
, corruption_detected
, "Matchlength too small");
5677 /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
5678 static U32
ZSTD_finalizeOffCode(U32 rawOffset
, const U32 rep
[ZSTD_REP_NUM
], U32 ll0
) {
5679 U32 offCode
= rawOffset
+ ZSTD_REP_MOVE
;
5682 if (!ll0
&& rawOffset
== rep
[0]) {
5684 } else if (rawOffset
== rep
[1]) {
5686 } else if (rawOffset
== rep
[2]) {
5688 } else if (ll0
&& rawOffset
== rep
[0] - 1) {
5692 /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
5693 offCode
= repCode
- 1;
5698 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
5699 * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
5701 static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx
* cctx
, ZSTD_sequencePosition
* seqPos
,
5702 const ZSTD_Sequence
* const inSeqs
, size_t inSeqsSize
,
5703 const void* src
, size_t blockSize
) {
5704 U32 idx
= seqPos
->idx
;
5705 BYTE
const* ip
= (BYTE
const*)(src
);
5706 const BYTE
* const iend
= ip
+ blockSize
;
5707 repcodes_t updatedRepcodes
;
5715 dictSize
= (U32
)cctx
->cdict
->dictContentSize
;
5716 } else if (cctx
->prefixDict
.dict
) {
5717 dictSize
= (U32
)cctx
->prefixDict
.dictSize
;
5721 ZSTD_memcpy(updatedRepcodes
.rep
, cctx
->blockState
.prevCBlock
->rep
, sizeof(repcodes_t
));
5722 for (; (inSeqs
[idx
].matchLength
!= 0 || inSeqs
[idx
].offset
!= 0) && idx
< inSeqsSize
; ++idx
) {
5723 litLength
= inSeqs
[idx
].litLength
;
5724 matchLength
= inSeqs
[idx
].matchLength
;
5725 ll0
= litLength
== 0;
5726 offCode
= ZSTD_finalizeOffCode(inSeqs
[idx
].offset
, updatedRepcodes
.rep
, ll0
);
5727 updatedRepcodes
= ZSTD_updateRep(updatedRepcodes
.rep
, offCode
, ll0
);
5729 DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode
, matchLength
, litLength
);
5730 if (cctx
->appliedParams
.validateSequences
) {
5731 seqPos
->posInSrc
+= litLength
+ matchLength
;
5732 FORWARD_IF_ERROR(ZSTD_validateSequence(offCode
, matchLength
, seqPos
->posInSrc
,
5733 cctx
->appliedParams
.cParams
.windowLog
, dictSize
,
5734 cctx
->appliedParams
.cParams
.minMatch
),
5735 "Sequence validation failed");
5737 RETURN_ERROR_IF(idx
- seqPos
->idx
> cctx
->seqStore
.maxNbSeq
, memory_allocation
,
5738 "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
5739 ZSTD_storeSeq(&cctx
->seqStore
, litLength
, ip
, iend
, offCode
, matchLength
- MINMATCH
);
5740 ip
+= matchLength
+ litLength
;
5742 ZSTD_memcpy(cctx
->blockState
.nextCBlock
->rep
, updatedRepcodes
.rep
, sizeof(repcodes_t
));
5744 if (inSeqs
[idx
].litLength
) {
5745 DEBUGLOG(6, "Storing last literals of size: %u", inSeqs
[idx
].litLength
);
5746 ZSTD_storeLastLiterals(&cctx
->seqStore
, ip
, inSeqs
[idx
].litLength
);
5747 ip
+= inSeqs
[idx
].litLength
;
5748 seqPos
->posInSrc
+= inSeqs
[idx
].litLength
;
5750 RETURN_ERROR_IF(ip
!= iend
, corruption_detected
, "Blocksize doesn't agree with block delimiter!");
5751 seqPos
->idx
= idx
+1;
5755 /* Returns the number of bytes to move the current read position back by. Only non-zero
5756 * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
5759 * This function will attempt to scan through blockSize bytes represented by the sequences
5760 * in inSeqs, storing any (partial) sequences.
5762 * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
5763 * avoid splitting a match, or to avoid splitting a match such that it would produce a match
5764 * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
5766 static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx
* cctx
, ZSTD_sequencePosition
* seqPos
,
5767 const ZSTD_Sequence
* const inSeqs
, size_t inSeqsSize
,
5768 const void* src
, size_t blockSize
) {
5769 U32 idx
= seqPos
->idx
;
5770 U32 startPosInSequence
= seqPos
->posInSequence
;
5771 U32 endPosInSequence
= seqPos
->posInSequence
+ (U32
)blockSize
;
5773 BYTE
const* ip
= (BYTE
const*)(src
);
5774 BYTE
const* iend
= ip
+ blockSize
; /* May be adjusted if we decide to process fewer than blockSize bytes */
5775 repcodes_t updatedRepcodes
;
5776 U32 bytesAdjustment
= 0;
5777 U32 finalMatchSplit
= 0;
5784 dictSize
= cctx
->cdict
->dictContentSize
;
5785 } else if (cctx
->prefixDict
.dict
) {
5786 dictSize
= cctx
->prefixDict
.dictSize
;
5790 DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx
, startPosInSequence
, blockSize
);
5791 DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx
, inSeqs
[idx
].offset
, inSeqs
[idx
].matchLength
, inSeqs
[idx
].litLength
);
5792 ZSTD_memcpy(updatedRepcodes
.rep
, cctx
->blockState
.prevCBlock
->rep
, sizeof(repcodes_t
));
5793 while (endPosInSequence
&& idx
< inSeqsSize
&& !finalMatchSplit
) {
5794 const ZSTD_Sequence currSeq
= inSeqs
[idx
];
5795 litLength
= currSeq
.litLength
;
5796 matchLength
= currSeq
.matchLength
;
5797 rawOffset
= currSeq
.offset
;
5799 /* Modify the sequence depending on where endPosInSequence lies */
5800 if (endPosInSequence
>= currSeq
.litLength
+ currSeq
.matchLength
) {
5801 if (startPosInSequence
>= litLength
) {
5802 startPosInSequence
-= litLength
;
5804 matchLength
-= startPosInSequence
;
5806 litLength
-= startPosInSequence
;
5808 /* Move to the next sequence */
5809 endPosInSequence
-= currSeq
.litLength
+ currSeq
.matchLength
;
5810 startPosInSequence
= 0;
5813 /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
5814 does not reach the end of the match. So, we have to split the sequence */
5815 DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
5816 currSeq
.litLength
+ currSeq
.matchLength
- endPosInSequence
, idx
, endPosInSequence
);
5817 if (endPosInSequence
> litLength
) {
5818 U32 firstHalfMatchLength
;
5819 litLength
= startPosInSequence
>= litLength
? 0 : litLength
- startPosInSequence
;
5820 firstHalfMatchLength
= endPosInSequence
- startPosInSequence
- litLength
;
5821 if (matchLength
> blockSize
&& firstHalfMatchLength
>= cctx
->appliedParams
.cParams
.minMatch
) {
5822 /* Only ever split the match if it is larger than the block size */
5823 U32 secondHalfMatchLength
= currSeq
.matchLength
+ currSeq
.litLength
- endPosInSequence
;
5824 if (secondHalfMatchLength
< cctx
->appliedParams
.cParams
.minMatch
) {
5825 /* Move the endPosInSequence backward so that it creates match of minMatch length */
5826 endPosInSequence
-= cctx
->appliedParams
.cParams
.minMatch
- secondHalfMatchLength
;
5827 bytesAdjustment
= cctx
->appliedParams
.cParams
.minMatch
- secondHalfMatchLength
;
5828 firstHalfMatchLength
-= bytesAdjustment
;
5830 matchLength
= firstHalfMatchLength
;
5831 /* Flag that we split the last match - after storing the sequence, exit the loop,
5832 but keep the value of endPosInSequence */
5833 finalMatchSplit
= 1;
5835 /* Move the position in sequence backwards so that we don't split match, and break to store
5836 * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
5837 * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
5838 * would cause the first half of the match to be too small
5840 bytesAdjustment
= endPosInSequence
- currSeq
.litLength
;
5841 endPosInSequence
= currSeq
.litLength
;
5845 /* This sequence ends inside the literals, break to store the last literals */
5849 /* Check if this offset can be represented with a repcode */
5850 { U32 ll0
= (litLength
== 0);
5851 offCode
= ZSTD_finalizeOffCode(rawOffset
, updatedRepcodes
.rep
, ll0
);
5852 updatedRepcodes
= ZSTD_updateRep(updatedRepcodes
.rep
, offCode
, ll0
);
5855 if (cctx
->appliedParams
.validateSequences
) {
5856 seqPos
->posInSrc
+= litLength
+ matchLength
;
5857 FORWARD_IF_ERROR(ZSTD_validateSequence(offCode
, matchLength
, seqPos
->posInSrc
,
5858 cctx
->appliedParams
.cParams
.windowLog
, dictSize
,
5859 cctx
->appliedParams
.cParams
.minMatch
),
5860 "Sequence validation failed");
5862 DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode
, matchLength
, litLength
);
5863 RETURN_ERROR_IF(idx
- seqPos
->idx
> cctx
->seqStore
.maxNbSeq
, memory_allocation
,
5864 "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
5865 ZSTD_storeSeq(&cctx
->seqStore
, litLength
, ip
, iend
, offCode
, matchLength
- MINMATCH
);
5866 ip
+= matchLength
+ litLength
;
5868 DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx
, inSeqs
[idx
].offset
, inSeqs
[idx
].matchLength
, inSeqs
[idx
].litLength
);
5869 assert(idx
== inSeqsSize
|| endPosInSequence
<= inSeqs
[idx
].litLength
+ inSeqs
[idx
].matchLength
);
5871 seqPos
->posInSequence
= endPosInSequence
;
5872 ZSTD_memcpy(cctx
->blockState
.nextCBlock
->rep
, updatedRepcodes
.rep
, sizeof(repcodes_t
));
5874 iend
-= bytesAdjustment
;
5876 /* Store any last literals */
5877 U32 lastLLSize
= (U32
)(iend
- ip
);
5879 DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize
);
5880 ZSTD_storeLastLiterals(&cctx
->seqStore
, ip
, lastLLSize
);
5881 seqPos
->posInSrc
+= lastLLSize
;
5884 return bytesAdjustment
;
5887 typedef size_t (*ZSTD_sequenceCopier
) (ZSTD_CCtx
* cctx
, ZSTD_sequencePosition
* seqPos
,
5888 const ZSTD_Sequence
* const inSeqs
, size_t inSeqsSize
,
5889 const void* src
, size_t blockSize
);
5890 static ZSTD_sequenceCopier
ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode
) {
5891 ZSTD_sequenceCopier sequenceCopier
= NULL
;
5892 assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters
, mode
));
5893 if (mode
== ZSTD_sf_explicitBlockDelimiters
) {
5894 return ZSTD_copySequencesToSeqStoreExplicitBlockDelim
;
5895 } else if (mode
== ZSTD_sf_noBlockDelimiters
) {
5896 return ZSTD_copySequencesToSeqStoreNoBlockDelim
;
5898 assert(sequenceCopier
!= NULL
);
5899 return sequenceCopier
;
5902 /* Compress, block-by-block, all of the sequences given.
5904 * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
5906 static size_t ZSTD_compressSequences_internal(ZSTD_CCtx
* cctx
,
5907 void* dst
, size_t dstCapacity
,
5908 const ZSTD_Sequence
* inSeqs
, size_t inSeqsSize
,
5909 const void* src
, size_t srcSize
) {
5913 size_t compressedSeqsSize
;
5914 size_t remaining
= srcSize
;
5915 ZSTD_sequencePosition seqPos
= {0, 0, 0};
5917 BYTE
const* ip
= (BYTE
const*)src
;
5918 BYTE
* op
= (BYTE
*)dst
;
5919 ZSTD_sequenceCopier sequenceCopier
= ZSTD_selectSequenceCopier(cctx
->appliedParams
.blockDelimiters
);
5921 DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize
, inSeqsSize
);
5922 /* Special case: empty frame */
5923 if (remaining
== 0) {
5924 U32
const cBlockHeader24
= 1 /* last block */ + (((U32
)bt_raw
)<<1);
5925 RETURN_ERROR_IF(dstCapacity
<4, dstSize_tooSmall
, "No room for empty frame block header");
5926 MEM_writeLE32(op
, cBlockHeader24
);
5927 op
+= ZSTD_blockHeaderSize
;
5928 dstCapacity
-= ZSTD_blockHeaderSize
;
5929 cSize
+= ZSTD_blockHeaderSize
;
5934 size_t additionalByteAdjustment
;
5935 lastBlock
= remaining
<= cctx
->blockSize
;
5936 blockSize
= lastBlock
? (U32
)remaining
: (U32
)cctx
->blockSize
;
5937 ZSTD_resetSeqStore(&cctx
->seqStore
);
5938 DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize
);
5940 additionalByteAdjustment
= sequenceCopier(cctx
, &seqPos
, inSeqs
, inSeqsSize
, ip
, blockSize
);
5941 FORWARD_IF_ERROR(additionalByteAdjustment
, "Bad sequence copy");
5942 blockSize
-= additionalByteAdjustment
;
5944 /* If blocks are too small, emit as a nocompress block */
5945 if (blockSize
< MIN_CBLOCK_SIZE
+ZSTD_blockHeaderSize
+1) {
5946 cBlockSize
= ZSTD_noCompressBlock(op
, dstCapacity
, ip
, blockSize
, lastBlock
);
5947 FORWARD_IF_ERROR(cBlockSize
, "Nocompress block failed");
5948 DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize
);
5949 cSize
+= cBlockSize
;
5952 remaining
-= blockSize
;
5953 dstCapacity
-= cBlockSize
;
5957 compressedSeqsSize
= ZSTD_entropyCompressSeqStore(&cctx
->seqStore
,
5958 &cctx
->blockState
.prevCBlock
->entropy
, &cctx
->blockState
.nextCBlock
->entropy
,
5959 &cctx
->appliedParams
,
5960 op
+ ZSTD_blockHeaderSize
/* Leave space for block header */, dstCapacity
- ZSTD_blockHeaderSize
,
5962 cctx
->entropyWorkspace
, ENTROPY_WORKSPACE_SIZE
/* statically allocated in resetCCtx */,
5964 FORWARD_IF_ERROR(compressedSeqsSize
, "Compressing sequences of block failed");
5965 DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize
);
5967 if (!cctx
->isFirstBlock
&&
5968 ZSTD_maybeRLE(&cctx
->seqStore
) &&
5969 ZSTD_isRLE((BYTE
const*)src
, srcSize
)) {
5970 /* We don't want to emit our first block as a RLE even if it qualifies because
5971 * doing so will cause the decoder (cli only) to throw a "should consume all input error."
5972 * This is only an issue for zstd <= v1.4.3
5974 compressedSeqsSize
= 1;
5977 if (compressedSeqsSize
== 0) {
5978 /* ZSTD_noCompressBlock writes the block header as well */
5979 cBlockSize
= ZSTD_noCompressBlock(op
, dstCapacity
, ip
, blockSize
, lastBlock
);
5980 FORWARD_IF_ERROR(cBlockSize
, "Nocompress block failed");
5981 DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize
);
5982 } else if (compressedSeqsSize
== 1) {
5983 cBlockSize
= ZSTD_rleCompressBlock(op
, dstCapacity
, *ip
, blockSize
, lastBlock
);
5984 FORWARD_IF_ERROR(cBlockSize
, "RLE compress block failed");
5985 DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize
);
5988 /* Error checking and repcodes update */
5989 ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx
->blockState
);
5990 if (cctx
->blockState
.prevCBlock
->entropy
.fse
.offcode_repeatMode
== FSE_repeat_valid
)
5991 cctx
->blockState
.prevCBlock
->entropy
.fse
.offcode_repeatMode
= FSE_repeat_check
;
5993 /* Write block header into beginning of block*/
5994 cBlockHeader
= lastBlock
+ (((U32
)bt_compressed
)<<1) + (U32
)(compressedSeqsSize
<< 3);
5995 MEM_writeLE24(op
, cBlockHeader
);
5996 cBlockSize
= ZSTD_blockHeaderSize
+ compressedSeqsSize
;
5997 DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize
);
6000 cSize
+= cBlockSize
;
6001 DEBUGLOG(4, "cSize running total: %zu", cSize
);
6008 remaining
-= blockSize
;
6009 dstCapacity
-= cBlockSize
;
6010 cctx
->isFirstBlock
= 0;
6017 size_t ZSTD_compressSequences(ZSTD_CCtx
* const cctx
, void* dst
, size_t dstCapacity
,
6018 const ZSTD_Sequence
* inSeqs
, size_t inSeqsSize
,
6019 const void* src
, size_t srcSize
) {
6020 BYTE
* op
= (BYTE
*)dst
;
6022 size_t compressedBlocksSize
= 0;
6023 size_t frameHeaderSize
= 0;
6025 /* Transparent initialization stage, same as compressStream2() */
6026 DEBUGLOG(3, "ZSTD_compressSequences()");
6027 assert(cctx
!= NULL
);
6028 FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx
, ZSTD_e_end
, srcSize
), "CCtx initialization failed");
6029 /* Begin writing output, starting with frame header */
6030 frameHeaderSize
= ZSTD_writeFrameHeader(op
, dstCapacity
, &cctx
->appliedParams
, srcSize
, cctx
->dictID
);
6031 op
+= frameHeaderSize
;
6032 dstCapacity
-= frameHeaderSize
;
6033 cSize
+= frameHeaderSize
;
6034 if (cctx
->appliedParams
.fParams
.checksumFlag
&& srcSize
) {
6035 XXH64_update(&cctx
->xxhState
, src
, srcSize
);
6037 /* cSize includes block header size and compressed sequences size */
6038 compressedBlocksSize
= ZSTD_compressSequences_internal(cctx
,
6042 FORWARD_IF_ERROR(compressedBlocksSize
, "Compressing blocks failed!");
6043 cSize
+= compressedBlocksSize
;
6044 dstCapacity
-= compressedBlocksSize
;
6046 if (cctx
->appliedParams
.fParams
.checksumFlag
) {
6047 U32
const checksum
= (U32
) XXH64_digest(&cctx
->xxhState
);
6048 RETURN_ERROR_IF(dstCapacity
<4, dstSize_tooSmall
, "no room for checksum");
6049 DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum
);
6050 MEM_writeLE32((char*)dst
+ cSize
, checksum
);
6054 DEBUGLOG(3, "Final compressed size: %zu", cSize
);
6058 /*====== Finalize ======*/
6060 /*! ZSTD_flushStream() :
6061 * @return : amount of data remaining to flush */
6062 size_t ZSTD_flushStream(ZSTD_CStream
* zcs
, ZSTD_outBuffer
* output
)
6064 ZSTD_inBuffer input
= { NULL
, 0, 0 };
6065 return ZSTD_compressStream2(zcs
, output
, &input
, ZSTD_e_flush
);
6069 size_t ZSTD_endStream(ZSTD_CStream
* zcs
, ZSTD_outBuffer
* output
)
6071 ZSTD_inBuffer input
= { NULL
, 0, 0 };
6072 size_t const remainingToFlush
= ZSTD_compressStream2(zcs
, output
, &input
, ZSTD_e_end
);
6073 FORWARD_IF_ERROR( remainingToFlush
, "ZSTD_compressStream2 failed");
6074 if (zcs
->appliedParams
.nbWorkers
> 0) return remainingToFlush
; /* minimal estimation */
6075 /* single thread mode : attempt to calculate remaining to flush more precisely */
6076 { size_t const lastBlockSize
= zcs
->frameEnded
? 0 : ZSTD_BLOCKHEADERSIZE
;
6077 size_t const checksumSize
= (size_t)(zcs
->frameEnded
? 0 : zcs
->appliedParams
.fParams
.checksumFlag
* 4);
6078 size_t const toFlush
= remainingToFlush
+ lastBlockSize
+ checksumSize
;
6079 DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush
);
6085 /*-===== Pre-defined compression levels =====-*/
6087 #define ZSTD_MAX_CLEVEL 22
6088 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL
; }
6089 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX
; }
6090 int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT
; }
6092 static const ZSTD_compressionParameters ZSTD_defaultCParameters
[4][ZSTD_MAX_CLEVEL
+1] = {
6093 { /* "default" - for any srcSize > 256 KB */
6094 /* W, C, H, S, L, TL, strat */
6095 { 19, 12, 13, 1, 6, 1, ZSTD_fast
}, /* base for negative levels */
6096 { 19, 13, 14, 1, 7, 0, ZSTD_fast
}, /* level 1 */
6097 { 20, 15, 16, 1, 6, 0, ZSTD_fast
}, /* level 2 */
6098 { 21, 16, 17, 1, 5, 0, ZSTD_dfast
}, /* level 3 */
6099 { 21, 18, 18, 1, 5, 0, ZSTD_dfast
}, /* level 4 */
6100 { 21, 18, 19, 2, 5, 2, ZSTD_greedy
}, /* level 5 */
6101 { 21, 19, 19, 3, 5, 4, ZSTD_greedy
}, /* level 6 */
6102 { 21, 19, 19, 3, 5, 8, ZSTD_lazy
}, /* level 7 */
6103 { 21, 19, 19, 3, 5, 16, ZSTD_lazy2
}, /* level 8 */
6104 { 21, 19, 20, 4, 5, 16, ZSTD_lazy2
}, /* level 9 */
6105 { 22, 20, 21, 4, 5, 16, ZSTD_lazy2
}, /* level 10 */
6106 { 22, 21, 22, 4, 5, 16, ZSTD_lazy2
}, /* level 11 */
6107 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2
}, /* level 12 */
6108 { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2
}, /* level 13 */
6109 { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2
}, /* level 14 */
6110 { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2
}, /* level 15 */
6111 { 22, 22, 22, 5, 5, 48, ZSTD_btopt
}, /* level 16 */
6112 { 23, 23, 22, 5, 4, 64, ZSTD_btopt
}, /* level 17 */
6113 { 23, 23, 22, 6, 3, 64, ZSTD_btultra
}, /* level 18 */
6114 { 23, 24, 22, 7, 3,256, ZSTD_btultra2
}, /* level 19 */
6115 { 25, 25, 23, 7, 3,256, ZSTD_btultra2
}, /* level 20 */
6116 { 26, 26, 24, 7, 3,512, ZSTD_btultra2
}, /* level 21 */
6117 { 27, 27, 25, 9, 3,999, ZSTD_btultra2
}, /* level 22 */
6119 { /* for srcSize <= 256 KB */
6120 /* W, C, H, S, L, T, strat */
6121 { 18, 12, 13, 1, 5, 1, ZSTD_fast
}, /* base for negative levels */
6122 { 18, 13, 14, 1, 6, 0, ZSTD_fast
}, /* level 1 */
6123 { 18, 14, 14, 1, 5, 0, ZSTD_dfast
}, /* level 2 */
6124 { 18, 16, 16, 1, 4, 0, ZSTD_dfast
}, /* level 3 */
6125 { 18, 16, 17, 2, 5, 2, ZSTD_greedy
}, /* level 4.*/
6126 { 18, 18, 18, 3, 5, 2, ZSTD_greedy
}, /* level 5.*/
6127 { 18, 18, 19, 3, 5, 4, ZSTD_lazy
}, /* level 6.*/
6128 { 18, 18, 19, 4, 4, 4, ZSTD_lazy
}, /* level 7 */
6129 { 18, 18, 19, 4, 4, 8, ZSTD_lazy2
}, /* level 8 */
6130 { 18, 18, 19, 5, 4, 8, ZSTD_lazy2
}, /* level 9 */
6131 { 18, 18, 19, 6, 4, 8, ZSTD_lazy2
}, /* level 10 */
6132 { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2
}, /* level 11.*/
6133 { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2
}, /* level 12.*/
6134 { 18, 18, 19, 4, 4, 16, ZSTD_btopt
}, /* level 13 */
6135 { 18, 18, 19, 4, 3, 32, ZSTD_btopt
}, /* level 14.*/
6136 { 18, 18, 19, 6, 3,128, ZSTD_btopt
}, /* level 15.*/
6137 { 18, 19, 19, 6, 3,128, ZSTD_btultra
}, /* level 16.*/
6138 { 18, 19, 19, 8, 3,256, ZSTD_btultra
}, /* level 17.*/
6139 { 18, 19, 19, 6, 3,128, ZSTD_btultra2
}, /* level 18.*/
6140 { 18, 19, 19, 8, 3,256, ZSTD_btultra2
}, /* level 19.*/
6141 { 18, 19, 19, 10, 3,512, ZSTD_btultra2
}, /* level 20.*/
6142 { 18, 19, 19, 12, 3,512, ZSTD_btultra2
}, /* level 21.*/
6143 { 18, 19, 19, 13, 3,999, ZSTD_btultra2
}, /* level 22.*/
6145 { /* for srcSize <= 128 KB */
6146 /* W, C, H, S, L, T, strat */
6147 { 17, 12, 12, 1, 5, 1, ZSTD_fast
}, /* base for negative levels */
6148 { 17, 12, 13, 1, 6, 0, ZSTD_fast
}, /* level 1 */
6149 { 17, 13, 15, 1, 5, 0, ZSTD_fast
}, /* level 2 */
6150 { 17, 15, 16, 2, 5, 0, ZSTD_dfast
}, /* level 3 */
6151 { 17, 17, 17, 2, 4, 0, ZSTD_dfast
}, /* level 4 */
6152 { 17, 16, 17, 3, 4, 2, ZSTD_greedy
}, /* level 5 */
6153 { 17, 17, 17, 3, 4, 4, ZSTD_lazy
}, /* level 6 */
6154 { 17, 17, 17, 3, 4, 8, ZSTD_lazy2
}, /* level 7 */
6155 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2
}, /* level 8 */
6156 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2
}, /* level 9 */
6157 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2
}, /* level 10 */
6158 { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2
}, /* level 11 */
6159 { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2
}, /* level 12 */
6160 { 17, 18, 17, 3, 4, 12, ZSTD_btopt
}, /* level 13.*/
6161 { 17, 18, 17, 4, 3, 32, ZSTD_btopt
}, /* level 14.*/
6162 { 17, 18, 17, 6, 3,256, ZSTD_btopt
}, /* level 15.*/
6163 { 17, 18, 17, 6, 3,128, ZSTD_btultra
}, /* level 16.*/
6164 { 17, 18, 17, 8, 3,256, ZSTD_btultra
}, /* level 17.*/
6165 { 17, 18, 17, 10, 3,512, ZSTD_btultra
}, /* level 18.*/
6166 { 17, 18, 17, 5, 3,256, ZSTD_btultra2
}, /* level 19.*/
6167 { 17, 18, 17, 7, 3,512, ZSTD_btultra2
}, /* level 20.*/
6168 { 17, 18, 17, 9, 3,512, ZSTD_btultra2
}, /* level 21.*/
6169 { 17, 18, 17, 11, 3,999, ZSTD_btultra2
}, /* level 22.*/
6171 { /* for srcSize <= 16 KB */
6172 /* W, C, H, S, L, T, strat */
6173 { 14, 12, 13, 1, 5, 1, ZSTD_fast
}, /* base for negative levels */
6174 { 14, 14, 15, 1, 5, 0, ZSTD_fast
}, /* level 1 */
6175 { 14, 14, 15, 1, 4, 0, ZSTD_fast
}, /* level 2 */
6176 { 14, 14, 15, 2, 4, 0, ZSTD_dfast
}, /* level 3 */
6177 { 14, 14, 14, 4, 4, 2, ZSTD_greedy
}, /* level 4 */
6178 { 14, 14, 14, 3, 4, 4, ZSTD_lazy
}, /* level 5.*/
6179 { 14, 14, 14, 4, 4, 8, ZSTD_lazy2
}, /* level 6 */
6180 { 14, 14, 14, 6, 4, 8, ZSTD_lazy2
}, /* level 7 */
6181 { 14, 14, 14, 8, 4, 8, ZSTD_lazy2
}, /* level 8.*/
6182 { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2
}, /* level 9.*/
6183 { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2
}, /* level 10.*/
6184 { 14, 15, 14, 3, 4, 12, ZSTD_btopt
}, /* level 11.*/
6185 { 14, 15, 14, 4, 3, 24, ZSTD_btopt
}, /* level 12.*/
6186 { 14, 15, 14, 5, 3, 32, ZSTD_btultra
}, /* level 13.*/
6187 { 14, 15, 15, 6, 3, 64, ZSTD_btultra
}, /* level 14.*/
6188 { 14, 15, 15, 7, 3,256, ZSTD_btultra
}, /* level 15.*/
6189 { 14, 15, 15, 5, 3, 48, ZSTD_btultra2
}, /* level 16.*/
6190 { 14, 15, 15, 6, 3,128, ZSTD_btultra2
}, /* level 17.*/
6191 { 14, 15, 15, 7, 3,256, ZSTD_btultra2
}, /* level 18.*/
6192 { 14, 15, 15, 8, 3,256, ZSTD_btultra2
}, /* level 19.*/
6193 { 14, 15, 15, 8, 3,512, ZSTD_btultra2
}, /* level 20.*/
6194 { 14, 15, 15, 9, 3,512, ZSTD_btultra2
}, /* level 21.*/
6195 { 14, 15, 15, 10, 3,999, ZSTD_btultra2
}, /* level 22.*/
6199 static ZSTD_compressionParameters
ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel
, size_t const dictSize
)
6201 ZSTD_compressionParameters cParams
= ZSTD_getCParams_internal(compressionLevel
, 0, dictSize
, ZSTD_cpm_createCDict
);
6202 switch (cParams
.strategy
) {
6209 cParams
.hashLog
+= ZSTD_LAZY_DDSS_BUCKET_LOG
;
6220 static int ZSTD_dedicatedDictSearch_isSupported(
6221 ZSTD_compressionParameters
const* cParams
)
6223 return (cParams
->strategy
>= ZSTD_greedy
)
6224 && (cParams
->strategy
<= ZSTD_lazy2
)
6225 && (cParams
->hashLog
> cParams
->chainLog
)
6226 && (cParams
->chainLog
<= 24);
6230 * Reverses the adjustment applied to cparams when enabling dedicated dict
6231 * search. This is used to recover the params set to be used in the working
6232 * context. (Otherwise, those tables would also grow.)
6234 static void ZSTD_dedicatedDictSearch_revertCParams(
6235 ZSTD_compressionParameters
* cParams
) {
6236 switch (cParams
->strategy
) {
6243 cParams
->hashLog
-= ZSTD_LAZY_DDSS_BUCKET_LOG
;
6244 if (cParams
->hashLog
< ZSTD_HASHLOG_MIN
) {
6245 cParams
->hashLog
= ZSTD_HASHLOG_MIN
;
6256 static U64
ZSTD_getCParamRowSize(U64 srcSizeHint
, size_t dictSize
, ZSTD_cParamMode_e mode
)
6259 case ZSTD_cpm_unknown
:
6260 case ZSTD_cpm_noAttachDict
:
6261 case ZSTD_cpm_createCDict
:
6263 case ZSTD_cpm_attachDict
:
6270 { int const unknown
= srcSizeHint
== ZSTD_CONTENTSIZE_UNKNOWN
;
6271 size_t const addedSize
= unknown
&& dictSize
> 0 ? 500 : 0;
6272 return unknown
&& dictSize
== 0 ? ZSTD_CONTENTSIZE_UNKNOWN
: srcSizeHint
+dictSize
+addedSize
;
6276 /*! ZSTD_getCParams_internal() :
6277 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
6278 * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
6279 * Use dictSize == 0 for unknown or unused.
6280 * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
6281 static ZSTD_compressionParameters
ZSTD_getCParams_internal(int compressionLevel
, unsigned long long srcSizeHint
, size_t dictSize
, ZSTD_cParamMode_e mode
)
6283 U64
const rSize
= ZSTD_getCParamRowSize(srcSizeHint
, dictSize
, mode
);
6284 U32
const tableID
= (rSize
<= 256 KB
) + (rSize
<= 128 KB
) + (rSize
<= 16 KB
);
6286 DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel
);
6289 if (compressionLevel
== 0) row
= ZSTD_CLEVEL_DEFAULT
; /* 0 == default */
6290 else if (compressionLevel
< 0) row
= 0; /* entry 0 is baseline for fast mode */
6291 else if (compressionLevel
> ZSTD_MAX_CLEVEL
) row
= ZSTD_MAX_CLEVEL
;
6292 else row
= compressionLevel
;
6294 { ZSTD_compressionParameters cp
= ZSTD_defaultCParameters
[tableID
][row
];
6295 DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID
, row
, (U32
)cp
.strategy
);
6296 /* acceleration factor */
6297 if (compressionLevel
< 0) {
6298 int const clampedCompressionLevel
= MAX(ZSTD_minCLevel(), compressionLevel
);
6299 cp
.targetLength
= (unsigned)(-clampedCompressionLevel
);
6301 /* refine parameters based on srcSize & dictSize */
6302 return ZSTD_adjustCParams_internal(cp
, srcSizeHint
, dictSize
, mode
);
6306 /*! ZSTD_getCParams() :
6307 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
6308 * Size values are optional, provide 0 if not known or unused */
6309 ZSTD_compressionParameters
ZSTD_getCParams(int compressionLevel
, unsigned long long srcSizeHint
, size_t dictSize
)
6311 if (srcSizeHint
== 0) srcSizeHint
= ZSTD_CONTENTSIZE_UNKNOWN
;
6312 return ZSTD_getCParams_internal(compressionLevel
, srcSizeHint
, dictSize
, ZSTD_cpm_unknown
);
6315 /*! ZSTD_getParams() :
6316 * same idea as ZSTD_getCParams()
6317 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
6318 * Fields of `ZSTD_frameParameters` are set to default values */
6319 static ZSTD_parameters
ZSTD_getParams_internal(int compressionLevel
, unsigned long long srcSizeHint
, size_t dictSize
, ZSTD_cParamMode_e mode
) {
6320 ZSTD_parameters params
;
6321 ZSTD_compressionParameters
const cParams
= ZSTD_getCParams_internal(compressionLevel
, srcSizeHint
, dictSize
, mode
);
6322 DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel
);
6323 ZSTD_memset(¶ms
, 0, sizeof(params
));
6324 params
.cParams
= cParams
;
6325 params
.fParams
.contentSizeFlag
= 1;
6329 /*! ZSTD_getParams() :
6330 * same idea as ZSTD_getCParams()
6331 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
6332 * Fields of `ZSTD_frameParameters` are set to default values */
6333 ZSTD_parameters
ZSTD_getParams(int compressionLevel
, unsigned long long srcSizeHint
, size_t dictSize
) {
6334 if (srcSizeHint
== 0) srcSizeHint
= ZSTD_CONTENTSIZE_UNKNOWN
;
6335 return ZSTD_getParams_internal(compressionLevel
, srcSizeHint
, dictSize
, ZSTD_cpm_unknown
);