/*
 * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"


#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE      (1<<30)

#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */


/*-*************************************
*  Price functions for optimal parser
***************************************/

#if 0    /* approximation at bit level (for tests) */
# define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
#elif 0  /* fractional bit accuracy (for tests) */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
#else    /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif

MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}

MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const stat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(stat);
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + BITCOST_ACCURACY < 31);
    return weight;
}

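/* Editorial worked example (not part of upstream zstd) :
 * with BITCOST_ACCURACY==8, ZSTD_fracWeight() approximates log2(stat+1) scaled by
 * BITCOST_MULTIPLIER (=256), with a fractional correction term.
 * The disabled sketch below, kept in the same spirit as the other #if 0 test blocks
 * in this file, spells the arithmetic out for one sample value. */
#if 0   /* editorial example - relies only on the functions defined above */
static void ZSTD_exampleWeights(void)
{
    /* For rawStat=15 : stat=16, highbit32(16)=4,
     * so ZSTD_bitWeight(15)  = 4*256             = 1024,
     * and ZSTD_fracWeight(15) = 4*256 + ((16<<8)>>4) = 1024 + 256 = 1280,
     * i.e. ~5 "fractional bits" instead of a flat 4 bits. */
    assert(ZSTD_bitWeight(15) == 4 * BITCOST_MULTIPLIER);
    assert(ZSTD_fracWeight(15) == 4 * BITCOST_MULTIPLIER + 256);
}
#endif
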
#if (DEBUGLEVEL>=2)
/* debugging function,
 * @return price in bytes as fractional value
 * for debug messages only */
MEM_STATIC double ZSTD_fCost(U32 price)
{
    return (double)price / (BITCOST_MULTIPLIER*8);
}
#endif

static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
}

static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}


static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    size_t n;
    U32 total = 0;
    for (n=0; n<nbElts; n++) {
        total += table[n];
    }
    return total;
}

static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
{
    U32 s, sum=0;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
    assert(shift < 30);
    for (s=0; s<lastEltIndex+1; s++) {
        table[s] = 1 + (table[s] >> shift);
        sum += table[s];
    }
    return sum;
}

/* ZSTD_scaleStats() :
 * reduce all elements in table if its sum is too large
 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
    U32 const factor = prevsum >> logTarget;
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    assert(logTarget < 30);
    if (factor <= 1) return prevsum;
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
}

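/* Editorial worked example (not part of upstream zstd) :
 * if a frequency table currently sums to 150000 and logTarget==11 (target ~2K),
 * then factor = 150000 >> 11 = 73, ZSTD_highbit32(73) = 6, and each cell is
 * rescaled to 1 + (cell >> 6), bringing the sum back near the 2K target while
 * keeping every symbol reachable (no zero frequency). */
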
/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
 *    take hints from dictionary if there is one
 *    and init from zero if there is none,
 *    using src for literals stats, and baseline stats for sequence symbols
 * otherwise downscale existing stats, to be used as seed for next block.
 */
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
            const BYTE* const src, size_t const srcSize,
                  int const optLevel)
{
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (optPtr->litLengthSum == 0) {  /* first block : init */
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
            optPtr->priceType = zop_predef;
        }

        assert(optPtr->symbolCosts != NULL);
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
            /* huffman table presumed generated by dictionary */
            optPtr->priceType = zop_dynamic;

            if (compressedLiterals) {
                unsigned lit;
                assert(optPtr->litFreq != NULL);
                optPtr->litSum = 0;
                for (lit=0; lit<=MaxLit; lit++) {
                    U32 const scaleLog = 11;   /* scale to 2K */
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                    assert(bitCost <= scaleLog);
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litSum += optPtr->litFreq[lit];
            }   }

            {   unsigned ll;
                FSE_CState_t llstate;
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
                optPtr->litLengthSum = 0;
                for (ll=0; ll<=MaxLL; ll++) {
                    U32 const scaleLog = 10;   /* scale to 1K */
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
                    assert(bitCost < scaleLog);
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
            }   }

            {   unsigned ml;
                FSE_CState_t mlstate;
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
                optPtr->matchLengthSum = 0;
                for (ml=0; ml<=MaxML; ml++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
                    assert(bitCost < scaleLog);
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
            }   }

            {   unsigned of;
                FSE_CState_t ofstate;
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
                optPtr->offCodeSum = 0;
                for (of=0; of<=MaxOff; of++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
                    assert(bitCost < scaleLog);
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
            }   }

        } else {  /* not a dictionary */

            assert(optPtr->litFreq != NULL);
            if (compressedLiterals) {
                unsigned lit = MaxLit;
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
            }

            {   unsigned const baseLLfreqs[MaxLL+1] = {
                    4, 2, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
            }

            {   unsigned ml;
                for (ml=0; ml<=MaxML; ml++)
                    optPtr->matchLengthFreq[ml] = 1;
            }
            optPtr->matchLengthSum = MaxML+1;

            {   unsigned const baseOFCfreqs[MaxOff+1] = {
                    6, 2, 1, 1, 2, 3, 4, 4,
                    4, 3, 2, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
            }

        }

    } else {   /* new block : re-use previous statistics, scaled down */

        if (compressedLiterals)
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}

/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (whose length can be 0).
 * does not include price of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                const optState_t* const optPtr,
                                int optLevel)
{
    if (litLength == 0) return 0;

    if (!ZSTD_compressedLiterals(optPtr))
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */

    if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistics used */

    /* dynamic statistics */
    {   U32 price = litLength * optPtr->litSumBasePrice;
        U32 u;
        for (u=0; u < litLength; u++) {
            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
        }
        return price;
    }
}

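/* Editorial worked example (not part of upstream zstd) :
 * in dynamic mode, each literal costs litSumBasePrice - WEIGHT(litFreq[byte]),
 * i.e. approximately log2(litSum / litFreq[byte]) fractional bits.
 * A byte seen 256 times out of litSum==4096 therefore costs about
 * log2(4096/256) = 4 bits, i.e. roughly 4*BITCOST_MULTIPLIER price units. */
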
/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
    if (optPtr->priceType == zop_predef)
        return WEIGHT(litLength, optLevel);
    /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
     * because it isn't representable in the zstd format. So instead just
     * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
     * would be all literals.
     */
    if (litLength == ZSTD_BLOCKSIZE_MAX)
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
             + optPtr->litLengthSumBasePrice
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
    }
}

/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence.
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
 * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
 */
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offcode,
                   U32 const matchLength,
             const optState_t* const optPtr,
                   int const optLevel)
{
    U32 price;
    U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);

    /* dynamic statistics */
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
    if ((optLevel<2) /*static*/ && offCode >= 20)
        price += (offCode-19)*2 * BITCOST_MULTIPLIER;  /* handicap for long distance offsets, favor decompression speed */

    /* match Length */
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
    }

    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly, to favor fewer sequences -> faster decompression speed */

    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    return price;
}

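/* Editorial worked example (not part of upstream zstd) :
 * the dynamic match price is the sum of three components :
 *   offset : offCode raw bits + (log2(offCodeSum) - log2(offCodeFreq[offCode]))
 *   length : ML_bits[mlCode] raw bits + (log2(matchLengthSum) - log2(matchLengthFreq[mlCode]))
 *   plus a fixed BITCOST_MULTIPLIER/5 (~0.2 bit) handicap per match,
 * all expressed in fractional-bit units of BITCOST_MULTIPLIER. */
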

static void
ZSTD_updateLiterals(optState_t* const optPtr, const BYTE* literals, U32 litLength)
{
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    }
}

/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength, const BYTE* literals,
                             U32 offsetCode, U32 matchLength)
{
    DEBUGLOG(6, "ZSTD_updateStats (ll=%u, ml=%u)", litLength, matchLength);

    /* literals */
    ZSTD_updateLiterals(optPtr, literals, litLength);

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* offset code : expected to follow storeSeq() numeric representation */
    {   U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
    }

    /* match Length */
    {   U32 const mlBase = matchLength - MINMATCH;
        U32 const mlCode = ZSTD_MLcode(mlBase);
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }
}


/* ZSTD_readMINMATCH() :
 * function safe only for comparisons
 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}

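/* Editorial example (not part of upstream zstd) :
 * ZSTD_readMINMATCH(p, 3) reads 4 bytes but shifts one byte out, so that only the
 * 3 bytes at p take part in the comparison; this is how 3-byte candidates are
 * tested when minMatch==3 :
 *   if (ZSTD_readMINMATCH(ip, 3) == ZSTD_readMINMATCH(ip - offset, 3)) { ... candidate ... }
 * The returned value is endian-dependent and is only meaningful for equality tests. */
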
/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
                                              U32* nextToUpdate3,
                                              const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 *  @param ip assumed <= iend-8 .
 *  @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 *  @return : nb of positions added */
static U32 ZSTD_insertBt1(
                const ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 curr = (U32)(ip-base);
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    /* windowLow is based on target because
     * we only need positions that will be in the window at the end of the tree update.
     */
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

    assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;    /* Update Hash Table */

    assert(windowLow > 0);
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop, to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > curr + 8);
        return MAX(positions, matchEndIdx - (curr + 8));
    }
}

FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}

FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                ZSTD_matchState_t* ms,
                U32* nextToUpdate3,
                const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
                const U32 rep[ZSTD_REP_NUM],
                U32 const ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                const U32 lengthToBeat,
                U32 const mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const curr = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = curr - repOffset;
            U32 repLen = 0;
            assert(curr >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= curr */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(curr >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(curr > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = STORE_OFFSET(curr - matchIndex3);
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = curr+1;  /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }  /* if (mls == 3) */

    hashTable[h] = curr;   /* Update Hash Table */

    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(curr > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
                    (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = STORE_OFFSET(curr - matchIndex);
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
        }   }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
                        (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = STORE_OFFSET(curr - matchIndex);
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }   }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];       /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}

typedef U32 (*ZSTD_getAllMatchesFn)(
    ZSTD_match_t*,
    ZSTD_matchState_t*,
    U32*,
    const BYTE*,
    const BYTE*,
    const U32 rep[ZSTD_REP_NUM],
    U32 const ll0,
    U32 const lengthToBeat);

FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
        ZSTD_match_t* matches,
        ZSTD_matchState_t* ms,
        U32* nextToUpdate3,
        const BYTE* ip,
        const BYTE* const iHighLimit,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat,
        const ZSTD_dictMode_e dictMode,
        const U32 mls)
{
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
    if (ip < ms->window.base + ms->nextToUpdate)
        return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
}

#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls

#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
            ZSTD_match_t* matches,                             \
            ZSTD_matchState_t* ms,                             \
            U32* nextToUpdate3,                                \
            const BYTE* ip,                                    \
            const BYTE* const iHighLimit,                      \
            const U32 rep[ZSTD_REP_NUM],                       \
            U32 const ll0,                                     \
            U32 const lengthToBeat)                            \
    {                                                          \
        return ZSTD_btGetAllMatches_internal(                  \
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
    }

#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)

GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    {                                            \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    }

static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    };
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    assert((U32)dictMode < 3);
    assert(mls - 3 < 4);
    return getAllMatchesFns[(int)dictMode][mls - 3];
}

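/* Editorial note (not part of upstream zstd) :
 * the macros above instantiate one specialized getter per (dictMode, mls) pair,
 * e.g. ZSTD_btGetAllMatches_noDict_4, and ZSTD_selectBtGetAllMatches() simply
 * indexes the 3x4 table with the current dictMode and the bounded minMatch.
 * For instance, with dictMode==ZSTD_noDict and cParams.minMatch==4 it returns
 * ZSTD_BT_GET_ALL_MATCHES_FN(noDict, 4). */
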
/*************************
*  LDM helper functions  *
*************************/

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
    rawSeqStore_t seqStore;   /* External match candidates store for this block */
    U32 startPosInBlock;      /* Start position of the current match candidate */
    U32 endPosInBlock;        /* End position of the current match candidate */
    U32 offset;               /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in @rawSeqStore by @nbBytes,
 * which will update the fields 'pos' and 'posInSequence'.
 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
{
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}

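/* Editorial worked example (not part of upstream zstd) :
 * if the current sequence is {litLength=10, matchLength=30} and posInSequence==5,
 * then skipping 40 bytes consumes the remaining 35 bytes of that sequence,
 * advances 'pos' to the next sequence (when one exists), and leaves
 * posInSequence==5 inside it. */
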
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 */
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                       U32 blockBytesRemaining)
{
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength
     * after adjusting based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < MINMATCH by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}

/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough, based on its 'startPosInBlock' and 'endPosInBlock',
 * into 'matches'. Maintains the correct ordering of 'matches'.
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

    /* Ensure that current block position is not outside of the match */
    if (currPosInBlock < optLdm->startPosInBlock
      || currPosInBlock >= optLdm->endPosInBlock
      || candidateMatchLength < MINMATCH) {
        return;
    }

    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
        U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
                 candidateOffCode, candidateMatchLength, currPosInBlock);
        matches[*nbMatches].len = candidateMatchLength;
        matches[*nbMatches].off = candidateOffCode;
        (*nbMatches)++;
    }
}

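/* Editorial note (not part of upstream zstd) :
 * because 'matches' is kept sorted by increasing length, the LDM candidate is only
 * appended when it is strictly longer than the current last entry
 * (matches[*nbMatches-1].len), which preserves the ordering invariant relied upon
 * by the parser when it reads matches[nbMatches-1] as the longest match. */
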
/* ZSTD_optLdm_processMatchCandidate():
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
 */
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                  ZSTD_match_t* matches, U32* nbMatches,
                                  U32 currPosInBlock, U32 remainingBytes)
{
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        return;
    }

    if (currPosInBlock >= optLdm->endPosInBlock) {
        if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
             */
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}


/*-*******************************
*  Optimal parser
*********************************/

static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
    return sol.litlen + sol.mlen;
}

#if 0 /* debug */

static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
        (void)table;
        /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
        RAWLOG(2, "%4i,", table[enb]);
    }
    RAWLOG(2, " \n");
}

#endif

bd84e4a9 1045FORCE_INLINE_TEMPLATE size_t
a1550613
YC
1046ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1047 seqStore_t* seqStore,
1048 U32 rep[ZSTD_REP_NUM],
8e0e495c
YC
1049 const void* src, size_t srcSize,
1050 const int optLevel,
1051 const ZSTD_dictMode_e dictMode)
721726d6 1052{
887cd4e3 1053 optState_t* const optStatePtr = &ms->opt;
721726d6
NT
1054 const BYTE* const istart = (const BYTE*)src;
1055 const BYTE* ip = istart;
1056 const BYTE* anchor = istart;
1057 const BYTE* const iend = istart + srcSize;
1058 const BYTE* const ilimit = iend - 8;
7e5e226c
NT
1059 const BYTE* const base = ms->window.base;
1060 const BYTE* const prefixStart = base + ms->window.dictLimit;
6cb24546 1061 const ZSTD_compressionParameters* const cParams = &ms->cParams;
721726d6 1062
e5bfaeed
NT
1063 ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1064
887cd4e3 1065 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
e874dacc 1066 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
9719fd61 1067 U32 nextToUpdate3 = ms->nextToUpdate;
721726d6 1068
4191efa9
YC
1069 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1070 ZSTD_match_t* const matches = optStatePtr->matchTable;
463a0fe3 1071 ZSTD_optimal_t lastSequence;
a6165c1b 1072 ZSTD_optLdm_t optLdm;
88e9a984 1073 size_t literalsAlreadyCounted = 0;
a6165c1b 1074
b9c8033c 1075 optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1076 optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
0fac8e07 1077 ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
c87d2e58 1078
721726d6 1079 /* init */
8e0e495c
YC
1080 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1081 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
18fc3d3c 1082 assert(optLevel <= 2);
18fc3d3c 1083 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
721726d6 1084 ip += (ip==prefixStart);
f8ce7cab 1085
721726d6
NT
1086 /* Match Loop */
1087 while (ip < ilimit) {
eb47705b 1088 U32 cur, last_pos = 0;
9a11f70d
YC
1089
1090 /* find first match */
4202b2e8
YC
1091 { U32 const litlen = (U32)(ip - anchor);
1092 U32 const ll0 = !litlen;
e5bfaeed 1093 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
0fac8e07 1094 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
abce708a 1095 (U32)(ip-istart), (U32)(iend - ip));
4202b2e8
YC
1096 if (!nbMatches) { ip++; continue; }
1097
88e9a984
YC
1098 if (ip-anchor > 0) {
1099 /* if (literalsAlreadyCounted > 0) : avoid double counting */
1100 size_t const newlits = (size_t)(ip-anchor) - literalsAlreadyCounted;
1101 assert(literalsAlreadyCounted <= (size_t)(ip-anchor));
1102 ZSTD_updateLiterals(optStatePtr, anchor + literalsAlreadyCounted, newlits);
1103 literalsAlreadyCounted += newlits;
1104 }
1105
4202b2e8
YC
1106 /* initialize opt[0] */
1107 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
463a0fe3 1108 opt[0].mlen = 0; /* means is_a_literal */
4202b2e8 1109 opt[0].litlen = litlen;
610171ed
NT
1110 /* We don't need to include the actual price of the literals because
1111 * it is static for the duration of the forward pass, and is included
1112 * in every price. We include the literal length to avoid negative
1113 * prices when we subtract the previous literal length.
1114 */
eab69221 1115 opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
4202b2e8
YC
1116
1117 /* large match -> immediate encoding */
1118 { U32 const maxML = matches[nbMatches-1].len;
1aed9622 1119 U32 const maxOffcode = matches[nbMatches-1].off;
a880ca23 1120 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1aed9622 1121 nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
4202b2e8
YC
1122
1123 if (maxML > sufficient_len) {
463a0fe3
YC
1124 lastSequence.litlen = litlen;
1125 lastSequence.mlen = maxML;
1aed9622 1126 lastSequence.off = maxOffcode;
463a0fe3
YC
1127 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1128 maxML, sufficient_len);
4202b2e8 1129 cur = 0;
463a0fe3 1130 last_pos = ZSTD_totalLen(lastSequence);
4202b2e8
YC
1131 goto _shortestPath;
1132 } }
721726d6 1133
4202b2e8 1134 /* set prices for first matches starting position == 0 */
eab69221 1135 assert(opt[0].price >= 0);
f0fc8cb3 1136 { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
03f30d9d 1137 U32 pos;
9a11f70d 1138 U32 matchNb;
a1550613
YC
1139 for (pos = 1; pos < minMatch; pos++) {
1140 opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
eee87cd6 1141 }
4202b2e8 1142 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1aed9622 1143 U32 const offcode = matches[matchNb].off;
eb47705b 1144 U32 const end = matches[matchNb].len;
eee87cd6 1145 for ( ; pos <= end ; pos++ ) {
1aed9622 1146 U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
a1550613 1147 U32 const sequencePrice = literalsPrice + matchPrice;
ac610546
YC
1148 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1149 pos, ZSTD_fCost(sequencePrice));
eee87cd6 1150 opt[pos].mlen = pos;
1aed9622 1151 opt[pos].off = offcode;
eee87cd6 1152 opt[pos].litlen = litlen;
f0fc8cb3 1153 opt[pos].price = (int)sequencePrice;
eee87cd6
YC
1154 } }
1155 last_pos = pos-1;
1156 }
4202b2e8 1157 }
721726d6 1158
eb47705b 1159 /* check further positions */
721726d6 1160 for (cur = 1; cur <= last_pos; cur++) {
eb47705b
YC
1161 const BYTE* const inr = ip + cur;
1162 assert(cur < ZSTD_OPT_NUM);
463a0fe3 1163 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
721726d6 1164
eb47705b 1165 /* Fix current position with one literal if cheaper */
463a0fe3
YC
1166 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1167 int const price = opt[cur-1].price
f0fc8cb3
YC
1168 + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1169 + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1170 - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
0a0a2129 1171 assert(price < 1000000000); /* overflow check */
9a11f70d 1172 if (price <= opt[cur].price) {
463a0fe3
YC
1173 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1174 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1175 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1176 opt[cur].mlen = 0;
03f30d9d
YC
1177 opt[cur].off = 0;
1178 opt[cur].litlen = litlen;
1179 opt[cur].price = price;
ac610546 1180 } else {
463a0fe3
YC
1181 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1182 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1183 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
ac610546
YC
1184 }
1185 }
eb47705b 1186
81fda041
NT
1187 /* Set the repcodes of the current position. We must do it here
1188 * because we rely on the repcodes of the 2nd to last sequence being
1189 * correct to set the next chunks repcodes during the backward
1190 * traversal.
1191 */
1192 ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1193 assert(cur >= opt[cur].mlen);
1194 if (opt[cur].mlen != 0) {
1195 U32 const prev = cur - opt[cur].mlen;
6fa640ef 1196 repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
c465f244 1197 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
81fda041 1198 } else {
c465f244 1199 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
81fda041
NT
1200 }
1201
eb47705b
YC
1202 /* last match must start at a minimum distance of 8 from oend */
1203 if (inr > ilimit) continue;
1204
d1006700
YC
1205 if (cur == last_pos) break;
1206
a1550613 1207 if ( (optLevel==0) /*static_test*/
ac610546
YC
1208 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1209 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
d1006700 1210 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
ac610546 1211 }
d1006700 1212
f0fc8cb3 1213 assert(opt[cur].price >= 0);
463a0fe3
YC
1214 { U32 const ll0 = (opt[cur].mlen != 0);
1215 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
f0fc8cb3 1216 U32 const previousPrice = (U32)opt[cur].price;
463a0fe3 1217 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
e5bfaeed 1218 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
eb47705b 1219 U32 matchNb;
634ab783 1220
0fac8e07 1221 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
abce708a 1222 (U32)(inr-istart), (U32)(iend-inr));
88f72ed9 1223
ac610546
YC
1224 if (!nbMatches) {
1225 DEBUGLOG(7, "rPos:%u : no match found", cur);
1226 continue;
1227 }
4202b2e8
YC
1228
1229 { U32 const maxML = matches[nbMatches-1].len;
463a0fe3
YC
1230 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1231 inr-istart, cur, nbMatches, maxML);
4202b2e8
YC
1232
1233 if ( (maxML > sufficient_len)
a1550613 1234 || (cur + maxML >= ZSTD_OPT_NUM) ) {
463a0fe3
YC
1235 lastSequence.mlen = maxML;
1236 lastSequence.off = matches[nbMatches-1].off;
1237 lastSequence.litlen = litlen;
1238                 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case it's the first sequence, and that's okay */
1239 last_pos = cur + ZSTD_totalLen(lastSequence);
1240 if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
4202b2e8 1241 goto _shortestPath;
a1550613 1242 } }
721726d6 1243
4202b2e8
YC
1244 /* set prices using matches found at position == cur */
1245 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
9a11f70d 1246 U32 const offset = matches[matchNb].off;
e6da37c4
YC
1247 U32 const lastML = matches[matchNb].len;
1248 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1249 U32 mlen;
9a11f70d 1250
463a0fe3 1251 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
9a11f70d 1252 matchNb, matches[matchNb].off, lastML, litlen);
721726d6 1253
a1550613 1254 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
4202b2e8 1255 U32 const pos = cur + mlen;
f0fc8cb3 1256 int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
721726d6 1257
a4a20a4b 1258 if ((pos > last_pos) || (price < opt[pos].price)) {
463a0fe3
YC
1259 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1260 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
a1550613 1261 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
bc42bc3b
YC
1262 opt[pos].mlen = mlen;
1263 opt[pos].off = offset;
1264 opt[pos].litlen = litlen;
1265 opt[pos].price = price;
e6da37c4 1266 } else {
463a0fe3
YC
1267 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1268 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
a1550613 1269 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
9a11f70d 1270 }
4154aec6
YC
1271 } } }
1272 } /* for (cur = 1; cur <= last_pos; cur++) */
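        /* Disabled diagnostic (illustrative only): after the forward pass above, opt[0..last_pos]
         * records, for each position, the cheapest known way to arrive there (mlen==0 means
         * "arrived by extending literals"); slots are pre-filled with ZSTD_MAX_PRICE until a
         * cheaper arrival is found. Dumping the table makes the relaxation visible. */
#if 0
        {   U32 rPos;
            for (rPos = 0; rPos <= last_pos; rPos++) {
                DEBUGLOG(7, "rPos:%3u price:%.2f (mlen:%u, litlen:%u, off:%u)",
                         rPos, ZSTD_fCost((U32)opt[rPos].price),
                         opt[rPos].mlen, opt[rPos].litlen, opt[rPos].off);
        }   }
#endif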
721726d6 1273
463a0fe3
YC
1274 lastSequence = opt[last_pos];
1275 cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
1276     assert(cur < ZSTD_OPT_NUM);  /* check for overflow */
721726d6 1277
eb47705b 1278_shortestPath:   /* cur, last_pos, lastSequence have to be set */
463a0fe3
YC
1279 assert(opt[0].mlen == 0);
1280
0f9882de
NT
1281 /* Set the next chunk's repcodes based on the repcodes of the beginning
1282 * of the last match, and the last sequence. This avoids us having to
1283 * update them while traversing the sequences.
1284 */
1285 if (lastSequence.mlen != 0) {
6fa640ef 1286 repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
c465f244 1287 ZSTD_memcpy(rep, &reps, sizeof(reps));
0f9882de 1288 } else {
c465f244 1289 ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
0f9882de
NT
1290 }
1291
463a0fe3
YC
1292 { U32 const storeEnd = cur + 1;
1293 U32 storeStart = storeEnd;
1294 U32 seqPos = cur;
1295
1296 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
6e66bbf5 1297 last_pos, cur); (void)last_pos;
463a0fe3
YC
1298 assert(storeEnd < ZSTD_OPT_NUM);
1299 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1300 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
1301 opt[storeEnd] = lastSequence;
1302 while (seqPos > 0) {
1303 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
1304 storeStart--;
1305 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1306 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
1307 opt[storeStart] = opt[seqPos];
1308 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
1309 }
1310
1311 /* save sequences */
1312 DEBUGLOG(6, "sending selected sequences into seqStore")
1313 { U32 storePos;
1314 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1315 U32 const llen = opt[storePos].litlen;
1316 U32 const mlen = opt[storePos].mlen;
1317 U32 const offCode = opt[storePos].off;
1318 U32 const advance = llen + mlen;
1319 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
ededcfca 1320 anchor - istart, (unsigned)llen, (unsigned)mlen);
463a0fe3 1321
88e9a984 1322 if (mlen==0) { /* only literals */
463a0fe3
YC
1323 assert(storePos == storeEnd); /* must be last sequence */
1324 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
88e9a984 1325 continue;
463a0fe3
YC
1326 }
1327
463a0fe3 1328 assert(anchor + llen <= iend);
88e9a984
YC
1329 assert(llen >= literalsAlreadyCounted);
1330 ZSTD_updateStats(optStatePtr, llen - literalsAlreadyCounted, anchor + literalsAlreadyCounted, offCode, mlen);
1331 literalsAlreadyCounted = 0;
b77fcac6 1332 ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
463a0fe3
YC
1333 anchor += advance;
1334 ip = anchor;
1335 } }
1336 ZSTD_setBasePrices(optStatePtr, optLevel);
1337 }
f8d5c478 1338 } /* while (ip < ilimit) */
a6165c1b 1339
88e9a984
YC
1340 /* update literals statistics, for next block */
1341 assert((size_t)(iend - anchor) >= literalsAlreadyCounted);
1342 ZSTD_updateLiterals(optStatePtr, anchor + literalsAlreadyCounted, (size_t)(iend - anchor) - literalsAlreadyCounted);
eeff55df 1343 /* Return the last literals size */
33dabc8c 1344 return (size_t)(iend - anchor);
721726d6
NT
1345}
1346
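/* Illustrative restatement of the repcode update rule relied upon by the bookkeeping in
 * ZSTD_compressBlock_opt_generic() above, expressed at the format level (RFC 8878):
 * `offsetValue` is the sequence's offset_value field (1..3 = repeat codes, >3 = real offset),
 * `ll0` is 1 when the sequence carries no literals. The production code goes through
 * ZSTD_newRep(), which works on an internal offset encoding rather than the raw offset_value,
 * but applies the same rule. This helper is a sketch for documentation and is not called. */
MEM_STATIC void ZSTD_updateRep_sketch(U32 rep[ZSTD_REP_NUM], U32 offsetValue, U32 ll0)
{
    if (offsetValue > ZSTD_REP_NUM) {
        /* real offset : becomes the new rep[0], older entries shift down */
        rep[2] = rep[1];
        rep[1] = rep[0];
        rep[0] = offsetValue - ZSTD_REP_NUM;
    } else {
        /* repeat offset : with ll0, indices shift by one, and the last slot means rep[0]-1 */
        U32 const repIdx = offsetValue - 1 + ll0;
        U32 const selected = (repIdx == ZSTD_REP_NUM) ? rep[0] - 1 : rep[repIdx];
        if (repIdx != 0) {   /* repIdx==0 : rep[0] re-used, history unchanged */
            if (repIdx >= 2) rep[2] = rep[1];
            rep[1] = rep[0];
            rep[0] = selected;
        }
    }
}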
e5bfaeed
NT
1347static size_t ZSTD_compressBlock_opt0(
1348 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1349 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1350{
1351 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1352}
1353
1354static size_t ZSTD_compressBlock_opt2(
1355 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1356 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1357{
1358 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1359}
721726d6 1360
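/* Toy restatement (hypothetical type and helper, kept under #if 0, never compiled) of the
 * backward traversal at the end of ZSTD_compressBlock_opt_generic(): each entry of the price
 * table knows how far back its sequence reaches (litlen + mlen), so hopping backwards by that
 * span from the end of the selected path visits, in reverse order, exactly the elements that
 * are later replayed forward (the last one may be a literals-only run). */
#if 0
typedef struct { U32 litlen; U32 mlen; } pathNode_sketch_t;

static U32 ZSTD_countPathElements_sketch(const pathNode_sketch_t* table, U32 lastPos)
{
    U32 nbElts = 0;
    U32 pos = lastPos;
    while (pos > 0) {
        U32 const span = table[pos].litlen + table[pos].mlen;  /* reach of the element ending at pos */
        assert(span > 0);                                      /* a path element always advances */
        nbElts++;
        pos = (pos > span) ? pos - span : 0;                   /* clamp : the first element may start before position 0 */
    }
    return nbElts;
}
#endif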
887cd4e3
NT
1361size_t ZSTD_compressBlock_btopt(
1362 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
6cb24546 1363 const void* src, size_t srcSize)
721726d6 1364{
05dffe43 1365 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
e5bfaeed 1366 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
721726d6
NT
1367}
1368
134388ba 1369
134388ba 1370
373ff8b9 1371
e9448cdf
YC
1372/* ZSTD_initStats_ultra():
1373 * make a first compression pass, just to seed stats with more accurate starting values.
1374 * only works on the first block, with no dictionary and no ldm.
a880ca23 1375 * this function cannot report errors, hence its contract must be respected.
e9448cdf 1376 */
373ff8b9
YC
1377static void
1378ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1379 seqStore_t* seqStore,
1380 U32 rep[ZSTD_REP_NUM],
1381 const void* src, size_t srcSize)
e9448cdf
YC
1382{
1383 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
c465f244 1384 ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
e9448cdf 1385
2898afab 1386 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
e9448cdf
YC
1387 assert(ms->opt.litLengthSum == 0); /* first block */
1388 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
1389 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
5e6aaa3a 1390     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional underflow, unsigned wraparound is well-defined) */
e9448cdf 1391
e5bfaeed 1392     ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt */
e9448cdf
YC
1393
1394     /* invalidate first scan from history :
     * rewinding window.base while raising dictLimit/lowLimit by srcSize pushes every position
     * indexed by the first pass below the valid window, so the real pass re-inserts the block
     * from scratch, while the statistics accumulated into ms->opt are preserved. */
1395 ZSTD_resetSeqStore(seqStore);
1396 ms->window.base -= srcSize;
1397 ms->window.dictLimit += (U32)srcSize;
1398 ms->window.lowLimit = ms->window.dictLimit;
1399 ms->nextToUpdate = ms->window.dictLimit;
e9448cdf 1400
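    /* Disabled sanity sketch (illustrative only, assuming this function's stated preconditions):
     * after the rewind above, `src` maps again to index == dictLimit, i.e. the match state looks
     * like a fresh frame start, while the statistics gathered into ms->opt survive for the real
     * compression pass that follows. */
#if 0
    assert((U32)((const BYTE*)src - ms->window.base) == ms->window.dictLimit);
    assert(ms->window.lowLimit == ms->window.dictLimit);
    assert(ms->nextToUpdate == ms->window.dictLimit);
#endif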
e9448cdf 1401}
134388ba 1402
887cd4e3
NT
1403size_t ZSTD_compressBlock_btultra(
1404 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
6cb24546 1405 const void* src, size_t srcSize)
721726d6 1406{
ef984e73 1407 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
e5bfaeed 1408 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
e9448cdf
YC
1409}
1410
1411size_t ZSTD_compressBlock_btultra2(
1412 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1413 const void* src, size_t srcSize)
1414{
f91ed5c7 1415 U32 const curr = (U32)((const BYTE*)src - ms->window.base);
ef984e73 1416 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
e9448cdf 1417
373ff8b9 1418 /* 2-pass strategy:
a8ddf1d3
YC
1419 * this strategy makes a first pass over the first block to collect statistics
1420 * and seed next round's statistics with it.
5e6aaa3a
YC
1421 * After the 1st pass, the function forgets everything, and starts a new block.
1422 * Consequently, this can only work if no data has been previously loaded into the tables,
1423 * i.e., no dictionary, no prefix, no ldm preprocessing.
a8ddf1d3
YC
1424 * The compression ratio gain is generally small (~0.5% on the first block),
1425 * while the cost is 2x cpu time on the first block. */
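    /* For scale (illustrative arithmetic, not from the source): with the 128 KB maximum block
     * size (ZSTD_BLOCKSIZE_MAX), a 10 MB input only pays the doubled work on its first 128 KB,
     * i.e. roughly 1% extra cpu over the whole input. */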
8572b4d0
YC
1426 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1427 if ( (ms->opt.litLengthSum==0) /* first block */
5e6aaa3a
YC
1428 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1429 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
f91ed5c7 1430 && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
635783da 1431 && (srcSize > ZSTD_PREDEF_THRESHOLD)
5e6aaa3a 1432 ) {
e9448cdf 1433 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
134388ba 1434 }
e9448cdf 1435
e5bfaeed 1436 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
721726d6
NT
1437}
1438
ccbf0679
FH
1439size_t ZSTD_compressBlock_btopt_dictMatchState(
1440 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
6cb24546 1441 const void* src, size_t srcSize)
ccbf0679 1442{
e5bfaeed 1443 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
ccbf0679
FH
1444}
1445
1446size_t ZSTD_compressBlock_btultra_dictMatchState(
1447 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
6cb24546 1448 const void* src, size_t srcSize)
ccbf0679 1449{
e5bfaeed 1450 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
ccbf0679
FH
1451}
1452
887cd4e3
NT
1453size_t ZSTD_compressBlock_btopt_extDict(
1454 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
6cb24546 1455 const void* src, size_t srcSize)
721726d6 1456{
e5bfaeed 1457 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
721726d6
NT
1458}
1459
887cd4e3
NT
1460size_t ZSTD_compressBlock_btultra_extDict(
1461 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
6cb24546 1462 const void* src, size_t srcSize)
721726d6 1463{
e5bfaeed 1464 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
721726d6 1465}
e9448cdf
YC
1466
1467/* note : no btultra2 variant for extDict nor dictMatchState,
be9e561d 1468 * because btultra2 is not meant to work with dictionaries
e9448cdf 1469 * and is only specific for the first block (no prefix) */