A minor change in 5434de0 turned a `<=` into a `<`,
and as an indirect consequence allowed a compression attempt on literals when there are only 6 literals to compress
(the previous limit was effectively 7 literals).
This is not a problem in itself, as the threshold is merely a heuristic,
but it surfaced a bug that had always been there, and was simply never triggered before due to the previous limit.
This bug would make the literal compressor believe that all literals are the same symbol,
whereas in the exact case where nbLiterals==6, combined with a fairly wild set of other limit conditions,
this outcome could be false, resulting in data corruption.
Replaced the blind heuristic with an actual test covering all limit cases,
so that even if the threshold is changed again in the future,
the detection of RLE mode will remain reliable.
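
To illustrate the distinction, here is a minimal standalone sketch of the new detection logic; `allBytesIdentical` mirrors the helper introduced in the patch below, while the driver and its sample buffers are purely hypothetical:

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef unsigned char BYTE;

    /* Returns 1 if every byte of @src equals the first one, 0 otherwise. */
    static int allBytesIdentical(const void* src, size_t srcSize)
    {
        const BYTE b = ((const BYTE*)src)[0];
        size_t p;
        assert(src != NULL);
        assert(srcSize >= 1);
        for (p = 1; p < srcSize; p++) {
            if (((const BYTE*)src)[p] != b) return 0;
        }
        return 1;
    }

    int main(void)
    {
        const BYTE rle[6]   = { 7, 7, 7, 7, 7, 7 };
        const BYTE mixed[6] = { 7, 7, 7, 7, 7, 8 };
        /* The old heuristic inferred "all identical" from the compressed
         * size alone; the explicit test cannot confuse the two cases below. */
        printf("rle   -> %d\n", allBytesIdentical(rle, sizeof rle));     /* prints 1 */
        printf("mixed -> %d\n", allBytesIdentical(mixed, sizeof mixed)); /* prints 0 */
        return 0;
    }
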
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2,
- unsigned suspectUncompressible, HUF_depth_mode depthMode);
+ int suspectUncompressible, HUF_depth_mode depthMode);
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2,
- unsigned suspectUncompressible, HUF_depth_mode depthMode);
+ int suspectUncompressible, HUF_depth_mode depthMode);
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
HUF_nbStreams_e nbStreams,
void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
- const int bmi2, unsigned suspectUncompressible, HUF_depth_mode depthMode)
+ const int bmi2, int suspectUncompressible, HUF_depth_mode depthMode)
{
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
BYTE* const ostart = (BYTE*)dst;
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
- int bmi2, unsigned suspectUncompressible, HUF_depth_mode depthMode)
+ int bmi2, int suspectUncompressible, HUF_depth_mode depthMode)
{
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2,
- unsigned suspectUncompressible, HUF_depth_mode depthMode)
+ int suspectUncompressible, HUF_depth_mode depthMode)
{
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize,
const BYTE* const ip = (const BYTE*)src;
size_t u;
for (u=0; u<srcSize; u++) {
- RAWLOG(6, " %02X", ip[u]); (void)ip;
+ RAWLOG(5, " %02X", ip[u]); (void)ip;
}
- RAWLOG(6, " \n");
+ RAWLOG(5, " \n");
return srcSize;
}
return srcSize + flSize;
}
+static int allBytesIdentical(const void* src, size_t srcSize)
+{
+ assert(srcSize >= 1);
+ assert(src != NULL);
+ { const BYTE b = ((const BYTE*)src)[0];
+ size_t p;
+ for (p=1; p<srcSize; p++) {
+ if (((const BYTE*)src)[p] != b) return 0;
+ }
+ return 1;
+ }
+}
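Cost note: given the call sites in this patch, the loop above is cheap in practice. In the encoder path it only runs behind a short-circuited `srcSize >= 8` test, so it scans at most 7 bytes; the other call site is an `assert()`, which compiles away in release builds.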
+
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
- (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
+ assert(dstCapacity >= 4); (void)dstCapacity;
+ assert(allBytesIdentical(src, srcSize));
switch(flSize)
{
}
ostart[flSize] = *(const BYTE*)src;
- DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
+ DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
return flSize+1;
}
/* btultra2 : min 8 bytes;
* then 2x larger for each successive compression strategy
* max threshold 64 bytes */
- { int const shift = MIN(9-strategy, 3);
- size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : 8 << shift;
+ { int const shift = MIN(9-(int)strategy, 3);
+ size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
return mintc;
}
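For concreteness, assuming the usual ZSTD_strategy numbering (ZSTD_btultra2 == 9): the shift is MIN(9-strategy, 3), so mintc is 8 for btultra2, 16 for btultra, 32 for btopt, and capped at 64 for btlazy2 and every faster strategy; when a valid Huffman table can be reused (HUF_repeat_valid), the threshold drops to 6 regardless of strategy.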
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = (strategy < ZSTD_lazy) ? srcSize <= 1024 : 0;
HUF_depth_mode const depthMode = (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD) ? HUF_depth_optimal : HUF_depth_fast;
- typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int, int, unsigned, HUF_depth_mode);
+ typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int, int, int, HUF_depth_mode);
huf_compress_f huf_compress;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
(HUF_CElt*)nextHuf->CTable,
&repeat, preferRepeat,
bmi2, suspectUncompressible, depthMode);
+ DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
- DEBUGLOG(5, "Reusing previous huffman table");
+ DEBUGLOG(5, "reusing statistics from previous huffman block");
hType = set_repeat;
}
}
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
} }
if (cLitSize==1) {
- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
- return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
- }
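+ /* A cLitSize of 1 usually means all literals are the same byte (RLE),
+  * but it can also legitimately be the compressed size of a tiny input.
+  * The second outcome is only possible when srcSize < 8 : with >= 8 symbols
+  * and more than one distinct byte, the Huffman bitstream necessarily
+  * exceeds 1 byte. So for short inputs, explicitly verify that all bytes
+  * are identical before selecting RLE mode. */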
+ if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+ } }
if (hType == set_compressed) {
/* using a newly constructed table */
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+/* ZSTD_compressRleLiteralsBlock() :
+ * Conditions :
+ * - All bytes in @src are identical
+ * - dstCapacity >= 4 */
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
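For example, under these conditions a 6-byte run encodes into 2 bytes: a 1-byte header (flSize == 1, since srcSize <= 31) plus the single repeated literal.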
/* ZSTD_compressLiterals():
}
ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
/* Allow caller to get size read */
+ DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr);
*srcPtr = ip;
*srcSizePtr = remainingSrcSize;
return (size_t)(op-ostart);