******************************************/
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
-size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder */
+size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
/* ****************************************
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
-size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbol decoder */
+size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
const void* src, size_t srcSize)
{
U32 weightTotal;
- U32 tableLog;
const BYTE* ip = (const BYTE*) src;
size_t iSize = ip[0];
size_t oSize;
/* collect weight stats */
memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
weightTotal = 0;
- { U32 n; for (n=0; n<oSize; n++) {
- if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- rankStats[huffWeight[n]]++;
- weightTotal += (1 << huffWeight[n]) >> 1;
- }}
+ { U32 n; for (n=0; n<oSize; n++) {
+ if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ rankStats[huffWeight[n]]++;
+ weightTotal += (1 << huffWeight[n]) >> 1;
+ } }
/* get last non-null symbol weight (implied, total must be 2^n) */
- tableLog = BIT_highbit32(weightTotal) + 1;
- if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
- /* determine last weight */
- { U32 const total = 1 << tableLog;
- U32 const rest = total - weightTotal;
- U32 const verif = 1 << BIT_highbit32(rest);
- U32 const lastWeight = BIT_highbit32(rest) + 1;
- if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
- huffWeight[oSize] = (BYTE)lastWeight;
- rankStats[lastWeight]++;
- }
+ { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+ if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+ *tableLogPtr = tableLog;
+ /* determine last weight */
+ { U32 const total = 1 << tableLog;
+ U32 const rest = total - weightTotal;
+ U32 const verif = 1 << BIT_highbit32(rest);
+ U32 const lastWeight = BIT_highbit32(rest) + 1;
+ if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
+ huffWeight[oSize] = (BYTE)lastWeight;
+ rankStats[lastWeight]++;
+ } }
/* check tree construction validity */
if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
/* results */
*nbSymbolsPtr = (U32)(oSize+1);
- *tableLogPtr = tableLog;
return iSize+1;
}
static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
memcpy(op, ds+val, sizeof(HUF_DSeqX6));
BIT_skipBits(DStream, dd[val].nbBits);
return dd[val].nbBytes;
}
-static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
+static U32 HUF_decodeLastSymbolsX6(void* op, U32 const maxL, BIT_DStream_t* DStream,
const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
{
- const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- U32 length = dd[val].nbBytes;
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ U32 const length = dd[val].nbBytes;
if (length <= maxL) {
memcpy(op, ds+val, length);
BIT_skipBits(DStream, dd[val].nbBits);
return p-pStart;
}
-
size_t HUF_decompress1X6_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
-
- const U32 dtLog = DTable[0];
- size_t errorCode;
+ BIT_DStream_t bitD;
/* Init */
- BIT_DStream_t bitD;
- errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode;
+ { size_t const errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
+ if (HUF_isError(errorCode)) return errorCode; }
/* finish bitStreams one by one */
- HUF_decodeStreamX6(ostart, &bitD, oend, DTable, dtLog);
+ { U32 const dtLog = DTable[0];
+ HUF_decodeStreamX6(ostart, &bitD, oend, DTable, dtLog); }
/* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
const BYTE* ip = (const BYTE*) cSrc;
- size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
+ size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize;
}
+#define HUF_DECODE_ROUNDX6 \
+ HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
+ HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
+ HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
+ HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
+ HUF_DECODE_SYMBOLX6_1(op1, &bitD1); \
+ HUF_DECODE_SYMBOLX6_1(op2, &bitD2); \
+ HUF_DECODE_SYMBOLX6_1(op3, &bitD3); \
+ HUF_DECODE_SYMBOLX6_1(op4, &bitD4); \
+ HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
+ HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
+ HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
+ HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
+ HUF_DECODE_SYMBOLX6_0(op1, &bitD1); \
+ HUF_DECODE_SYMBOLX6_0(op2, &bitD2); \
+ HUF_DECODE_SYMBOLX6_0(op3, &bitD3); \
+ HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
+
size_t HUF_decompress4X6_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
{
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+ if (dstSize < 64) return ERROR(dstSize_tooSmall); /* only work for dstSize >= 64 */
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
- size_t errorCode;
/* Init */
BIT_DStream_t bitD1;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode;
- errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode;
-
- /* 16-64 symbols per loop (4-16 symbols per stream) */
+ { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
+ if (HUF_isError(errorCode)) return errorCode; }
+ { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
+ if (HUF_isError(errorCode)) return errorCode; }
+ { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
+ if (HUF_isError(errorCode)) return errorCode; }
+ { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
+ if (HUF_isError(errorCode)) return errorCode; }
+
+ /* 4-64 symbols per loop (1-16 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
- HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
+ if (endSignal==BIT_DStream_unfinished) {
+ HUF_DECODE_ROUNDX6;
+ if (sizeof(bitD1.bitContainer)==4) { /* need to decode at least 4 bytes per stream */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ HUF_DECODE_ROUNDX6;
+ }
+ { U32 const saved2 = MEM_read32(opStart2); /* saved from overwrite */
+ U32 const saved3 = MEM_read32(opStart3);
+ U32 const saved4 = MEM_read32(opStart4);
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
+ HUF_DECODE_ROUNDX6;
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+ MEM_write32(opStart2, saved2);
+ MEM_write32(opStart3, saved3);
+ MEM_write32(opStart4, saved4);
+ } }
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
- /* note : op4 supposed already verified within main loop */
+ /* note : op4 already verified within main loop */
/* finish bitStreams one by one */
HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
- /* estimate decompression time */
- U32 Q;
- const U32 D256 = (U32)(dstSize >> 8);
- U32 Dtime[3];
- U32 algoNb = 0;
- int n;
+ U32 Dtime[3]; /* decompression time estimation */
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
/* decoder timing evaluation */
- Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
- for (n=0; n<3; n++)
- Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+ { U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
+ U32 const D256 = (U32)(dstSize >> 8);
+ U32 n; for (n=0; n<3; n++)
+ Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
+ }
Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
- if (Dtime[1] < Dtime[0]) algoNb = 1;
- //if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
-
- return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+ { U32 algoNb = 0;
+ if (Dtime[1] < Dtime[0]) algoNb = 1;
+ if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+ }
//return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
//return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */