From: Yann Collet
Date: Fri, 2 Dec 2016 00:13:35 +0000 (-0800)
Subject: introduced ext_wksp variants of count to reduce stack memory usage
X-Git-Tag: v1.1.2~51
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e928f7e16db5113e09902b822a2ce82e05624f11;p=thirdparty%2Fzstd.git

introduced ext_wksp variants of count to reduce stack memory usage
---

diff --git a/lib/common/fse.h b/lib/common/fse.h
index 0588651e2..8b07d184d 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -294,8 +294,31 @@ If there is an error, the function will return an error code, which can be teste
 /* *****************************************
 *  FSE advanced API
 *******************************************/
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of >= `1024` unsigned
+ */
+size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                      const void* source, size_t sourceSize, unsigned* workSpace);
+
+/** FSE_countFast() :
+ *  same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
+ */
 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
-/**< same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */
+
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of minimum `1024` unsigned
+ */
+size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
+
+/*! FSE_count_simple
+ * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
+ * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming `count` is sized for `*maxSymbolValuePtr+1` values).
+*/
+size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+
+
 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
 /**< same as FSE_optimalTableLog(), which used `minus==2` */
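To make the intent of these new entry points concrete: the `workSpace` argument replaces the U32[1024] (roughly 4 KB) counter table that the older functions kept on the stack, so a caller can park that scratch area on the heap or inside a long-lived structure and reuse it. A minimal caller-side sketch, assuming only the declarations above; the helper name is invented for illustration, and the FSE_STATIC_LINKING_ONLY define is an assumption about where the advanced API ends up in fse.h:

#define FSE_STATIC_LINKING_ONLY   /* assumption : the advanced counting API may sit behind this guard */
#include "fse.h"

/* Hypothetical helper : histogram `src` into count[256] without putting
 * the 1024-unsigned scratch table on this function's stack. */
static size_t histogram_with_workspace(unsigned count[256],
                                       const void* src, size_t srcSize,
                                       unsigned workSpace[1024])
{
    unsigned maxSymbolValue = 255;   /* in : highest symbol expected ; out : highest symbol found */
    size_t const maxCount = FSE_count_wksp(count, &maxSymbolValue, src, srcSize, workSpace);
    if (FSE_isError(maxCount)) return maxCount;   /* propagate the error code unchanged */
    return maxCount;   /* occurrences of the most frequent byte */
}

A single heap allocation of 1024 * sizeof(unsigned) can then back any number of such calls, which is the pattern the zstd_compress.c hunks at the end of this patch adopt with `tmpCounters`.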
@@ -334,13 +357,9 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
 *******************************************/
 
 /*! This API consists of small unitary functions, which highly benefit from being inlined.
-    You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
-    Visual seems to do it automatically.
-    For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
-    If none of these solutions is applicable, include "fse.c" directly.
+    Hence their bodies are included in the next section.
 */
-typedef struct
-{
+typedef struct {
     ptrdiff_t   value;
     const void* stateTable;
     const void* symbolTT;
@@ -400,8 +419,7 @@ If there is an error, it returns an errorCode (which can be tested using FSE_isE
 /* *****************************************
 *  FSE symbol decompression API
 *******************************************/
-typedef struct
-{
+typedef struct {
     size_t      state;
     const void* table;   /* precise table may vary, depending on U16 */
 } FSE_DState_t;
diff --git a/lib/common/huf.h b/lib/common/huf.h
index 29bab4b76..06568f08a 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -90,6 +90,11 @@ const char* HUF_getErrorName(size_t code);   /**< provides error code string (us
 *   Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */
 size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
 
+/** HUF_compress4X_wksp() :
+*   Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */
+size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, unsigned* workSpace);
+
+
 
 #ifdef HUF_STATIC_LINKING_ONLY
 
@@ -208,12 +213,13 @@ size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* c
 
 /* single stream variants */
 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, unsigned* workSpace);   /**< `workSpace` must be a table of at least 1024 unsigned */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
 
 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
 
-size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /**< automatic selection of single or double symbol decoder, based on DTable */
 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
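The huf.h additions mirror the FSE side: the existing HUF_compress2() and HUF_compress1X() keep their signatures and simply wrap the new *_wksp variants with a stack temporary, while callers that care about stack depth can hand in their own buffer. A hedged caller-side sketch; the helper name and buffer ownership are illustrative, not part of the library:

#include "huf.h"

/* Illustrative only : 4-stream Huffman compression with caller-owned scratch,
 * avoiding the ~4 KB that HUF_compress2() would otherwise place on the stack. */
static size_t compress_with_workspace(void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      unsigned workSpace[1024])
{
    size_t const cSize = HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
                                             255 /* maxSymbolValue */, 11 /* tableLog */,
                                             workSpace);
    if (HUF_isError(cSize)) return cSize;
    if (cSize == 0) return 0;   /* not compressible : caller should emit src verbatim */
    return cSize;
}

The maxSymbolValue/tableLog pair (255, 11) matches what zstd_compress.c passes for literals later in this patch.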
diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c
index e4b3dcaca..840a3fec5 100644
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@@ -308,14 +308,14 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
 *  Counting histogram
 ****************************************************************/
 /*! FSE_count_simple
-    This function just counts byte values within `src`,
-    and store the histogram into table `count`.
-    This function is unsafe : it doesn't check that all values within `src` can fit into `count`.
+    This function counts byte values within `src`, and stores the histogram into table `count`.
+    It doesn't use any additional memory.
+    But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
     For this reason, prefer using a table `count` with 256 elements.
     @return : count of most numerous element
 */
-static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
-                               const void* src, size_t srcSize)
+size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                        const void* src, size_t srcSize)
 {
     const BYTE* ip = (const BYTE*)src;
     const BYTE* const end = ip + srcSize;
@@ -409,31 +409,41 @@ static size_t FSE_count_parallel_wksp(
     return (size_t)max;
 }
 
-
-static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
-                                 const void* source, size_t sourceSize,
-                                 unsigned checkMax)
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of >= `1024` unsigned */
+size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                          const void* source, size_t sourceSize, unsigned* workSpace)
 {
-    U32 tmpCounters[1024];
-    return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMax, tmpCounters);
+    if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+    return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
 }
 
-
 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
                      const void* source, size_t sourceSize)
 {
-    if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
-    return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0);
+    unsigned tmpCounters[1024];
+    return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
 }
 
-size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
-                 const void* source, size_t sourceSize)
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of >= `1024` unsigned */
+size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                      const void* source, size_t sourceSize, unsigned* workSpace)
 {
     if (*maxSymbolValuePtr < 255)
-        return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1);
+        return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
     *maxSymbolValuePtr = 255;
-    return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize);
+    return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
+}
+
+size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
+                 const void* src, size_t srcSize)
+{
+    unsigned tmpCounters[1024];
+    return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
 }
 
@@ -764,7 +774,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
 
 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
 
-#define CHECK_E_F(e, f) size_t const e = f; if (FSE_isError(e)) return f
+#define CHECK_E_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
 #define CHECK_F(f)   { CHECK_E_F(_var_err__, f); }
 
 /* FSE_compress_wksp() :
@@ -773,9 +783,6 @@ size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
 */
 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
 {
-    const BYTE* const istart = (const BYTE*) src;
-    const BYTE* ip = istart;
-
     BYTE* const ostart = (BYTE*) dst;
     BYTE* op = ostart;
     BYTE* const oend = ostart + dstSize;
@@ -794,14 +801,14 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
 
     /* Scan input and build symbol stats */
-    {   CHECK_E_F(maxCount, FSE_count(count, &maxSymbolValue, ip, srcSize) );
+    {   CHECK_E_F(maxCount, FSE_count(count, &maxSymbolValue, src, srcSize) );
         if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
         if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
         if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
     }
 
     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue) );
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
 
     /* Write table description header */
     {   CHECK_E_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
@@ -810,7 +817,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
 
     /* Compress */
     CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
-    {   CHECK_E_F(cSize, FSE_compress_usingCTable(op, oend - op, ip, srcSize, CTable) );
+    {   CHECK_E_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
         if (cSize == 0) return 0;   /* not enough space for compressed data */
         op += cSize;
     }
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index 3a1c05110..ff2e82ae8 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -56,6 +56,8 @@
 *  Error Management
 ****************************************************************/
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+#define CHECK_E_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
+#define CHECK_F(f)   { CHECK_E_F(_var_err__, f); }
 
 
 /* **************************************************************
@@ -70,18 +72,60 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
 /* *******************************************
 *  HUF : Huffman block compression
 *********************************************/
+/* HUF_compressWeights() :
+ * Same as FSE_compress(), but dedicated to huff0's weights compression.
+ * The use case needs much less stack memory.
+ * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
+ */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    U32 maxSymbolValue = HUF_TABLELOG_MAX;
+    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
+
+    U32 count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+
+    /* init conditions */
+    if (wtSize <= 1) return 0;   /* Not compressible */
+
+    /* Scan input and build symbol stats */
+    {   CHECK_E_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
+        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
+    }
+
+    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
+
+    /* Write table description header */
+    {   CHECK_E_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
+        op += hSize;
+    }
+
+    /* Compress */
+    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
+    {   CHECK_E_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
+
+    return op-ostart;
+}
+
+
 struct HUF_CElt_s {
     U16  val;
     BYTE nbBits;
 };   /* typedef'd to HUF_CElt within "huf.h" */
 
-typedef struct nodeElt_s {
-    U32 count;
-    U16 parent;
-    BYTE byte;
-    BYTE nbBits;
-} nodeElt;
-
 /*! HUF_writeCTable() :
     `CTable` : huffman tree to save, using huf representation.
    @return : size of saved CTable */
@@ -91,8 +135,6 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
     BYTE* op = (BYTE*)dst;
-#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-    FSE_CTable scratchBuffer[FSE_WKSP_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
     U32 n;
 
     /* check conditions */
@@ -106,18 +148,17 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
         huffWeight[n] = bitsToWeight[CTable[n].nbBits];
 
     /* attempt weights compression by FSE */
-    {   size_t const size = FSE_compress_wksp(op+1, maxDstSize-1, huffWeight, maxSymbolValue, HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER, scratchBuffer, sizeof(scratchBuffer));
-        if (FSE_isError(size)) return size;
-        if ((size>1) & (size < maxSymbolValue/2)) {   /* FSE compressed */
-            op[0] = (BYTE)size;
-            return size+1;
+    {   CHECK_E_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
+        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)hSize;
+            return hSize+1;
     }   }
 
-    /* raw values */
+    /* write raw values as 4-bits (max : 15) */
     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
-    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
+    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
     for (n=0; n<maxSymbolValue; n+=2)
         op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
     return ((maxSymbolValue+1)/2) + 1;
 
     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
 
@@ -175,6 +213,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si
 }
 
 
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
 {
     const U32 largestBits = huffNode[lastNonNull].nbBits;
@@ -280,6 +325,9 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
 }
 
 
+/** HUF_buildCTable() :
+ *  Note : count is used before the tree is written, so the two can safely overlap
+ */
 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
 size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
 {
@@ -420,32 +468,28 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
 
     if (srcSize < 12) return 0;   /* no saving possible : too small input */
 
     op += 6;   /* jumpTable */
 
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_E_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
         if (cSize==0) return 0;
         MEM_writeLE16(ostart, (U16)cSize);
         op += cSize;
     }
 
     ip += segmentSize;
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_E_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
         if (cSize==0) return 0;
         MEM_writeLE16(ostart+2, (U16)cSize);
         op += cSize;
     }
 
     ip += segmentSize;
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_E_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
         if (cSize==0) return 0;
         MEM_writeLE16(ostart+4, (U16)cSize);
         op += cSize;
     }
 
     ip += segmentSize;
-    {   size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable);
-        if (HUF_isError(cSize)) return cSize;
+    {   CHECK_E_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) );
         if (cSize==0) return 0;
         op += cSize;
     }
 
@@ -454,18 +498,21 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
 }
 
 
+/* `workSpace` must be a table of at least 1024 unsigned */
 static size_t HUF_compress_internal (
                 void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
-                unsigned singleStream)
+                unsigned singleStream, unsigned* workSpace)
 {
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
 
-    U32 count[HUF_SYMBOLVALUE_MAX+1];
-    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
+    union {
+        U32 count[HUF_SYMBOLVALUE_MAX+1];
+        HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
+    } table;   /* `count` can overlap with `CTable`; saves 1 KB */
 
     /* checks & inits */
     if (!srcSize) return 0;  /* Uncompressed (note : 1 means rle, so first byte must be correct) */
@@ -476,30 +523,27 @@ static size_t HUF_compress_internal (
     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
 
     /* Scan input and build symbol stats */
-    {   size_t const largest = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
-        if (HUF_isError(largest)) return largest;
+    {   CHECK_E_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace) );
         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
         if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
     }
 
     /* Build Huffman Tree */
     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   size_t const maxBits = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
-        if (HUF_isError(maxBits)) return maxBits;
+    {   CHECK_E_F(maxBits, HUF_buildCTable (table.CTable, table.count, maxSymbolValue, huffLog) );
         huffLog = (U32)maxBits;
     }
 
     /* Write table description header */
-    {   size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog);
-        if (HUF_isError(hSize)) return hSize;
+    {   CHECK_E_F(hSize, HUF_writeCTable (op, dstSize, table.CTable, maxSymbolValue, huffLog) );
         if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
         op += hSize;
     }
 
     /* Compress */
     {   size_t const cSize = (singleStream) ?
-                HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :   /* single segment */
-                HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+                HUF_compress1X_usingCTable(op, oend - op, src, srcSize, table.CTable) :   /* single segment */
+                HUF_compress4X_usingCTable(op, oend - op, src, srcSize, table.CTable);
         if (HUF_isError(cSize)) return cSize;
         if (cSize==0) return 0;   /* uncompressible */
         op += cSize;
@@ -513,21 +557,36 @@ static size_t HUF_compress_internal (
 }
 
 
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog, unsigned* workSpace)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace);
+}
+
 size_t HUF_compress1X (void* dst, size_t dstSize,
                  const void* src, size_t srcSize,
                  unsigned maxSymbolValue, unsigned huffLog)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1);
+    unsigned workSpace[1024];
+    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace);
+}
+
+size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog, unsigned* workSpace)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace);
 }
 
 size_t HUF_compress2 (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0);
+    unsigned workSpace[1024];
+    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace);
 }
 
-
 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
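Since HUF_compress2() is now just an `unsigned workSpace[1024]` plus a call to HUF_compress4X_wksp(), the two entry points should behave identically; only the ownership of the scratch memory changes. A small, hypothetical sanity check expressing that expectation (not taken from the zstd test suite):

#include <string.h>
#include "huf.h"

/* Hypothetical check : same input, same parameters => same compressed output,
 * whether the 1024-unsigned scratch lives on the stack or is caller-provided. */
static int wksp_matches_legacy(void* dst1, void* dst2, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               unsigned workSpace[1024])
{
    size_t const r1 = HUF_compress2(dst1, dstCapacity, src, srcSize, 255, 11);
    size_t const r2 = HUF_compress4X_wksp(dst2, dstCapacity, src, srcSize, 255, 11, workSpace);
    if (HUF_isError(r1) || HUF_isError(r2)) return 0;
    return (r1 == r2) && ((r1 == 0) || memcmp(dst1, dst2, r1) == 0);
}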
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index e93f6671f..853b0c3bf 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -82,6 +82,7 @@ struct ZSTD_CCtx_s
     FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
     FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+    unsigned tmpCounters[1024];
 };
 
 ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -470,8 +471,8 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
         singleStream = 1;
         cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
     } else {
-        cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11)
-                                : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11);
+        cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters)
+                                : HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters);
     }
 
     if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
@@ -594,7 +595,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
 
     /* CTable for Literal Lengths */
     {   U32 max = MaxLL;
-        size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq);
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->tmpCounters);
         if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
             *op++ = llCodeTable[0];
             FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
@@ -618,7 +619,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
 
     /* CTable for Offsets */
     {   U32 max = MaxOff;
-        size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->tmpCounters);
         if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
             *op++ = ofCodeTable[0];
             FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
@@ -642,7 +643,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
 
     /* CTable for MatchLengths */
     {   U32 max = MaxML;
-        size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->tmpCounters);
         if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
             *op++ = *mlCodeTable;
             FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
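Taken together, the zstd_compress.c hunks show the end state this patch is aiming for: the counting scratch lives once inside the long-lived ZSTD_CCtx (`tmpCounters`) and is passed down to every FSE_countFast_wksp() and HUF_compress*_wksp() call, so deep call chains no longer accumulate 4 KB of stack per histogram. The same idea works for any embedder; a hypothetical sketch of the pattern, with the context type and helper invented for illustration:

#define FSE_STATIC_LINKING_ONLY   /* assumption : the advanced counting API may sit behind this guard */
#include "fse.h"

/* Hypothetical embedder context : one heap allocation carries the scratch
 * for every counting call made through this context. */
typedef struct {
    unsigned tmpCounters[1024];   /* reused by all *_wksp calls below */
    unsigned count[256];          /* latest histogram */
} my_encoder_ctx;

static size_t my_encoder_histogram(my_encoder_ctx* ctx, const void* src, size_t srcSize)
{
    unsigned maxSymbolValue = 255;
    /* no large array on this stack frame : the scratch lives inside *ctx */
    return FSE_countFast_wksp(ctx->count, &maxSymbolValue, src, srcSize, ctx->tmpCounters);
}

Allocate the context once (for example with malloc(sizeof(my_encoder_ctx))) and reuse it across calls, mirroring how a ZSTD_CCtx is created once and reused across blocks.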