From: Yann Collet Date: Fri, 21 Apr 2017 05:54:54 +0000 (-0700) Subject: made room in workspace for FSE tables X-Git-Tag: v1.2.0^2~26^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=71ddeb67b1998cc48424dcc524d7e1f9bf568fd6;p=thirdparty%2Fzstd.git made room in workspace for FSE tables still need to be transferred from CCtx into workspace --- diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 6855297fd..4530cff9a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -27,10 +27,11 @@ static const U32 g_searchStrength = 8; /* control skip over incompressible dat #define HASH_READ_SIZE 8 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +/* entropy tables always have same size */ +static size_t const hufCTable_size = HUF_CTABLE_SIZE(255); +static size_t const litlengthCTable_size = FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL) * sizeof(FSE_CTable); static size_t const offcodeCTable_size = FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff) * sizeof(FSE_CTable); static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE_U32(MLFSELog, MaxML) * sizeof(FSE_CTable); -static size_t const litlengthCTable_size = FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL) * sizeof(FSE_CTable); - /*-************************************* @@ -94,9 +95,9 @@ struct ZSTD_CCtx_s { U32* hashTable; U32* hashTable3; U32* chainTable; - HUF_CElt* hufTable; - U32 flagStaticTables; - HUF_repeat flagStaticHufTable; + HUF_repeat hufCTable_repeatMode; + HUF_CElt* hufCTable; + U32 fseCTables_ready; FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; @@ -221,11 +222,13 @@ size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams) size_t const hSize = ((size_t)1) << cParams.hashLog; U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog); size_t const h3Size = ((size_t)1) << hashLog3; + size_t const entropySpace = hufCTable_size + litlengthCTable_size + + offcodeCTable_size + matchlengthCTable_size; size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<params)) { - zc->flagStaticTables = 0; - zc->flagStaticHufTable = HUF_repeat_none; + zc->fseCTables_ready = 0; + zc->hufCTable_repeatMode = HUF_repeat_none; return ZSTD_continueCCtx(zc, params, frameContentSize); } @@ -287,12 +290,12 @@ static size_t ZSTD_resetCCtx_internal (ZSTD_CCtx* zc, void* ptr; /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<workSpaceSize < neededSpace) { zc->workSpaceSize = 0; ZSTD_free(zc->workSpace, zc->customMem); @@ -302,8 +305,8 @@ static size_t ZSTD_resetCCtx_internal (ZSTD_CCtx* zc, ptr = zc->workSpace; /* entropy space */ - zc->hufTable = (HUF_CElt*)ptr; - ptr = (U32*)zc->hufTable + HUF_CTABLE_SIZE_U32(255); /* note : HUF_CElt* is incomplete type, size is estimated via macro */ + zc->hufCTable = (HUF_CElt*)ptr; + ptr = (char*)zc->hufCTable + hufCTable_size; /* note : HUF_CElt* is incomplete type, size is estimated via macro */ } } @@ -317,8 +320,8 @@ static size_t ZSTD_resetCCtx_internal (ZSTD_CCtx* zc, zc->stage = ZSTDcs_init; zc->dictID = 0; zc->loadedDictEnd = 0; - zc->flagStaticTables = 0; - zc->flagStaticHufTable = HUF_repeat_none; + zc->fseCTables_ready = 0; + zc->hufCTable_repeatMode = HUF_repeat_none; zc->nextToUpdate = 1; zc->nextSrc = NULL; zc->base = NULL; @@ -329,7 +332,7 @@ static size_t ZSTD_resetCCtx_internal (ZSTD_CCtx* zc, zc->hashLog3 = hashLog3; zc->seqStore.litLengthSum = 0; - ptr = (U32*)zc->hufTable + HUF_CTABLE_SIZE_U32(255); /* note : HUF_CElt* is incomplete type, size is estimated via macro */ + ptr = (char*)zc->hufCTable + hufCTable_size; /* note : HUF_CElt* is incomplete type, size is estimated via macro */ /* opt parser space */ if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) { @@ -413,15 +416,15 @@ size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, dstCCtx->dictID = srcCCtx->dictID; /* copy entropy tables */ - dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; - if (srcCCtx->flagStaticTables) { + dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready; + if (srcCCtx->fseCTables_ready) { memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size); memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size); memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size); } - dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable; - if (srcCCtx->flagStaticHufTable) { - memcpy(dstCCtx->hufTable, srcCCtx->hufTable, HUF_CTABLE_SIZE(255)); + dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode; + if (srcCCtx->hufCTable_repeatMode) { + memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size); } return 0; @@ -549,28 +552,28 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, /* small ? don't even attempt compression (speed opt) */ # define LITERAL_NOENTROPY 63 - { size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY; + { size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY; if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ - { HUF_repeat repeat = zc->flagStaticHufTable; + { HUF_repeat repeat = zc->hufCTable_repeatMode; int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0; if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat) + zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufCTable, &repeat, preferRepeat) : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat); + zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufCTable, &repeat, preferRepeat); if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */ - else { zc->flagStaticHufTable = HUF_repeat_check; } /* now have a table to reuse */ + else { zc->hufCTable_repeatMode = HUF_repeat_check; } /* now have a table to reuse */ } if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) { - zc->flagStaticHufTable = HUF_repeat_none; + zc->hufCTable_repeatMode = HUF_repeat_none; return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } if (cLitSize==1) { - zc->flagStaticHufTable = HUF_repeat_none; + zc->hufCTable_repeatMode = HUF_repeat_none; return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); } @@ -694,7 +697,7 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc, *op++ = llCodeTable[0]; FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); LLtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { LLtype = set_repeat; } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); @@ -718,7 +721,7 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc, *op++ = ofCodeTable[0]; FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); Offtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { Offtype = set_repeat; } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) { FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); @@ -742,7 +745,7 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc, *op++ = *mlCodeTable; FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); MLtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { MLtype = set_repeat; } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); @@ -760,7 +763,7 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc, } } *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - zc->flagStaticTables = 0; + zc->fseCTables_ready = 0; /* Encoding Sequences */ { BIT_CStream_t blockStream; @@ -839,7 +842,7 @@ _check_compressibility: { size_t const minGain = ZSTD_minGain(srcSize); size_t const maxCSize = srcSize - minGain; if ((size_t)(op-ostart) >= maxCSize) { - zc->flagStaticHufTable = HUF_repeat_none; + zc->hufCTable_repeatMode = HUF_repeat_none; return 0; } } @@ -2678,7 +2681,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr); dictPtr += 4; - { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dictPtr, dictEnd-dictPtr); + { size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr); if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); dictPtr += hufHeaderSize; } @@ -2738,8 +2741,8 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted); } } - cctx->flagStaticTables = 1; - cctx->flagStaticHufTable = HUF_repeat_valid; + cctx->fseCTables_ready = 1; + cctx->hufCTable_repeatMode = HUF_repeat_valid; return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize); } } diff --git a/lib/zstd.h b/lib/zstd.h index 5101fd26c..a99e497fc 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -71,48 +71,48 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< library version number; to * Simple API ***************************************/ /*! ZSTD_compress() : - Compresses `src` content as a single zstd compressed frame into already allocated `dst`. - Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. - @return : compressed size written into `dst` (<= `dstCapacity), - or an error code if it fails (which can be tested using ZSTD_isError()). */ + * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); /*! ZSTD_decompress() : - `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. - `dstCapacity` is an upper bound of originalSize. - If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - or an errorCode if it fails (which can be tested using ZSTD_isError()). */ + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize); /*! ZSTD_getDecompressedSize() : -* NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize. -* ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single -* frame, but distinguishes empty frames from frames with an unknown size, or errors. -* -* Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple -* concatenated frames in one buffer, and so is more general. -* As a result however, it requires more computation and entire frames to be passed to it, -* as opposed to ZSTD_getFrameContentSize which requires only a single frame's header. -* -* 'src' is the start of a zstd compressed frame. -* @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise. -* note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. -* When `return==0`, data to decompress could be any size. -* In which case, it's necessary to use streaming mode to decompress data. -* Optionally, application can still use ZSTD_decompress() while relying on implied limits. -* (For example, data may be necessarily cut into blocks <= 16 KB). -* note 2 : decompressed size is always present when compression is done with ZSTD_compress() -* note 3 : decompressed size can be very large (64-bits value), -* potentially larger than what local system can handle as a single memory segment. -* In which case, it's necessary to use streaming mode to decompress data. -* note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. -* Always ensure result fits within application's authorized limits. -* Each application can set its own limits. -* note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. */ + * NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize. + * ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single + * frame, but distinguishes empty frames from frames with an unknown size, or errors. + * + * Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple + * concatenated frames in one buffer, and so is more general. + * As a result however, it requires more computation and entire frames to be passed to it, + * as opposed to ZSTD_getFrameContentSize which requires only a single frame's header. + * + * 'src' is the start of a zstd compressed frame. + * @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise. + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==0`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can still use ZSTD_decompress() while relying on implied limits. + * (For example, data may be necessarily cut into blocks <= 16 KB). + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. */ ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); @@ -127,29 +127,29 @@ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readab * Explicit memory management ***************************************/ /*= Compression context -* When compressing many times, -* it is recommended to allocate a context just once, and re-use it for each successive compression operation. -* This will make workload friendlier for system's memory. -* Use one context per thread for parallel execution in multi-threaded environments. */ + * When compressing many times, + * it is recommended to allocate a context just once, and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution in multi-threaded environments. */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /*! ZSTD_compressCCtx() : - Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */ + * Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); /*= Decompression context -* When decompressing many times, -* it is recommended to allocate a context just once, and re-use it for each successive compression operation. -* This will make workload friendlier for system's memory. -* Use one context per thread for parallel execution in multi-threaded environments. */ + * When decompressing many times, + * it is recommended to allocate a context just once, and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution in multi-threaded environments. */ typedef struct ZSTD_DCtx_s ZSTD_DCtx; ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /*! ZSTD_decompressDCtx() : -* Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). */ + * Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). */ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);