From: Nick Terrell Date: Thu, 19 Jan 2023 00:30:10 +0000 (-0800) Subject: Replace Huffman boolean args with flags bit set X-Git-Tag: v1.5.4^2~40 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=329169189c248696c197ef9b88eb97c315bfb831;p=thirdparty%2Fzstd.git Replace Huffman boolean args with flags bit set --- diff --git a/lib/common/huf.h b/lib/common/huf.h index d085d7741..3d574bbec 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -78,15 +78,43 @@ typedef U32 HUF_DTable; * Advanced decompression functions ******************************************/ +/** + * Huffman flags bitset. + * For all flags, 0 is the default value. + */ +typedef enum { + /** + * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime. + * Otherwise: Ignored. + */ + HUF_flags_bmi2 = (1 << 0), + /** + * If set: Test possible table depths to find the one that produces the smallest header + encoded size. + * If unset: Use heuristic to find the table depth. + */ + HUF_flags_optimalDepth = (1 << 1), + /** + * If set: If the previous table can encode the input, always reuse the previous table. + * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output. + */ + HUF_flags_preferRepeat = (1 << 2), + /** + * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress. + * If unset: Always histogram the entire input. + */ + HUF_flags_suspectUncompressible = (1 << 3), + /** + * If set: Don't use assembly implementations + * If unset: Allow using assembly implementations + */ + HUF_flags_disableAsm = (1 << 4), +} HUF_flags_e; + /* **************************************** * HUF detailed API * ****************************************/ #define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra -typedef enum { - HUF_depth_fast, /** Use heuristic to find the table depth**/ - HUF_depth_optimal /** Test possible table depths to find the one that produces the smallest header + encoded size**/ - } HUF_depth_mode; /*! HUF_compress() does the following: * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") @@ -102,9 +130,9 @@ typedef enum { unsigned HUF_minTableLog(unsigned symbolCardinality); unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue); unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, - size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode); /* table is used as scratch space for building and testing tables, not a return value */ + size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); -size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); @@ -124,8 +152,7 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, - int suspectUncompressible, HUF_depth_mode depthMode); + HUF_CElt* hufTable, HUF_repeat* repeat, int flags); /** HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. @@ -157,7 +184,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize, void* workspace, size_t wkspSize, - int bmi2); + int flags); /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ @@ -200,7 +227,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); /* single stream variants */ /* ====================== */ -size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); /** HUF_compress1X_repeat() : * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * If it uses hufTable it does not modify hufTable or repeat. @@ -211,28 +238,27 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, - int suspectUncompressible, HUF_depth_mode depthMode); + HUF_CElt* hufTable, HUF_repeat* repeat, int flags); -size_t HUF_decompress1X_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); #ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /**< double-symbols decoder */ #endif /* BMI2 variants. * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. */ -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); #ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); #endif -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); #ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); #endif #ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); #endif #endif /* HUF_H_298734234 */ diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index e04ccd2eb..29871877a 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1105,9 +1105,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, static size_t HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) + const HUF_CElt* CTable, const int flags) { - if (bmi2) { + if (flags & HUF_flags_bmi2) { return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); } return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); @@ -1118,23 +1118,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, static size_t HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) + const HUF_CElt* CTable, const int flags) { - (void)bmi2; + (void)flags; return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); } #endif -size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) { - return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); } static size_t HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, const void* src, size_t srcSize, - const HUF_CElt* CTable, int bmi2) + const HUF_CElt* CTable, int flags) { size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ const BYTE* ip = (const BYTE*) src; @@ -1148,7 +1148,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, op += 6; /* jumpTable */ assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart, (U16)cSize); op += cSize; @@ -1156,7 +1156,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart+2, (U16)cSize); op += cSize; @@ -1164,7 +1164,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); if (cSize == 0 || cSize > 65535) return 0; MEM_writeLE16(ostart+4, (U16)cSize); op += cSize; @@ -1173,7 +1173,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, ip += segmentSize; assert(op <= oend); assert(ip <= iend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) ); if (cSize == 0 || cSize > 65535) return 0; op += cSize; } @@ -1181,9 +1181,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, return (size_t)(op-ostart); } -size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) { - return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); } typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; @@ -1191,11 +1191,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; static size_t HUF_compressCTable_internal( BYTE* const ostart, BYTE* op, BYTE* const oend, const void* src, size_t srcSize, - HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags) { size_t const cSize = (nbStreams==HUF_singleStream) ? - HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : - HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); + HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags); if (HUF_isError(cSize)) { return cSize; } if (cSize==0) { return 0; } /* uncompressible */ op += cSize; @@ -1243,12 +1243,12 @@ unsigned HUF_optimalTableLog( void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, - HUF_depth_mode depthMode) + int flags) { assert(srcSize > 1); /* Not supported, RLE should be used instead */ assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables)); - if (depthMode != HUF_depth_optimal) { + if (!(flags & HUF_flags_optimalDepth)) { /* cheap evaluation, based on FSE */ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); } @@ -1300,8 +1300,7 @@ HUF_compress_internal (void* dst, size_t dstSize, unsigned maxSymbolValue, unsigned huffLog, HUF_nbStreams_e nbStreams, void* workSpace, size_t wkspSize, - HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, - const int bmi2, int suspectUncompressible, HUF_depth_mode depthMode) + HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags) { HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); BYTE* const ostart = (BYTE*)dst; @@ -1322,15 +1321,15 @@ HUF_compress_internal (void* dst, size_t dstSize, if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; /* Heuristic : If old table is valid, use it for small inputs */ - if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) { return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, - nbStreams, oldHufTable, bmi2); + nbStreams, oldHufTable, flags); } /* If uncompressible data is suspected, do a smaller sampling first */ DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); - if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { + if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { size_t largestTotal = 0; DEBUGLOG(5, "input suspected incompressible : sampling to check"); { unsigned maxSymbolValueBegin = maxSymbolValue; @@ -1358,14 +1357,14 @@ HUF_compress_internal (void* dst, size_t dstSize, *repeat = HUF_repeat_none; } /* Heuristic : use existing table for small inputs */ - if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) { return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, - nbStreams, oldHufTable, bmi2); + nbStreams, oldHufTable, flags); } /* Build Huffman Tree */ - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, depthMode); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags); { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, maxSymbolValue, huffLog, &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); @@ -1390,7 +1389,7 @@ HUF_compress_internal (void* dst, size_t dstSize, if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, - nbStreams, oldHufTable, bmi2); + nbStreams, oldHufTable, flags); } } /* Use the new huffman table */ @@ -1402,21 +1401,20 @@ HUF_compress_internal (void* dst, size_t dstSize, } return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, - nbStreams, table->CTable, bmi2); + nbStreams, table->CTable, flags); } size_t HUF_compress1X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, - int bmi2, int suspectUncompressible, HUF_depth_mode depthMode) + HUF_CElt* hufTable, HUF_repeat* repeat, int flags) { DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize); return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_singleStream, workSpace, wkspSize, hufTable, - repeat, preferRepeat, bmi2, suspectUncompressible, depthMode); + repeat, flags); } /* HUF_compress4X_repeat(): @@ -1427,12 +1425,11 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, - int suspectUncompressible, HUF_depth_mode depthMode) + HUF_CElt* hufTable, HUF_repeat* repeat, int flags) { DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize); return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, HUF_fourStreams, workSpace, wkspSize, - hufTable, repeat, preferRepeat, bmi2, suspectUncompressible, depthMode); + hufTable, repeat, flags); } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a0463581a..fbb051ccc 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3369,7 +3369,7 @@ ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, ZSTD_hufCTablesMetadata_t* hufMetadata, const int literalsCompressionIsDisabled, void* workspace, size_t wkspSize, - HUF_depth_mode depthMode) + int hufFlags) { BYTE* const wkspStart = (BYTE*)workspace; BYTE* const wkspEnd = wkspStart + wkspSize; @@ -3430,7 +3430,7 @@ ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, /* Build Huffman Tree */ ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, depthMode); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags); assert(huffLog <= LitHufLog); { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue, huffLog, @@ -3538,14 +3538,14 @@ size_t ZSTD_buildBlockEntropyStats( { size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart); int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD); - HUF_depth_mode const depthMode = huf_useOptDepth ? HUF_depth_optimal : HUF_depth_fast; + int const hufFlags = huf_useOptDepth ? HUF_flags_optimalDepth : 0; entropyMetadata->hufMetadata.hufDesSize = ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, &prevEntropy->huf, &nextEntropy->huf, &entropyMetadata->hufMetadata, ZSTD_literalsCompressionIsDisabled(cctxParams), - workspace, wkspSize, depthMode); + workspace, wkspSize, hufFlags); FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); entropyMetadata->fseMetadata.fseTablesSize = diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c index 0dbc2c4d4..bfd4f11ab 100644 --- a/lib/compress/zstd_compress_literals.c +++ b/lib/compress/zstd_compress_literals.c @@ -160,9 +160,13 @@ size_t ZSTD_compressLiterals ( RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); { HUF_repeat repeat = prevHuf->repeatMode; - int const preferRepeat = (strategy < ZSTD_lazy) ? srcSize <= 1024 : 0; - HUF_depth_mode const depthMode = (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD) ? HUF_depth_optimal : HUF_depth_fast; - typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int, int, int, HUF_depth_mode); + int const flags = 0 + | (bmi2 ? HUF_flags_bmi2 : 0) + | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0) + | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0) + | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0); + + typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int); huf_compress_f huf_compress; if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat; @@ -171,8 +175,7 @@ size_t ZSTD_compressLiterals ( HUF_SYMBOLVALUE_MAX, LitHufLog, entropyWorkspace, entropyWorkspaceSize, (HUF_CElt*)nextHuf->CTable, - &repeat, preferRepeat, - bmi2, suspectUncompressible, depthMode); + &repeat, flags); DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize); if (repeat != HUF_repeat_none) { /* reused the existing table */ diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index f8624ee8e..638c4acbe 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -75,8 +75,9 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); } - { const size_t cSize = singleStream ? HUF_compress1X_usingCTable_bmi2(op, oend-op, literals, litSize, hufTable, bmi2) - : HUF_compress4X_usingCTable_bmi2(op, oend-op, literals, litSize, hufTable, bmi2); + { int const flags = bmi2 ? HUF_flags_bmi2 : 0; + const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags); op += cSize; cLitSize += cSize; if (cSize == 0 || ERR_isError(cSize)) { diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 260398962..400a4e44d 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -62,12 +62,6 @@ # define HUF_NEED_BMI2_FUNCTION 0 #endif -#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) -# define HUF_NEED_DEFAULT_FUNCTION 1 -#else -# define HUF_NEED_DEFAULT_FUNCTION 0 -#endif - /* ************************************************************** * Error Management ****************************************************************/ @@ -105,9 +99,9 @@ } \ \ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + size_t cSrcSize, HUF_DTable const* DTable, int flags) \ { \ - if (bmi2) { \ + if (flags & HUF_flags_bmi2) { \ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ } \ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ @@ -117,9 +111,9 @@ #define HUF_DGEN(fn) \ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + size_t cSrcSize, HUF_DTable const* DTable, int flags) \ { \ - (void)bmi2; \ + (void)flags; \ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ } @@ -335,7 +329,7 @@ typedef struct { BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; } HUF_ReadDTableX1_Workspace; -size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags) { U32 tableLog = 0; U32 nbSymbols = 0; @@ -350,7 +344,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags); if (HUF_isError(iSize)) return iSize; @@ -653,13 +647,11 @@ size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo } #endif -#if HUF_NEED_DEFAULT_FUNCTION static size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, HUF_DTable const* DTable) { return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); } -#endif #if ZSTD_ENABLE_ASM_X86_64_BMI2 @@ -725,39 +717,39 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, HUF_DGEN(HUF_decompress1X1_usingDTable_internal) static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) + size_t cSrcSize, HUF_DTable const* DTable, int flags) { #if DYNAMIC_BMI2 - if (bmi2) { + if (flags & HUF_flags_bmi2) { # if ZSTD_ENABLE_ASM_X86_64_BMI2 - return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); -# else - return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); + if (!(flags & HUF_flags_disableAsm)) + return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); # endif + return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); } #else - (void)bmi2; + (void)flags; #endif #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) - return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); -#else - return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); + if (!(flags & HUF_flags_disableAsm)) + return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); #endif + return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); } -static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, +static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) + void* workSpace, size_t wkspSize, int flags) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); } #endif /* HUF_FORCE_DECOMPRESS_X2 */ @@ -995,9 +987,9 @@ typedef struct { U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; } HUF_ReadDTableX2_Workspace; -size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, - void* workSpace, size_t wkspSize, int bmi2) + void* workSpace, size_t wkspSize, int flags) { U32 tableLog, maxW, nbSymbols; DTableDesc dtd = HUF_getDTableDesc(DTable); @@ -1019,7 +1011,7 @@ size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2); + iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags); if (HUF_isError(iSize)) return iSize; /* check result */ @@ -1322,13 +1314,11 @@ size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo } #endif -#if HUF_NEED_DEFAULT_FUNCTION static size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, HUF_DTable const* DTable) { return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); } -#endif #if ZSTD_ENABLE_ASM_X86_64_BMI2 @@ -1385,57 +1375,57 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm( #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) + size_t cSrcSize, HUF_DTable const* DTable, int flags) { #if DYNAMIC_BMI2 - if (bmi2) { + if (flags & HUF_flags_bmi2) { # if ZSTD_ENABLE_ASM_X86_64_BMI2 - return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); -# else - return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); + if (!(flags & HUF_flags_disableAsm)) + return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); # endif + return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); } #else - (void)bmi2; + (void)flags; #endif #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) - return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); -#else - return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); + if (!(flags & HUF_flags_disableAsm)) + return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); #endif + return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); } HUF_DGEN(HUF_decompress1X2_usingDTable_internal) -size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* DCtx, void* dst, size_t dstSize, +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) + void* workSpace, size_t wkspSize, int flags) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX2_wksp_bmi2(DCtx, cSrc, cSrcSize, - workSpace, wkspSize, bmi2); + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize, flags); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, bmi2); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags); } -static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, +static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) + void* workSpace, size_t wkspSize, int flags) { const BYTE* ip = (const BYTE*) cSrc; - size_t hSize = HUF_readDTableX2_wksp_bmi2(dctx, cSrc, cSrcSize, - workSpace, wkspSize, bmi2); + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize, flags); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); } #endif /* HUF_FORCE_DECOMPRESS_X1 */ @@ -1499,9 +1489,9 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) #endif } -size_t HUF_decompress1X_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) + void* workSpace, size_t wkspSize, int flags) { /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); @@ -1513,72 +1503,72 @@ size_t HUF_decompress1X_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSi #if defined(HUF_FORCE_DECOMPRESS_X1) (void)algoNb; assert(algoNb == 0); - return HUF_decompress1X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize, bmi2); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize, flags); #elif defined(HUF_FORCE_DECOMPRESS_X2) (void)algoNb; assert(algoNb == 1); - return HUF_decompress1X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize, bmi2); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize, flags); #else - return algoNb ? HUF_decompress1X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize, bmi2): - HUF_decompress1X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize, bmi2); + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize, flags): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize, flags); #endif } } -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) { DTableDesc const dtd = HUF_getDTableDesc(DTable); #if defined(HUF_FORCE_DECOMPRESS_X1) (void)dtd; assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); #elif defined(HUF_FORCE_DECOMPRESS_X2) (void)dtd; assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); #else - return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); #endif } #ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); } #endif -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) { DTableDesc const dtd = HUF_getDTableDesc(DTable); #if defined(HUF_FORCE_DECOMPRESS_X1) (void)dtd; assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); #elif defined(HUF_FORCE_DECOMPRESS_X2) (void)dtd; assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); #else - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); #endif } -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) { /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); @@ -1588,14 +1578,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds #if defined(HUF_FORCE_DECOMPRESS_X1) (void)algoNb; assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); #elif defined(HUF_FORCE_DECOMPRESS_X2) (void)algoNb; assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); #else - return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : - HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) : + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); #endif } } diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index f0e4f99d2..7e6ecd7df 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1435,13 +1435,13 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); #ifdef HUF_FORCE_DECOMPRESS_X1 /* in minimal huffman, we always use X1 variants */ - size_t const hSize = HUF_readDTableX1_wksp_bmi2(entropy->hufTable, + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, - workspace, workspaceSize, /* bmi2 */ 0); + workspace, workspaceSize, /* flags */ 0); #else - size_t const hSize = HUF_readDTableX2_wksp_bmi2(entropy->hufTable, + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, dictPtr, (size_t)(dictEnd - dictPtr), - workspace, workspaceSize, /* bmi2 */ 0); + workspace, workspaceSize, /* flags */ 0); #endif RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); dictPtr += hSize; diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 131114896..fbe4bf674 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -141,6 +141,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, U32 const lhc = MEM_readLE32(istart); size_t hufSuccess; size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); + int const flags = ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0; switch(lhlCode) { case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -180,33 +181,33 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, if (litEncType==set_repeat) { if (singleStream) { - hufSuccess = HUF_decompress1X_usingDTable_bmi2( + hufSuccess = HUF_decompress1X_usingDTable( dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); + dctx->HUFptr, flags); } else { assert(litSize >= MIN_LITERALS_FOR_4_STREAMS); - hufSuccess = HUF_decompress4X_usingDTable_bmi2( + hufSuccess = HUF_decompress4X_usingDTable( dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); + dctx->HUFptr, flags); } } else { if (singleStream) { #if defined(HUF_FORCE_DECOMPRESS_X2) - hufSuccess = HUF_decompress1X_DCtx_wksp_bmi2( + hufSuccess = HUF_decompress1X_DCtx_wksp( dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); + sizeof(dctx->workspace), flags); #else - hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + hufSuccess = HUF_decompress1X1_DCtx_wksp( dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); + sizeof(dctx->workspace), flags); #endif } else { - hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + hufSuccess = HUF_decompress4X_hufOnly_wksp( dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); + sizeof(dctx->workspace), flags); } } if (dctx->litBufferLocation == ZSTD_split) diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index 68c147196..148088a76 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -562,12 +562,12 @@ static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t con do { compressedSize = sizeFormat == 0 - ? HUF_compress1X_usingCTable_bmi2( + ? HUF_compress1X_usingCTable( op, opend - op, LITERAL_BUFFER, litSize, - frame->stats.hufTable, /* bmi2 */ 0) - : HUF_compress4X_usingCTable_bmi2( + frame->stats.hufTable, /* flags */ 0) + : HUF_compress4X_usingCTable( op, opend - op, LITERAL_BUFFER, litSize, - frame->stats.hufTable, /* bmi2 */ 0); + frame->stats.hufTable, /* flags */ 0); CHECKERR(compressedSize); /* this only occurs when it could not compress or similar */ } while (compressedSize <= 0); diff --git a/tests/fullbench.c b/tests/fullbench.c index 25c6d2b40..3a72d89d3 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -171,6 +171,7 @@ FORCE_NOINLINE size_t ZSTD_decodeLiteralsHeader(ZSTD_DCtx* dctx, void const* src size_t lhSize, litSize, litCSize; U32 const lhlCode = (istart[0] >> 2) & 3; U32 const lhc = MEM_readLE32(istart); + int const flags = ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0; switch(lhlCode) { case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -195,16 +196,16 @@ FORCE_NOINLINE size_t ZSTD_decodeLiteralsHeader(ZSTD_DCtx* dctx, void const* src RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); #ifndef HUF_FORCE_DECOMPRESS_X2 - return HUF_readDTableX1_wksp_bmi2( + return HUF_readDTableX1_wksp( dctx->entropy.hufTable, istart+lhSize, litCSize, dctx->workspace, sizeof(dctx->workspace), - ZSTD_DCtx_get_bmi2(dctx)); + flags); #else return HUF_readDTableX2_wksp( dctx->entropy.hufTable, istart+lhSize, litCSize, - dctx->workspace, sizeof(dctx->workspace)); + dctx->workspace, sizeof(dctx->workspace), flags); #endif } } diff --git a/tests/fuzz/huf_decompress.c b/tests/fuzz/huf_decompress.c index 25cc9bdb0..a9b3540ab 100644 --- a/tests/fuzz/huf_decompress.c +++ b/tests/fuzz/huf_decompress.c @@ -28,7 +28,12 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) /* Select random parameters: #streams, X1 or X2 decoding, bmi2 */ int const streams = FUZZ_dataProducer_int32Range(producer, 0, 1); int const symbols = FUZZ_dataProducer_int32Range(producer, 0, 1); - int const bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1); + int const flags = 0 + | (ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_bmi2 : 0) + | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_optimalDepth : 0) + | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_preferRepeat : 0) + | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_suspectUncompressible : 0) + | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableAsm : 0); /* Select a random cBufSize - it may be too small */ size_t const dBufSize = FUZZ_dataProducer_uint32Range(producer, 0, 8 * size + 500); size_t const maxTableLog = FUZZ_dataProducer_uint32Range(producer, 1, HUF_TABLELOG_MAX); @@ -40,18 +45,18 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) size = FUZZ_dataProducer_remainingBytes(producer); if (symbols == 0) { - size_t const err = HUF_readDTableX1_wksp_bmi2(dt, src, size, wksp, wkspSize, bmi2); + size_t const err = HUF_readDTableX1_wksp(dt, src, size, wksp, wkspSize, flags); if (ZSTD_isError(err)) goto _out; } else { - size_t const err = HUF_readDTableX2_wksp_bmi2(dt, src, size, wksp, wkspSize, bmi2); + size_t const err = HUF_readDTableX2_wksp(dt, src, size, wksp, wkspSize, flags); if (ZSTD_isError(err)) goto _out; } if (streams == 0) - HUF_decompress1X_usingDTable_bmi2(dBuf, dBufSize, src, size, dt, bmi2); + HUF_decompress1X_usingDTable(dBuf, dBufSize, src, size, dt, flags); else - HUF_decompress4X_usingDTable_bmi2(dBuf, dBufSize, src, size, dt, bmi2); + HUF_decompress4X_usingDTable(dBuf, dBufSize, src, size, dt, flags); _out: free(dt); diff --git a/tests/fuzz/huf_round_trip.c b/tests/fuzz/huf_round_trip.c index 19b3bea4f..9967c57bf 100644 --- a/tests/fuzz/huf_round_trip.c +++ b/tests/fuzz/huf_round_trip.c @@ -44,7 +44,12 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) /* Select random parameters: #streams, X1 or X2 decoding, bmi2 */ int const streams = FUZZ_dataProducer_int32Range(producer, 0, 1); int const symbols = FUZZ_dataProducer_int32Range(producer, 0, 1); - int const bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1); + int const flags = 0 + | (ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_bmi2 : 0) + | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_optimalDepth : 0) + | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_preferRepeat : 0) + | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_suspectUncompressible : 0) + | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableAsm : 0); /* Select a random cBufSize - it may be too small */ size_t const cBufSize = FUZZ_dataProducer_uint32Range(producer, 0, 4 * size); /* Select a random tableLog - we'll adjust it up later */ @@ -81,9 +86,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) HUF_DTable* dt = (HUF_DTable*)FUZZ_malloc(HUF_DTABLE_SIZE(tableLog) * sizeof(HUF_DTable)); dt[0] = tableLog * 0x01000001; - HUF_depth_mode depthMode = rand() & 1 ? HUF_depth_fast : HUF_depth_optimal; - - tableLog = HUF_optimalTableLog(tableLog, size, maxSymbol, wksp, wkspSize, ct, count, depthMode); + tableLog = HUF_optimalTableLog(tableLog, size, maxSymbol, wksp, wkspSize, ct, count, flags); FUZZ_ASSERT(tableLog <= 12); tableLog = HUF_buildCTable_wksp(ct, count, maxSymbol, tableLog, wksp, wkspSize); FUZZ_ZASSERT(tableLog); @@ -94,11 +97,11 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) } FUZZ_ZASSERT(tableSize); if (symbols == 0) { - FUZZ_ZASSERT(HUF_readDTableX1_wksp_bmi2(dt, cBuf, tableSize, wksp, wkspSize, bmi2)); + FUZZ_ZASSERT(HUF_readDTableX1_wksp(dt, cBuf, tableSize, wksp, wkspSize, flags)); } else { - size_t const ret = HUF_readDTableX2_wksp_bmi2(dt, cBuf, tableSize, wksp, wkspSize, bmi2); + size_t const ret = HUF_readDTableX2_wksp(dt, cBuf, tableSize, wksp, wkspSize, flags); if (ERR_getErrorCode(ret) == ZSTD_error_tableLog_tooLarge) { - FUZZ_ZASSERT(HUF_readDTableX1_wksp_bmi2(dt, cBuf, tableSize, wksp, wkspSize, bmi2)); + FUZZ_ZASSERT(HUF_readDTableX1_wksp(dt, cBuf, tableSize, wksp, wkspSize, flags)); } else { FUZZ_ZASSERT(ret); } @@ -107,15 +110,15 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) size_t cSize; size_t rSize; if (streams == 0) { - cSize = HUF_compress1X_usingCTable_bmi2(cBuf, cBufSize, src, size, ct, bmi2); + cSize = HUF_compress1X_usingCTable(cBuf, cBufSize, src, size, ct, flags); FUZZ_ZASSERT(cSize); if (cSize != 0) - rSize = HUF_decompress1X_usingDTable_bmi2(rBuf, size, cBuf, cSize, dt, bmi2); + rSize = HUF_decompress1X_usingDTable(rBuf, size, cBuf, cSize, dt, flags); } else { - cSize = HUF_compress4X_usingCTable_bmi2(cBuf, cBufSize, src, size, ct, bmi2); + cSize = HUF_compress4X_usingCTable(cBuf, cBufSize, src, size, ct, flags); FUZZ_ZASSERT(cSize); if (cSize != 0) - rSize = HUF_decompress4X_usingDTable_bmi2(rBuf, size, cBuf, cSize, dt, bmi2); + rSize = HUF_decompress4X_usingDTable(rBuf, size, cBuf, cSize, dt, flags); } if (cSize != 0) { FUZZ_ZASSERT(rSize);