From: Yann Collet Date: Mon, 25 Jan 2016 03:10:46 +0000 (+0100) Subject: stream control X-Git-Tag: v0.5.0~1^2~3^2~32 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=afe0709964757252dfaecb03776d697c54cf3669;p=thirdparty%2Fzstd.git stream control --- diff --git a/lib/huff0.c b/lib/huff0.c index 2544c9e69..9bc905a5b 100644 --- a/lib/huff0.c +++ b/lib/huff0.c @@ -473,10 +473,76 @@ static size_t HUF_compress_into4Segments(void* dst, size_t dstSize, const void* } +static size_t HUF_compress_internal ( + void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + unsigned singleStream) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + U32 count[HUF_MAX_SYMBOL_VALUE+1]; + HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1]; + size_t errorCode; + + /* checks & inits */ + if (srcSize < 2) return 0; /* Uncompressed */ + if (dstSize < 1) return 0; /* not compressible within dst budget */ + if (srcSize > 128 * 1024) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE; + if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG; + + /* Scan input and build symbol stats */ + errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } + if (errorCode <= (srcSize >> 7)+1) return 0; /* Heuristic : not compressible enough */ + + /* Build Huffman Tree */ + errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); + if (HUF_isError(errorCode)) return errorCode; + huffLog = (U32)errorCode; + + /* Write table description header */ + errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode + 12 >= srcSize) return 0; /* not useful to try compression */ + op += errorCode; + + /* Compress */ + if (singleStream) + errorCode = HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable); /* single segment */ + else + errorCode = HUF_compress_into4Segments(op, oend - op, src, srcSize, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + op += errorCode; + + /* check compressibility */ + if ((size_t)(op-ostart) >= srcSize-1) + return 0; + + return op-ostart; +} + + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1); +} + size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog) { +#if 1 + return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0); +#else BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; BYTE* const oend = ostart + dstSize; @@ -522,8 +588,10 @@ size_t HUF_compress2 (void* dst, size_t dstSize, return 0; return op-ostart; +#endif } + size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) { return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG); @@ -620,9 +688,9 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, } -/* ************************/ -/* single-symbol decoding */ -/* ************************/ +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) { diff --git a/lib/huff0_static.h b/lib/huff0_static.h index 28ae92ab2..bb39496b1 100644 --- a/lib/huff0_static.h +++ b/lib/huff0_static.h @@ -111,14 +111,19 @@ size_t HUF_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize size_t HUF_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX6 (unsigned* DTable, const void* src, size_t srcSize); -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ -size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ -size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbol decoder */ - size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable); size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable); size_t HUF_decompress4X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable); + +/* single stream variants */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbol decoder */ + size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable); size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable); size_t HUF_decompress1X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable); diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 7cf03ead7..626000934 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -55,7 +55,7 @@ #include /* memset */ #include "mem.h" #include "fse_static.h" -#include "huff0.h" +#include "huff0_static.h" #include "zstd_static.h" #include "zstd_internal.h" @@ -259,10 +259,14 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, Note : delta map ? => compressed ? 1.1.1) Huff0-compressed literal block : 3-5 bytes + srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream srcSize < 1 KB => 3 bytes (2-2-10-10) srcSize < 17KB => 4 bytes (2-2-14-14) else => 5 bytes (2-2-18-18) big endian convention + Note : 1 or 4 streams ? => controlled by zstd => requires 1 bit => reserved to < 1 KB + 1 stream : orig size ? (note : not required ) + if not : get orig size from decoding, & saves 10 bits 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes size : 5 bits: (IS_RAW<<6) + (0<<4) + size @@ -281,8 +285,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, size&255 1.1.4) Unused - Use Huff0 w/ precalculated DTable ? - FSE ? => probably not, not efficient on literals + Use Huff0 w/ precalculated CTable ? + Store CTable into workspace : build during dict Loading, use during encoding + Same issue about size and Nstreams 1.2) Literal block content @@ -382,11 +387,13 @@ static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize, const size_t minGain = ZSTD_minGain(srcSize); BYTE* const ostart = (BYTE*)dst; size_t lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + U32 singleStream = srcSize < 256; size_t clitSize; if (maxDstSize < 4) return ERROR(dstSize_tooSmall); /* not enough space for compression */ - clitSize = HUF_compress(ostart+lhSize, maxDstSize-lhSize, src, srcSize); + clitSize = singleStream ? HUF_compress1X(ostart+lhSize, maxDstSize-lhSize, src, srcSize, 255, 12) + : HUF_compress2 (ostart+lhSize, maxDstSize-lhSize, src, srcSize, 255, 12); if ((clitSize==0) || (clitSize >= srcSize - minGain)) return ZSTD_noCompressLiterals(dst, maxDstSize, src, srcSize); if (clitSize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize); @@ -395,7 +402,7 @@ static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize, switch(lhSize) { case 3: /* 2 - 2 - 10 - 10 */ - ostart[0] = (BYTE) (srcSize>>6) + (0<< 4); + ostart[0] = (BYTE) (srcSize>>6) + (singleStream << 4); ostart[1] = (BYTE)((srcSize<<2) + (clitSize>>8)); ostart[2] = (BYTE)(clitSize); break; @@ -419,7 +426,7 @@ static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize, } -#define LITERAL_NOENTROPY 63 /* cheap heuristic */ +#define LITERAL_NOENTROPY 63 /* don't even attempt to compress literals below this threshold (cheap heuristic) */ size_t ZSTD_compressSequences(void* dst, size_t maxDstSize, const seqStore_t* seqStorePtr, diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 6dc6f9a7d..88ec78e6c 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -61,7 +61,7 @@ #include "zstd_static.h" #include "zstd_internal.h" #include "fse_static.h" -#include "huff0.h" +#include "huff0_static.h" #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) # include "zstd_legacy.h" @@ -297,13 +297,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, { case IS_HUF: { - size_t litSize, litCSize; + size_t litSize, litCSize, singleStream=0; U32 lhSize = ((istart[0]) >> 4) & 3; switch(lhSize) { case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ /* 2 - 2 - 10 - 10 */ lhSize=3; + singleStream = istart[0] & 16; litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2); litCSize = ((istart[1] & 3) << 8) + istart[2]; break; @@ -321,7 +322,9 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; } - if (HUF_isError( HUF_decompress(dctx->litBuffer, litSize, istart+lhSize, litCSize) )) + if (HUF_isError(singleStream ? + HUF_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) : + HUF_decompress (dctx->litBuffer, litSize, istart+lhSize, litCSize) )) return ERROR(corruption_detected); dctx->litPtr = dctx->litBuffer;