From: Yann Collet Date: Thu, 28 Jan 2016 16:56:33 +0000 (+0100) Subject: minor compression speed improvement X-Git-Tag: v0.5.0~1^2~3^2~20 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=863ec40f1e163969a072453dbec9b1bdec5bce3f;p=thirdparty%2Fzstd.git minor compression speed improvement --- diff --git a/dictBuilder/dictBuilder.c b/dictBuilder/dictBuilder.c index b41259677..8bb100573 100644 --- a/dictBuilder/dictBuilder.c +++ b/dictBuilder/dictBuilder.c @@ -1,5 +1,5 @@ /* - dictBuilder.c + dictBuilder - dictionary builder for LZ algorithms Copyright (C) Yann Collet 2016 GPL v2 License @@ -20,7 +20,6 @@ You can contact the author at : - zstd source repository : https://github.com/Cyan4973/zstd - - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c */ /* ************************************** @@ -40,13 +39,8 @@ # define _LARGEFILE64_SOURCE #endif -/* S_ISREG & gettimeofday() are not supported by MSVC */ -#if defined(_MSC_VER) || defined(_WIN32) -# define BMK_LEGACY_TIMER 1 -#endif - -/* ************************************* +/*-************************************* * Includes ***************************************/ #include /* malloc, free */ @@ -75,7 +69,7 @@ #endif -/* ************************************* +/*-************************************* * Constants ***************************************/ #define KB *(1 <<10) @@ -93,8 +87,8 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t #define MINRATIO 4 -/* ************************************* -* console display +/*-************************************* +* Console display ***************************************/ #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } @@ -121,7 +115,7 @@ void DiB_printHex(U32 dlevel, const void* ptr, size_t length) } -/* ************************************* +/*-************************************* * Exceptions ***************************************/ #ifndef DEBUG diff --git a/lib/fse.c b/lib/fse.c index 61272a624..1f382ed10 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -1052,8 +1052,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) /* Build Decoding Table */ DTableH->tableLog = (U16)nbBits; DTableH->fastMode = 1; - for (s=0; s<=maxSymbolValue; s++) - { + for (s=0; s<=maxSymbolValue; s++) { dinfo[s].newState = 0; dinfo[s].symbol = (BYTE)s; dinfo[s].nbBits = (BYTE)nbBits; @@ -1087,8 +1086,7 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic( #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) /* 4 symbols per loop */ - for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op sizeof(bitD.bitContainer)*8) /* This test must be static */ @@ -1109,8 +1107,7 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic( /* tail */ /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ - while (1) - { + while (1) { if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) break; diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index dc7b57490..2a8df235d 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -1,6 +1,6 @@ /* Buffered version of Zstd compression library - Copyright (C) 2015, Yann Collet. + Copyright (C) 2015-2016, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -127,16 +127,14 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic neededInBuffSize = (size_t)1 << params.windowLog; /* allocate buffers */ - if (zbc->inBuffSize < neededInBuffSize) - { + if (zbc->inBuffSize < neededInBuffSize) { zbc->inBuffSize = neededInBuffSize; free(zbc->inBuff); /* should not be necessary */ zbc->inBuff = (char*)malloc(neededInBuffSize); if (zbc->inBuff == NULL) return ERROR(memory_allocation); } zbc->blockSize = MIN(BLOCKSIZE, zbc->inBuffSize); - if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) - { + if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) { zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1; free(zbc->outBuff); /* should not be necessary */ zbc->outBuff = (char*)malloc(zbc->outBuffSize); @@ -188,8 +186,7 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc, char* op = ostart; char* const oend = ostart + *maxDstSizePtr; - while (notDone) - { + while (notDone) { switch(zbc->stage) { case ZBUFFcs_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */ @@ -201,9 +198,9 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc, size_t loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip); zbc->inBuffPos += loaded; ip += loaded; - if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) - { notDone = 0; break; } /* not enough input to get a full block : stop there, wait for more */ - } + if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) { + notDone = 0; break; /* not enough input to get a full block : stop there, wait for more */ + } } /* compress current block (note : this stage cannot be stopped in the middle) */ { void* cDst; @@ -235,8 +232,7 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc, size_t flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush); op += flushed; zbc->outBuffFlushedSize += flushed; - if (toFlush!=flushed) - { notDone = 0; break; } /* not enough space within dst to store compressed block : stop there */ + if (toFlush!=flushed) { notDone = 0; break; } /* not enough space within dst to store compressed block : stop there */ zbc->outBuffContentSize = 0; zbc->outBuffFlushedSize = 0; zbc->stage = ZBUFFcs_load; @@ -259,7 +255,9 @@ static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc, size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr) -{ return ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, src, srcSizePtr, 0); } +{ + return ZBUFF_compressContinue_generic(zbc, dst, maxDstSizePtr, src, srcSizePtr, 0); +} @@ -387,11 +385,9 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt char* const oend = ostart + *maxDstSizePtr; U32 notDone = 1; - while (notDone) - { + while (notDone) { switch(zbc->stage) { - case ZBUFFds_init : return ERROR(init_missing); @@ -400,8 +396,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt { size_t headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr); if (ZSTD_isError(headerSize)) return headerSize; - if (headerSize) - { + if (headerSize) { /* not enough input to decode header : tell how many bytes would be necessary */ memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr); zbc->hPos += *srcSizePtr; @@ -423,8 +418,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt ip += headerSize; headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos); if (ZSTD_isError(headerSize)) return headerSize; - if (headerSize) - { + if (headerSize) { /* not enough input to decode header : tell how many bytes would be necessary */ *maxDstSizePtr = 0; return headerSize - zbc->hPos; @@ -437,23 +431,19 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt { size_t neededOutSize = (size_t)1 << zbc->params.windowLog; size_t neededInSize = BLOCKSIZE; /* a block is never > BLOCKSIZE */ - if (zbc->inBuffSize < neededInSize) - { + if (zbc->inBuffSize < neededInSize) { free(zbc->inBuff); zbc->inBuffSize = neededInSize; zbc->inBuff = (char*)malloc(neededInSize); if (zbc->inBuff == NULL) return ERROR(memory_allocation); } - if (zbc->outBuffSize < neededOutSize) - { + if (zbc->outBuffSize < neededOutSize) { free(zbc->outBuff); zbc->outBuffSize = neededOutSize; zbc->outBuff = (char*)malloc(neededOutSize); if (zbc->outBuff == NULL) return ERROR(memory_allocation); - } - } - if (zbc->hPos) - { + } } + if (zbc->hPos) { /* some data already loaded into headerBuffer : transfer into inBuff */ memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos); zbc->inPos = zbc->hPos; @@ -466,14 +456,12 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt case ZBUFFds_read: { size_t neededInSize = ZSTD_nextSrcSizeToDecompress(zbc->zc); - if (neededInSize==0) /* end of frame */ - { + if (neededInSize==0) { /* end of frame */ zbc->stage = ZBUFFds_init; notDone = 0; break; } - if ((size_t)(iend-ip) >= neededInSize) - { + if ((size_t)(iend-ip) >= neededInSize) { /* directly decode from src */ size_t decodedSize = ZSTD_decompressContinue(zbc->zc, zbc->outBuff + zbc->outStart, zbc->outBuffSize - zbc->outStart, @@ -509,16 +497,14 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt zbc->outEnd = zbc->outStart + decodedSize; zbc->stage = ZBUFFds_flush; // break; /* ZBUFFds_flush follows */ - } - } + } } case ZBUFFds_flush: { size_t toFlushSize = zbc->outEnd - zbc->outStart; size_t flushedSize = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outStart, toFlushSize); op += flushedSize; zbc->outStart += flushedSize; - if (flushedSize == toFlushSize) - { + if (flushedSize == toFlushSize) { zbc->stage = ZBUFFds_read; if (zbc->outStart + BLOCKSIZE > zbc->outBuffSize) zbc->outStart = zbc->outEnd = 0; @@ -529,8 +515,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt break; } default: return ERROR(GENERIC); /* impossible */ - } - } + } } *srcSizePtr = ip-istart; *maxDstSizePtr = op-ostart; @@ -545,11 +530,6 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDstSizePt - - - - - /* ************************************* * Tool functions ***************************************/ diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 2c67c8ae0..31eb15912 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1,6 +1,6 @@ /* ZSTD HC - High Compression Mode of Zstandard - Copyright (C) 2015, Yann Collet. + Copyright (C) 2015-2016, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -181,29 +181,25 @@ void ZSTD_validateParams(ZSTD_parameters* params) static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, ZSTD_parameters params) -{ - /* note : params considered validated here */ +{ /* note : params considered validated here */ const size_t blockSize = MIN(BLOCKSIZE, (size_t)1 << params.windowLog); - /* reserve table memory */ - { - const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; - const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32); - const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize); - if (zc->workSpaceSize < neededSpace) { - free(zc->workSpace); - zc->workSpace = malloc(neededSpace); - if (zc->workSpace == NULL) return ERROR(memory_allocation); - zc->workSpaceSize = neededSpace; - } - memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ - zc->hashTable = (U32*)(zc->workSpace); - zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog); - zc->seqStore.buffer = zc->contentTable + ((size_t)1 << contentLog); - zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; - zc->flagStaticTables = 0; - zc->seqStore.buffer = (U32*)(zc->seqStore.buffer) + 256; + const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; + const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32); + const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize); + if (zc->workSpaceSize < neededSpace) { + free(zc->workSpace); + zc->workSpace = malloc(neededSpace); + if (zc->workSpace == NULL) return ERROR(memory_allocation); + zc->workSpaceSize = neededSpace; } + memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ + zc->hashTable = (U32*)(zc->workSpace); + zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog); + zc->seqStore.buffer = zc->contentTable + ((size_t)1 << contentLog); + zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; + zc->flagStaticTables = 0; + zc->seqStore.buffer = (U32*)(zc->seqStore.buffer) + 256; zc->nextToUpdate = 1; zc->nextSrc = NULL; @@ -256,7 +252,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) /* copy entropy tables */ dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; - if (dstCCtx->flagStaticTables) { + if (srcCCtx->flagStaticTables) { memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4); memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable)); memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable)); @@ -267,7 +263,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) } -/** ZSTD_reduceIndex +/*! ZSTD_reduceIndex * rescale indexes to avoid future overflow (indexes are U32) */ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) @@ -284,7 +280,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, } -/* ******************************************************* +/*-******************************************************* * Block entropic compression *********************************************************/ @@ -553,7 +549,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, MEM_writeLE16(op, (U16)nbSeq); op+=2; seqHead = op; - /* dumps : contains too large lengths */ + /* dumps : contains rests of large lengths */ { size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart; if (dumpsLength < 512) { @@ -773,10 +769,8 @@ static unsigned ZSTD_highbit(U32 val) static unsigned ZSTD_NbCommonBytes (register size_t val) { - if (MEM_isLittleEndian()) - { - if (MEM_64bits()) - { + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { # if defined(_MSC_VER) && defined(_WIN64) unsigned long r = 0; _BitScanForward64( &r, (U64)val ); @@ -787,9 +781,7 @@ static unsigned ZSTD_NbCommonBytes (register size_t val) static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; # endif - } - else /* 32 bits */ - { + } else { /* 32 bits */ # if defined(_MSC_VER) unsigned long r=0; _BitScanForward( &r, (U32)val ); @@ -801,11 +793,8 @@ static unsigned ZSTD_NbCommonBytes (register size_t val) return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; # endif } - } - else /* Big Endian CPU */ - { - if (MEM_64bits()) - { + } else { /* Big Endian CPU */ + if (MEM_64bits()) { # if defined(_MSC_VER) && defined(_WIN64) unsigned long r = 0; _BitScanReverse64( &r, val ); @@ -820,9 +809,7 @@ static unsigned ZSTD_NbCommonBytes (register size_t val) r += (!val); return r; # endif - } - else /* 32 bits */ - { + } else { /* 32 bits */ # if defined(_MSC_VER) unsigned long r = 0; _BitScanReverse( &r, (unsigned long)val ); @@ -835,8 +822,7 @@ static unsigned ZSTD_NbCommonBytes (register size_t val) r += (!val); return r; # endif - } - } + } } } @@ -874,24 +860,23 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE -/* ************************************* +/*-************************************* * Hashes ***************************************/ - static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; } +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_read64(p), h); } static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; } +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_read64(p), h); } static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_read64(p), h); } static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) @@ -906,10 +891,10 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) } } + /* ************************************* * Fast Scan ***************************************/ - #define FILLHASHSTEP 3 static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) {