From: Stella Lau Date: Wed, 19 Jul 2017 01:05:10 +0000 (-0700) Subject: Switch to using ZSTD_count instead of function pointer X-Git-Tag: v1.3.1^2~12^2~7^2~20 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1fa223859fb7f53d5a03c8b30dfd74d40898b05f;p=thirdparty%2Fzstd.git Switch to using ZSTD_count instead of function pointer --- diff --git a/contrib/long_distance_matching/basic_table.c b/contrib/long_distance_matching/basic_table.c index 8b3588e81..6c12b5087 100644 --- a/contrib/long_distance_matching/basic_table.c +++ b/contrib/long_distance_matching/basic_table.c @@ -36,14 +36,38 @@ LDM_hashEntry *HASH_getEntryFromHash( return getBucket(table, hash); } +static int isValidMatch(const BYTE *pIn, const BYTE *pMatch, + U32 minMatchLength, U32 maxWindowSize) { + U32 lengthLeft = minMatchLength; + const BYTE *curIn = pIn; + const BYTE *curMatch = pMatch; + + if (pIn - pMatch > maxWindowSize) { + return 0; + } + + for (; lengthLeft >= 4; lengthLeft -= 4) { + if (MEM_read32(curIn) != MEM_read32(curMatch)) { + return 0; + } + curIn += 4; + curMatch += 4; + } + return 1; +} + LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, const hash_t hash, const U32 checksum, const BYTE *pIn, - int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) { + const BYTE *pEnd, + U32 minMatchLength, + U32 maxWindowSize) { LDM_hashEntry *entry = getBucket(table, hash); (void)checksum; - if ((*isValid)(pIn, entry->offset + table->offsetBase)) { + (void)pEnd; + if (isValidMatch(pIn, entry->offset + table->offsetBase, + minMatchLength, maxWindowSize)) { return entry; } return NULL; diff --git a/contrib/long_distance_matching/circular_buffer_table.c b/contrib/long_distance_matching/circular_buffer_table.c index bc7503f17..653d9e51b 100644 --- a/contrib/long_distance_matching/circular_buffer_table.c +++ b/contrib/long_distance_matching/circular_buffer_table.c @@ -9,11 +9,14 @@ // Number of elements per hash bucket. // HASH_BUCKET_SIZE_LOG defined in ldm.h -#define HASH_BUCKET_SIZE_LOG 0 // MAX is 4 for now +#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now #define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG)) +// TODO: rename. Number of hash buckets. #define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG) +//#define TMP_ZSTDTOGGLE + struct LDM_hashTable { U32 size; // Number of buckets U32 maxEntries; // Rename... @@ -39,20 +42,162 @@ static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) { return table->entries + (hash << HASH_BUCKET_SIZE_LOG); } +#ifdef TMP_ZSTDTOGGLE +static unsigned ZSTD_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + +// From lib/compress/zstd_compress.c +static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, + const BYTE *const pInLimit) { + const BYTE * const pStart = pIn; + const BYTE * const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn += sizeof(size_t); + pMatch += sizeof(size_t); + continue; + } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } + + if (MEM_64bits()) { + if ((pIn < (pInLimit - 3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { + pIn += 4; + pMatch += 4; + } + } + if ((pIn < (pInLimit - 1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { + pIn += 2; + pMatch += 2; + } + if ((pIn < pInLimit) && (*pMatch == *pIn)) { + pIn++; + } + return (size_t)(pIn - pStart); +} + +#else + +static int isValidMatch(const BYTE *pIn, const BYTE *pMatch, + U32 minMatchLength, U32 maxWindowSize) { + U32 lengthLeft = minMatchLength; + const BYTE *curIn = pIn; + const BYTE *curMatch = pMatch; + + if (pIn - pMatch > maxWindowSize) { + return 0; + } + + for (; lengthLeft >= 4; lengthLeft -= 4) { + if (MEM_read32(curIn) != MEM_read32(curMatch)) { + return 0; + } + curIn += 4; + curMatch += 4; + } + return 1; +} + +#endif // TMP_ZSTDTOGGLE + LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, const hash_t hash, const U32 checksum, const BYTE *pIn, - int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) { + const BYTE *pEnd, + U32 minMatchLength, + U32 maxWindowSize) { LDM_hashEntry *bucket = getBucket(table, hash); LDM_hashEntry *cur = bucket; // TODO: in order of recency? for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) { // Check checksum for faster check. + const BYTE *pMatch = cur->offset + table->offsetBase; +#ifdef TMP_ZSTDTOGGLE + if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) { + U32 matchLength = ZSTD_count(pIn, pMatch, pEnd); + if (matchLength >= minMatchLength) { + return cur; + } + } +#else + (void)pEnd; + (void)minMatchLength; + (void)maxWindowSize; + if (cur->checksum == checksum && - (*isValid)(pIn, cur->offset + table->offsetBase)) { + isValidMatch(pIn, pMatch, minMatchLength, maxWindowSize)) { return cur; } +#endif } return NULL; } diff --git a/contrib/long_distance_matching/ldm.c b/contrib/long_distance_matching/ldm.c index 4d8ca40bc..56b22d288 100644 --- a/contrib/long_distance_matching/ldm.c +++ b/contrib/long_distance_matching/ldm.c @@ -91,6 +91,7 @@ struct LDM_CCtx { hash_t lagHash; U32 lagSum; + U64 numHashInserts; // DEBUG const BYTE *DEBUG_setNextHash; }; @@ -164,7 +165,6 @@ void LDM_printCompressStats(const LDM_compressStats *stats) { } printf("\n"); printf("=====================\n"); - } int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) { @@ -376,7 +376,7 @@ void LDM_outputConfiguration(void) { printf("Min match, hash length: %d, %d\n", LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH); printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE); - printf("HASH_ONLY_EVERY: %d\n", HASH_ONLY_EVERY); + printf("HASH_ONLY_EVERY_LOG: %d\n", HASH_ONLY_EVERY_LOG); printf("LDM_LAG %d\n", LDM_LAG); printf("=====================\n"); } @@ -456,8 +456,10 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) { #ifdef HASH_CHECK entry = HASH_getEntryFromHash(cctx->hashTable, h, sum); #else - entry = HASH_getValidEntry(cctx->hashTable, h, sum, cctx->ip, - &LDM_isValidMatch); + entry = HASH_getValidEntry(cctx->hashTable, h, sum, + cctx->ip, cctx->iend, + LDM_MIN_MATCH_LENGTH, + LDM_WINDOW_SIZE); #endif if (entry != NULL) { @@ -534,9 +536,10 @@ size_t LDM_compress(const void *src, size_t srcSize, LDM_CCtx cctx; const BYTE *match = NULL; // printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64); - printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG); +// printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG); LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize); + LDM_outputConfiguration(); /* Hash the first position and put it into the hash table. */ LDM_putHashOfCurrentPosition(&cctx); @@ -553,11 +556,10 @@ size_t LDM_compress(const void *src, size_t srcSize, * and encode the final literals. */ while (LDM_findBestMatch(&cctx, &match) == 0) { + U32 backwardsMatchLen = 0; #ifdef COMPUTE_STATS cctx.stats.numMatches++; #endif - -// printf("HERE %zu\n", cctx.ip - cctx.ibase); /** * Catch up: look back to extend the match backwards from the found match. */ @@ -565,6 +567,7 @@ size_t LDM_compress(const void *src, size_t srcSize, cctx.ip[-1] == match[-1]) { cctx.ip--; match--; + backwardsMatchLen++; } /** @@ -575,8 +578,9 @@ size_t LDM_compress(const void *src, size_t srcSize, const U32 literalLength = cctx.ip - cctx.anchor; const U32 offset = cctx.ip - match; const U32 matchLength = LDM_countMatchLength( - cctx.ip + LDM_MIN_MATCH_LENGTH, match + LDM_MIN_MATCH_LENGTH, - cctx.ihashLimit); + cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen, + match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen, + cctx.ihashLimit) + backwardsMatchLen; LDM_outputBlock(&cctx, literalLength, offset, matchLength); diff --git a/contrib/long_distance_matching/ldm.h b/contrib/long_distance_matching/ldm.h index 2d4ff9cf2..735435e8d 100644 --- a/contrib/long_distance_matching/ldm.h +++ b/contrib/long_distance_matching/ldm.h @@ -12,18 +12,17 @@ // Defines the size of the hash table. // Currently this should be less than WINDOW_SIZE_LOG + 4? -#define LDM_MEMORY_USAGE 24 +#define LDM_MEMORY_USAGE 23 -//#define LDM_LAG (1 << 23) //#define LDM_LAG (1 << 20) -#define LDM_LAG 0 +#define LDM_LAG (0) #define LDM_WINDOW_SIZE_LOG 28 #define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG)) //These should be multiples of four (and perhaps set to the same values?). -#define LDM_MIN_MATCH_LENGTH 512 -#define LDM_HASH_LENGTH 512 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_LENGTH 64 typedef struct LDM_compressStats LDM_compressStats; typedef struct LDM_CCtx LDM_CCtx; diff --git a/contrib/long_distance_matching/ldm_hashtable.h b/contrib/long_distance_matching/ldm_hashtable.h index 4fef66214..7566751dc 100644 --- a/contrib/long_distance_matching/ldm_hashtable.h +++ b/contrib/long_distance_matching/ldm_hashtable.h @@ -40,7 +40,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, const hash_t hash, const U32 checksum, const BYTE *pIn, - int (*isValid)(const BYTE *pIn, const BYTE *pMatch)); + const BYTE *pEnd, + U32 minMatchLength, + U32 maxWindowSize); hash_t HASH_hashU32(U32 value);