From: Stella Lau Date: Wed, 19 Jul 2017 21:14:26 +0000 (-0700) Subject: Experiment with integrating ZSTD_count with findBestMatch X-Git-Tag: v1.3.1^2~12^2~7^2~18 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=030264ca51814d3bef8debcedace65f56779f691;p=thirdparty%2Fzstd.git Experiment with integrating ZSTD_count with findBestMatch --- diff --git a/contrib/long_distance_matching/Makefile b/contrib/long_distance_matching/Makefile index 131638fdb..3aa3f8bd9 100644 --- a/contrib/long_distance_matching/Makefile +++ b/contrib/long_distance_matching/Makefile @@ -25,16 +25,20 @@ LDFLAGS += -lzstd default: all -all: main-basic main-circular-buffer +all: main-circular-buffer main-integrated -main-basic : basic_table.c ldm.c main-ldm.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ +#main-basic : basic_table.c ldm.c main-ldm.c +# $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ main-circular-buffer: circular_buffer_table.c ldm.c main-ldm.c $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ +main-integrated: ldm_with_table.c main-ldm.c + $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ + + clean: @rm -f core *.o tmp* result* *.ldm *.ldm.dec \ - main-basic main-circular-buffer + main-basic main-circular-buffer main-integrated @echo Cleaning completed diff --git a/contrib/long_distance_matching/circular_buffer_table.c b/contrib/long_distance_matching/circular_buffer_table.c index 104d1b339..9b7ad088c 100644 --- a/contrib/long_distance_matching/circular_buffer_table.c +++ b/contrib/long_distance_matching/circular_buffer_table.c @@ -14,8 +14,8 @@ // TODO: rename. Number of hash buckets. #define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG) - -#define TMP_ZSTDTOGGLE +#define ZSTD_SKIP +//#define TMP_TST struct LDM_hashTable { U32 size; // Number of buckets @@ -25,15 +25,20 @@ struct LDM_hashTable { // Position corresponding to offset=0 in LDM_hashEntry. const BYTE *offsetBase; + U32 minMatchLength; + U32 maxWindowSize; }; -LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) { +LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase, + U32 minMatchLength, U32 maxWindowSize) { LDM_hashTable *table = malloc(sizeof(LDM_hashTable)); table->size = size >> HASH_BUCKET_SIZE_LOG; table->maxEntries = size; table->entries = calloc(size, sizeof(LDM_hashEntry)); table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE)); table->offsetBase = offsetBase; + table->minMatchLength = minMatchLength; + table->maxWindowSize = maxWindowSize; return table; } @@ -41,7 +46,7 @@ static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) { return table->entries + (hash << HASH_BUCKET_SIZE_LOG); } -#ifdef TMP_ZSTDTOGGLE +#if TMP_ZSTDTOGGLE static unsigned ZSTD_NbCommonBytes (register size_t val) { if (MEM_isLittleEndian()) { @@ -143,10 +148,85 @@ static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, return (size_t)(pIn - pStart); } +U32 countBackwardsMatch(const BYTE *pIn, const BYTE *pAnchor, + const BYTE *pMatch, const BYTE *pBase) { + U32 matchLength = 0; + while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, + const hash_t hash, + const U32 checksum, + const BYTE *pIn, + const BYTE *pEnd, + U32 *matchLength, + U32 *backwardsMatchLength, + const BYTE *pAnchor) { + LDM_hashEntry *bucket = getBucket(table, hash); + LDM_hashEntry *cur = bucket; + LDM_hashEntry *bestEntry = NULL; + U32 bestMatchLength = 0; + U32 forwardMatch = 0; + U32 backwardMatch = 0; +#ifdef TMP_TST + U32 numBetter = 0; +#endif + for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) { + // Check checksum for faster check. + const BYTE *pMatch = cur->offset + table->offsetBase; + if (cur->checksum == checksum && pIn - pMatch <= table->maxWindowSize) { + U32 forwardMatchLength = ZSTD_count(pIn, pMatch, pEnd); + U32 backwardMatchLength, totalMatchLength; + if (forwardMatchLength < table->minMatchLength) { + continue; + } + backwardMatchLength = + countBackwardsMatch(pIn, pAnchor, cur->offset + table->offsetBase, + table->offsetBase); + + totalMatchLength = forwardMatchLength + backwardMatchLength; + + if (totalMatchLength >= bestMatchLength) { + bestMatchLength = totalMatchLength; + forwardMatch = forwardMatchLength; + backwardMatch = backwardMatchLength; + bestEntry = cur; +#ifdef TMP_TST + numBetter++; +#endif + +#ifdef ZSTD_SKIP + *matchLength = forwardMatchLength; + *backwardsMatchLength = backwardMatchLength; + + return cur; +#endif +// *matchLength = forwardMatchLength; +// return cur; + } + } + } + if (bestEntry != NULL && bestMatchLength > table->minMatchLength) { +#ifdef TMP_TST + printf("Num better %u\n", numBetter - 1); +#endif + *matchLength = forwardMatch; + *backwardsMatchLength = backwardMatch; + return bestEntry; + } + return NULL; +} + #else static int isValidMatch(const BYTE *pIn, const BYTE *pMatch, U32 minMatchLength, U32 maxWindowSize) { + printf("HERE\n"); U32 lengthLeft = minMatchLength; const BYTE *curIn = pIn; const BYTE *curMatch = pMatch; @@ -165,44 +245,33 @@ static int isValidMatch(const BYTE *pIn, const BYTE *pMatch, return 1; } -#endif // TMP_ZSTDTOGGLE - +//TODO: clean up function call. This is not at all decoupled from LDM. LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, const hash_t hash, const U32 checksum, const BYTE *pIn, const BYTE *pEnd, - U32 minMatchLength, - U32 maxWindowSize, - U32 *matchLength) { + U32 *matchLength, + U32 *backwardsMatchLength, + const BYTE *pAnchor) { LDM_hashEntry *bucket = getBucket(table, hash); LDM_hashEntry *cur = bucket; - // TODO: in order of recency? - for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) { + (void)matchLength; + (void)backwardsMatchLength; + (void)pAnchor; for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) { // Check checksum for faster check. const BYTE *pMatch = cur->offset + table->offsetBase; -#ifdef TMP_ZSTDTOGGLE - if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) { - U32 forwardMatchLength = ZSTD_count(pIn, pMatch, pEnd); - if (forwardMatchLength >= minMatchLength) { - *matchLength = forwardMatchLength; - return cur; - } - } -#else (void)pEnd; - (void)minMatchLength; - (void)maxWindowSize; if (cur->checksum == checksum && - isValidMatch(pIn, pMatch, minMatchLength, maxWindowSize)) { + isValidMatch(pIn, pMatch, table->minMatchLength, table->maxWindowSize)) { return cur; } -#endif } return NULL; } +#endif hash_t HASH_hashU32(U32 value) { return ((value * 2654435761U) >> (32 - LDM_HASHLOG)); } diff --git a/contrib/long_distance_matching/ldm.c b/contrib/long_distance_matching/ldm.c index 1512ab8c5..a116af70a 100644 --- a/contrib/long_distance_matching/ldm.c +++ b/contrib/long_distance_matching/ldm.c @@ -24,8 +24,6 @@ #define OUTPUT_CONFIGURATION #define CHECKSUM_CHAR_OFFSET 10 -//#define LDM_LAG 0 - //#define HASH_CHECK //#define RUN_CHECKS //#define TMP_RECOMPUTE_LENGTHS @@ -410,7 +408,8 @@ void LDM_initializeCCtx(LDM_CCtx *cctx, cctx->anchor = cctx->ibase; memset(&(cctx->stats), 0, sizeof(cctx->stats)); - cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U64, cctx->ibase); + cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U64, cctx->ibase, + LDM_MIN_MATCH_LENGTH, LDM_WINDOW_SIZE); cctx->stats.minOffset = UINT_MAX; cctx->stats.windowSizeLog = LDM_WINDOW_SIZE_LOG; @@ -439,7 +438,7 @@ void LDM_destroyCCtx(LDM_CCtx *cctx) { * matchLength contains the forward length of the match. */ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match, - U32 *matchLength) { + U32 *matchLength, U32 *backwardMatchLength) { LDM_hashEntry *entry = NULL; cctx->nextIp = cctx->ip + cctx->step; @@ -461,9 +460,8 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match, #else entry = HASH_getValidEntry(cctx->hashTable, h, sum, cctx->ip, cctx->iend, - LDM_MIN_MATCH_LENGTH, - LDM_WINDOW_SIZE, - matchLength); + matchLength, backwardMatchLength, + cctx->anchor); #endif if (entry != NULL) { @@ -540,6 +538,7 @@ size_t LDM_compress(const void *src, size_t srcSize, LDM_CCtx cctx; const BYTE *match = NULL; U32 forwardMatchLength = 0; + U32 backwardsMatchLength = 0; LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize); LDM_outputConfiguration(); @@ -558,11 +557,14 @@ size_t LDM_compress(const void *src, size_t srcSize, * is less than the minimum match length), then stop searching for matches * and encode the final literals. */ - while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength) == 0) { - U32 backwardsMatchLen = 0; + while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength, + &backwardsMatchLength) == 0) { #ifdef COMPUTE_STATS cctx.stats.numMatches++; #endif + +#if TMP_RECOMPUTE_LENGTHS + backwardsMatchLength = 0; /** * Catch up: look back to extend the match backwards from the found match. */ @@ -570,8 +572,12 @@ size_t LDM_compress(const void *src, size_t srcSize, cctx.ip[-1] == match[-1]) { cctx.ip--; match--; - backwardsMatchLen++; + backwardsMatchLength++; } +#else + cctx.ip -= backwardsMatchLength; + match -= backwardsMatchLength; +#endif /** * Write current block (literals, literal length, match offset, match @@ -580,13 +586,14 @@ size_t LDM_compress(const void *src, size_t srcSize, { const U32 literalLength = cctx.ip - cctx.anchor; const U32 offset = cctx.ip - match; -#ifdef TMP_RECOMPUTE_LENGTHS +#if TMP_RECOMPUTE_LENGTHS const U32 matchLength = LDM_countMatchLength( - cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen, - match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen, - cctx.ihashLimit) + backwardsMatchLen; + cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLength, + match + LDM_MIN_MATCH_LENGTH + backwardsMatchLength, + cctx.ihashLimit) + backwardsMatchLength; #else - const U32 matchLength = forwardMatchLength + backwardsMatchLen - + const U32 matchLength = forwardMatchLength + + backwardsMatchLength - LDM_MIN_MATCH_LENGTH; #endif diff --git a/contrib/long_distance_matching/ldm.h b/contrib/long_distance_matching/ldm.h index 735435e8d..2396227d2 100644 --- a/contrib/long_distance_matching/ldm.h +++ b/contrib/long_distance_matching/ldm.h @@ -20,9 +20,12 @@ #define LDM_WINDOW_SIZE_LOG 28 #define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG)) -//These should be multiples of four (and perhaps set to the same values?). -#define LDM_MIN_MATCH_LENGTH 64 -#define LDM_HASH_LENGTH 64 +//These should be multiples of four (and perhaps set to the same value?). +#define LDM_MIN_MATCH_LENGTH 1024 +#define LDM_HASH_LENGTH 1024 + +#define TMP_ZSTDTOGGLE 1 +#define TMP_RECOMPUTE_LENGTHS (!(TMP_ZSTDTOGGLE)) typedef struct LDM_compressStats LDM_compressStats; typedef struct LDM_CCtx LDM_CCtx; diff --git a/contrib/long_distance_matching/ldm_hashtable.h b/contrib/long_distance_matching/ldm_hashtable.h index 2ea159f71..51d825258 100644 --- a/contrib/long_distance_matching/ldm_hashtable.h +++ b/contrib/long_distance_matching/ldm_hashtable.h @@ -19,7 +19,8 @@ typedef struct LDM_hashTable LDM_hashTable; * LDM_hashEntry.offset is added to offsetBase to calculate pMatch in * HASH_getValidEntry. */ -LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase); +LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase, + U32 minMatchLength, U32 maxWindowSize); /** * Returns an LDM_hashEntry from the table that matches the checksum. @@ -41,9 +42,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, const U32 checksum, const BYTE *pIn, const BYTE *pEnd, - const U32 minMatchLength, - const U32 maxWindowSize, - U32 *matchLength); + U32 *matchLength, + U32 *backwardsMatchLength, + const BYTE *pAnchor); hash_t HASH_hashU32(U32 value); diff --git a/contrib/long_distance_matching/ldm_with_table.c b/contrib/long_distance_matching/ldm_with_table.c new file mode 100644 index 000000000..68a33d0ff --- /dev/null +++ b/contrib/long_distance_matching/ldm_with_table.c @@ -0,0 +1,959 @@ +#include +#include +#include +#include +#include + +#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE)) +//#define LDM_HASH_ENTRY_SIZE 4 +#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2) +#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 4) + +// Insert every (HASH_ONLY_EVERY + 1) into the hash table. +#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE) - 4)) +#define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1) + +/* Hash table stuff. */ +#define HASH_BUCKET_SIZE_LOG 3 // MAX is 4 for now +#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG)) +#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG) + +#define ML_BITS 4 +#define ML_MASK ((1U<size = size >> HASH_BUCKET_SIZE_LOG; + table->maxEntries = size; + table->entries = calloc(size, sizeof(LDM_hashEntry)); + table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE)); + return table; +} + +static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) { + return table->entries + (hash << HASH_BUCKET_SIZE_LOG); +} + + + +static unsigned ZSTD_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + +// From lib/compress/zstd_compress.c +static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, + const BYTE *const pInLimit) { + const BYTE * const pStart = pIn; + const BYTE * const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn += sizeof(size_t); + pMatch += sizeof(size_t); + continue; + } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } + + if (MEM_64bits()) { + if ((pIn < (pInLimit - 3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { + pIn += 4; + pMatch += 4; + } + } + if ((pIn < (pInLimit - 1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { + pIn += 2; + pMatch += 2; + } + if ((pIn < pInLimit) && (*pMatch == *pIn)) { + pIn++; + } + return (size_t)(pIn - pStart); +} + +U32 countBackwardsMatch(const BYTE *pIn, const BYTE *pAnchor, + const BYTE *pMatch, const BYTE *pBase) { + U32 matchLength = 0; + while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +LDM_hashEntry *HASH_getValidEntry(const LDM_CCtx *cctx, + const hash_t hash, + const U32 checksum, + U32 *matchLength, + U32 *backwardsMatchLength) { + LDM_hashTable *table = cctx->hashTable; + LDM_hashEntry *bucket = getBucket(table, hash); + LDM_hashEntry *cur = bucket; + LDM_hashEntry *bestEntry = NULL; + U32 bestMatchLength = 0; + for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) { + // Check checksum for faster check. + const BYTE *pMatch = cur->offset + cctx->ibase; + + if (cur->checksum == checksum && + cctx->ip - pMatch <= LDM_WINDOW_SIZE) { + U32 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend); + U32 backwardMatchLength, totalMatchLength; + + // For speed. + if (forwardMatchLength < LDM_MIN_MATCH_LENGTH) { + continue; + } + + backwardMatchLength = + countBackwardsMatch(cctx->ip, cctx->anchor, + cur->offset + cctx->ibase, + cctx->ibase); + + totalMatchLength = forwardMatchLength + backwardMatchLength; + + if (totalMatchLength >= bestMatchLength && + totalMatchLength >= LDM_MIN_MATCH_LENGTH) { + bestMatchLength = totalMatchLength; + *matchLength = forwardMatchLength; + *backwardsMatchLength = backwardMatchLength; + + bestEntry = cur; +#ifdef ZSTD_SKIP + return cur; +#endif + } + } + } + if (bestEntry != NULL && bestMatchLength > LDM_MIN_MATCH_LENGTH) { + return bestEntry; + } + return NULL; +} + +void HASH_insert(LDM_hashTable *table, + const hash_t hash, const LDM_hashEntry entry) { + *(getBucket(table, hash) + table->bucketOffsets[hash]) = entry; + table->bucketOffsets[hash]++; + table->bucketOffsets[hash] &= HASH_BUCKET_SIZE - 1; +} + +U32 HASH_getSize(const LDM_hashTable *table) { + return table->size; +} + +void HASH_destroyTable(LDM_hashTable *table) { + free(table->entries); + free(table->bucketOffsets); + free(table); +} + +void HASH_outputTableOccupancy(const LDM_hashTable *table) { + U32 ctr = 0; + LDM_hashEntry *cur = table->entries; + LDM_hashEntry *end = table->entries + (table->size * HASH_BUCKET_SIZE); + for (; cur < end; ++cur) { + if (cur->offset == 0) { + ctr++; + } + } + + printf("Num buckets, bucket size: %d, %d\n", table->size, HASH_BUCKET_SIZE); + printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n", + table->maxEntries, ctr, + 100.0 * (double)(ctr) / table->maxEntries); +} + + +// TODO: This can be done more efficiently (but it is not that important as it +// is only used for computing stats). +static int intLog2(U32 x) { + int ret = 0; + while (x >>= 1) { + ret++; + } + return ret; +} + +// TODO: Maybe we would eventually prefer to have linear rather than +// exponential buckets. +/** +void HASH_outputTableOffsetHistogram(const LDM_CCtx *cctx) { + U32 i = 0; + int buckets[32] = { 0 }; + + printf("\n"); + printf("Hash table histogram\n"); + for (; i < HASH_getSize(cctx->hashTable); i++) { + int offset = (cctx->ip - cctx->ibase) - + HASH_getEntryFromHash(cctx->hashTable, i)->offset; + buckets[intLog2(offset)]++; + } + + i = 0; + for (; i < 32; i++) { + printf("2^%*d: %10u %6.3f%%\n", 2, i, + buckets[i], + 100.0 * (double) buckets[i] / + (double) HASH_getSize(cctx->hashTable)); + } + printf("\n"); +} +*/ + +void LDM_printCompressStats(const LDM_compressStats *stats) { + int i = 0; + printf("=====================\n"); + printf("Compression statistics\n"); + //TODO: compute percentage matched? + printf("Window size, hash table size (bytes): 2^%u, 2^%u\n", + stats->windowSizeLog, stats->hashTableSizeLog); + printf("num matches, total match length, %% matched: %u, %llu, %.3f\n", + stats->numMatches, + stats->totalMatchLength, + 100.0 * (double)stats->totalMatchLength / + (double)(stats->totalMatchLength + stats->totalLiteralLength)); + printf("avg match length: %.1f\n", ((double)stats->totalMatchLength) / + (double)stats->numMatches); + printf("avg literal length, total literalLength: %.1f, %llu\n", + ((double)stats->totalLiteralLength) / (double)stats->numMatches, + stats->totalLiteralLength); + printf("avg offset length: %.1f\n", + ((double)stats->totalOffset) / (double)stats->numMatches); + printf("min offset, max offset: %u, %u\n", + stats->minOffset, stats->maxOffset); + + printf("\n"); + printf("offset histogram: offset, num matches, %% of matches\n"); + + for (; i <= intLog2(stats->maxOffset); i++) { + printf("2^%*d: %10u %6.3f%%\n", 2, i, + stats->offsetHistogram[i], + 100.0 * (double) stats->offsetHistogram[i] / + (double) stats->numMatches); + } + printf("\n"); + printf("=====================\n"); +} + +int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) { + U32 lengthLeft = LDM_MIN_MATCH_LENGTH; + const BYTE *curIn = pIn; + const BYTE *curMatch = pMatch; + + if (pIn - pMatch > LDM_WINDOW_SIZE) { + return 0; + } + + for (; lengthLeft >= 4; lengthLeft -= 4) { + if (MEM_read32(curIn) != MEM_read32(curMatch)) { + return 0; + } + curIn += 4; + curMatch += 4; + } + return 1; +} + +hash_t HASH_hashU32(U32 value) { + return ((value * 2654435761U) >> (32 - LDM_HASHLOG)); +} + +/** + * Convert a sum computed from getChecksum to a hash value in the range + * of the hash table. + */ +static hash_t checksumToHash(U32 sum) { + return HASH_hashU32(sum); +// return ((sum * 2654435761U) >> (32 - LDM_HASHLOG)); +} + +/** + * Computes a checksum based on rsync's checksum. + * + * a(k,l) = \sum_{i = k}^l x_i (mod M) + * b(k,l) = \sum_{i = k}^l ((l - i + 1) * x_i) (mod M) + * checksum(k,l) = a(k,l) + 2^{16} * b(k,l) + */ +static U32 getChecksum(const BYTE *buf, U32 len) { + U32 i; + U32 s1, s2; + + s1 = s2 = 0; + for (i = 0; i < (len - 4); i += 4) { + s2 += (4 * (s1 + buf[i])) + (3 * buf[i + 1]) + + (2 * buf[i + 2]) + (buf[i + 3]) + + (10 * CHECKSUM_CHAR_OFFSET); + s1 += buf[i] + buf[i + 1] + buf[i + 2] + buf[i + 3] + + + (4 * CHECKSUM_CHAR_OFFSET); + + } + for(; i < len; i++) { + s1 += buf[i] + CHECKSUM_CHAR_OFFSET; + s2 += s1; + } + return (s1 & 0xffff) + (s2 << 16); +} + +/** + * Update a checksum computed from getChecksum(data, len). + * + * The checksum can be updated along its ends as follows: + * a(k+1, l+1) = (a(k,l) - x_k + x_{l+1}) (mod M) + * b(k+1, l+1) = (b(k,l) - (l-k+1)*x_k + (a(k+1,l+1)) (mod M) + * + * Thus toRemove should correspond to data[0]. + */ +static U32 updateChecksum(U32 sum, U32 len, + BYTE toRemove, BYTE toAdd) { + U32 s1 = (sum & 0xffff) - toRemove + toAdd; + U32 s2 = (sum >> 16) - ((toRemove + CHECKSUM_CHAR_OFFSET) * len) + s1; + + return (s1 & 0xffff) + (s2 << 16); +} + +/** + * Update cctx->nextSum, cctx->nextHash, and cctx->nextPosHashed + * based on cctx->lastSum and cctx->lastPosHashed. + * + * This uses a rolling hash and requires that the last position hashed + * corresponds to cctx->nextIp - step. + */ +static void setNextHash(LDM_CCtx *cctx) { +#ifdef RUN_CHECKS + U32 check; + if ((cctx->nextIp - cctx->ibase != 1) && + (cctx->nextIp - cctx->DEBUG_setNextHash != 1)) { + printf("CHECK debug fail: %zu %zu\n", cctx->nextIp - cctx->ibase, + cctx->DEBUG_setNextHash - cctx->ibase); + } + + cctx->DEBUG_setNextHash = cctx->nextIp; +#endif + +// cctx->nextSum = getChecksum((const char *)cctx->nextIp, LDM_HASH_LENGTH); + cctx->nextSum = updateChecksum( + cctx->lastSum, LDM_HASH_LENGTH, + cctx->lastPosHashed[0], + cctx->lastPosHashed[LDM_HASH_LENGTH]); + cctx->nextPosHashed = cctx->nextIp; + cctx->nextHash = checksumToHash(cctx->nextSum); + +#if LDM_LAG +// printf("LDM_LAG %zu\n", cctx->ip - cctx->lagIp); + if (cctx->ip - cctx->ibase > LDM_LAG) { + cctx->lagSum = updateChecksum( + cctx->lagSum, LDM_HASH_LENGTH, + cctx->lagIp[0], cctx->lagIp[LDM_HASH_LENGTH]); + cctx->lagIp++; + cctx->lagHash = checksumToHash(cctx->lagSum); + } +#endif + +#ifdef RUN_CHECKS + check = getChecksum(cctx->nextIp, LDM_HASH_LENGTH); + + if (check != cctx->nextSum) { + printf("CHECK: setNextHash failed %u %u\n", check, cctx->nextSum); + } + + if ((cctx->nextIp - cctx->lastPosHashed) != 1) { + printf("setNextHash: nextIp != lastPosHashed + 1. %zu %zu %zu\n", + cctx->nextIp - cctx->ibase, cctx->lastPosHashed - cctx->ibase, + cctx->ip - cctx->ibase); + } +#endif +} + +static void putHashOfCurrentPositionFromHash( + LDM_CCtx *cctx, hash_t hash, U32 sum) { + // Hash only every HASH_ONLY_EVERY times, based on cctx->ip. + // Note: this works only when cctx->step is 1. + if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) { + /** + const LDM_hashEntry entry = { cctx->ip - cctx->ibase , + MEM_read32(cctx->ip) }; + */ +#if LDM_LAG + // TODO: off by 1, but whatever + if (cctx->lagIp - cctx->ibase > 0) { + const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, cctx->lagSum }; + HASH_insert(cctx->hashTable, cctx->lagHash, entry); + } else { + const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum }; + HASH_insert(cctx->hashTable, hash, entry); + } +#else + const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum }; + HASH_insert(cctx->hashTable, hash, entry); +#endif + } + + cctx->lastPosHashed = cctx->ip; + cctx->lastHash = hash; + cctx->lastSum = sum; +} + +/** + * Copy over the cctx->lastHash, cctx->lastSum, and cctx->lastPosHashed + * fields from the "next" fields. + * + * This requires that cctx->ip == cctx->nextPosHashed. + */ +static void LDM_updateLastHashFromNextHash(LDM_CCtx *cctx) { +#ifdef RUN_CHECKS + if (cctx->ip != cctx->nextPosHashed) { + printf("CHECK failed: updateLastHashFromNextHash %zu\n", + cctx->ip - cctx->ibase); + } +#endif + putHashOfCurrentPositionFromHash(cctx, cctx->nextHash, cctx->nextSum); +} + +/** + * Insert hash of the current position into the hash table. + */ +static void LDM_putHashOfCurrentPosition(LDM_CCtx *cctx) { + U32 sum = getChecksum(cctx->ip, LDM_HASH_LENGTH); + hash_t hash = checksumToHash(sum); + +#ifdef RUN_CHECKS + if (cctx->nextPosHashed != cctx->ip && (cctx->ip != cctx->ibase)) { + printf("CHECK failed: putHashOfCurrentPosition %zu\n", + cctx->ip - cctx->ibase); + } +#endif + + putHashOfCurrentPositionFromHash(cctx, hash, sum); +} + +U32 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch, + const BYTE *pInLimit) { + const BYTE * const pStart = pIn; + while (pIn < pInLimit - 1) { + BYTE const diff = (*pMatch) ^ *(pIn); + if (!diff) { + pIn++; + pMatch++; + continue; + } + return (U32)(pIn - pStart); + } + return (U32)(pIn - pStart); +} + +void LDM_outputConfiguration(void) { + printf("=====================\n"); + printf("Configuration\n"); + printf("Window size log: %d\n", LDM_WINDOW_SIZE_LOG); + printf("Min match, hash length: %d, %d\n", + LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH); + printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE); + printf("HASH_ONLY_EVERY_LOG: %d\n", HASH_ONLY_EVERY_LOG); + printf("LDM_LAG %d\n", LDM_LAG); + printf("=====================\n"); +} + +void LDM_readHeader(const void *src, U64 *compressedSize, + U64 *decompressedSize) { + const BYTE *ip = (const BYTE *)src; + *compressedSize = MEM_readLE64(ip); + ip += sizeof(U64); + *decompressedSize = MEM_readLE64(ip); + // ip += sizeof(U64); +} + +void LDM_initializeCCtx(LDM_CCtx *cctx, + const void *src, size_t srcSize, + void *dst, size_t maxDstSize) { + cctx->isize = srcSize; + cctx->maxOSize = maxDstSize; + + cctx->ibase = (const BYTE *)src; + cctx->ip = cctx->ibase; + cctx->iend = cctx->ibase + srcSize; + + cctx->ihashLimit = cctx->iend - LDM_HASH_LENGTH; + cctx->imatchLimit = cctx->iend - LDM_MIN_MATCH_LENGTH; + + cctx->obase = (BYTE *)dst; + cctx->op = (BYTE *)dst; + + cctx->anchor = cctx->ibase; + + memset(&(cctx->stats), 0, sizeof(cctx->stats)); + cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U64); + + cctx->stats.minOffset = UINT_MAX; + cctx->stats.windowSizeLog = LDM_WINDOW_SIZE_LOG; + cctx->stats.hashTableSizeLog = LDM_MEMORY_USAGE; + + + cctx->lastPosHashed = NULL; + + cctx->step = 1; // Fixed to be 1 for now. Changing may break things. + cctx->nextIp = cctx->ip + cctx->step; + cctx->nextPosHashed = 0; + + cctx->DEBUG_setNextHash = 0; +} + +void LDM_destroyCCtx(LDM_CCtx *cctx) { + HASH_destroyTable(cctx->hashTable); +} + +/** + * Finds the "best" match. + * + * Returns 0 if successful and 1 otherwise (i.e. no match can be found + * in the remaining input that is long enough). + * + * matchLength contains the forward length of the match. + */ +static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match, + U32 *matchLength, U32 *backwardMatchLength) { + + LDM_hashEntry *entry = NULL; + cctx->nextIp = cctx->ip + cctx->step; + + while (entry == NULL) { + hash_t h; + U32 sum; + setNextHash(cctx); + h = cctx->nextHash; + sum = cctx->nextSum; + cctx->ip = cctx->nextIp; + cctx->nextIp += cctx->step; + + if (cctx->ip > cctx->imatchLimit) { + return 1; + } + + entry = HASH_getValidEntry(cctx, h, sum, + matchLength, backwardMatchLength); + + if (entry != NULL) { + *match = entry->offset + cctx->ibase; + } + putHashOfCurrentPositionFromHash(cctx, h, sum); + } + setNextHash(cctx); + return 0; +} + +void LDM_encodeLiteralLengthAndLiterals( + LDM_CCtx *cctx, BYTE *pToken, const U32 literalLength) { + /* Encode the literal length. */ + if (literalLength >= RUN_MASK) { + int len = (int)literalLength - RUN_MASK; + *pToken = (RUN_MASK << ML_BITS); + for (; len >= 255; len -= 255) { + *(cctx->op)++ = 255; + } + *(cctx->op)++ = (BYTE)len; + } else { + *pToken = (BYTE)(literalLength << ML_BITS); + } + + /* Encode the literals. */ + memcpy(cctx->op, cctx->anchor, literalLength); + cctx->op += literalLength; +} + +void LDM_outputBlock(LDM_CCtx *cctx, + const U32 literalLength, + const U32 offset, + const U32 matchLength) { + BYTE *pToken = cctx->op++; + + /* Encode the literal length and literals. */ + LDM_encodeLiteralLengthAndLiterals(cctx, pToken, literalLength); + + /* Encode the offset. */ + MEM_write32(cctx->op, offset); + cctx->op += LDM_OFFSET_SIZE; + + /* Encode the match length. */ + if (matchLength >= ML_MASK) { + unsigned matchLengthRemaining = matchLength; + *pToken += ML_MASK; + matchLengthRemaining -= ML_MASK; + MEM_write32(cctx->op, 0xFFFFFFFF); + while (matchLengthRemaining >= 4*0xFF) { + cctx->op += 4; + MEM_write32(cctx->op, 0xffffffff); + matchLengthRemaining -= 4*0xFF; + } + cctx->op += matchLengthRemaining / 255; + *(cctx->op)++ = (BYTE)(matchLengthRemaining % 255); + } else { + *pToken += (BYTE)(matchLength); + } +} + +// TODO: maxDstSize is unused. This function may seg fault when writing +// beyond the size of dst, as it does not check maxDstSize. Writing to +// a buffer and performing checks is a possible solution. +// +// This is based upon lz4. +size_t LDM_compress(const void *src, size_t srcSize, + void *dst, size_t maxDstSize) { + LDM_CCtx cctx; + const BYTE *match = NULL; + U32 forwardMatchLength = 0; + U32 backwardsMatchLength = 0; + + LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize); + LDM_outputConfiguration(); + + /* Hash the first position and put it into the hash table. */ + LDM_putHashOfCurrentPosition(&cctx); + +#if LDM_LAG + cctx.lagIp = cctx.ip; + cctx.lagHash = cctx.lastHash; + cctx.lagSum = cctx.lastSum; +#endif + /** + * Find a match. + * If no more matches can be found (i.e. the length of the remaining input + * is less than the minimum match length), then stop searching for matches + * and encode the final literals. + */ + while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength, + &backwardsMatchLength) == 0) { +#ifdef COMPUTE_STATS + cctx.stats.numMatches++; +#endif + + cctx.ip -= backwardsMatchLength; + match -= backwardsMatchLength; + + /** + * Write current block (literals, literal length, match offset, match + * length) and update pointers and hashes. + */ + { + const U32 literalLength = cctx.ip - cctx.anchor; + const U32 offset = cctx.ip - match; + const U32 matchLength = forwardMatchLength + + backwardsMatchLength - + LDM_MIN_MATCH_LENGTH; + + LDM_outputBlock(&cctx, literalLength, offset, matchLength); + +#ifdef COMPUTE_STATS + cctx.stats.totalLiteralLength += literalLength; + cctx.stats.totalOffset += offset; + cctx.stats.totalMatchLength += matchLength + LDM_MIN_MATCH_LENGTH; + cctx.stats.minOffset = + offset < cctx.stats.minOffset ? offset : cctx.stats.minOffset; + cctx.stats.maxOffset = + offset > cctx.stats.maxOffset ? offset : cctx.stats.maxOffset; + cctx.stats.offsetHistogram[(U32)intLog2(offset)]++; +#endif + + // Move ip to end of block, inserting hashes at each position. + cctx.nextIp = cctx.ip + cctx.step; + while (cctx.ip < cctx.anchor + LDM_MIN_MATCH_LENGTH + + matchLength + literalLength) { + if (cctx.ip > cctx.lastPosHashed) { + // TODO: Simplify. + LDM_updateLastHashFromNextHash(&cctx); + setNextHash(&cctx); + } + cctx.ip++; + cctx.nextIp++; + } + } + + // Set start of next block to current input pointer. + cctx.anchor = cctx.ip; + LDM_updateLastHashFromNextHash(&cctx); + } + + // HASH_outputTableOffsetHistogram(&cctx); + + /* Encode the last literals (no more matches). */ + { + const U32 lastRun = cctx.iend - cctx.anchor; + BYTE *pToken = cctx.op++; + LDM_encodeLiteralLengthAndLiterals(&cctx, pToken, lastRun); + } + +#ifdef COMPUTE_STATS + LDM_printCompressStats(&cctx.stats); + HASH_outputTableOccupancy(cctx.hashTable); +#endif + + { + const size_t ret = cctx.op - cctx.obase; + LDM_destroyCCtx(&cctx); + return ret; + } +} + +struct LDM_DCtx { + size_t compressedSize; + size_t maxDecompressedSize; + + const BYTE *ibase; /* Base of input */ + const BYTE *ip; /* Current input position */ + const BYTE *iend; /* End of source */ + + const BYTE *obase; /* Base of output */ + BYTE *op; /* Current output position */ + const BYTE *oend; /* End of output */ +}; + +void LDM_initializeDCtx(LDM_DCtx *dctx, + const void *src, size_t compressedSize, + void *dst, size_t maxDecompressedSize) { + dctx->compressedSize = compressedSize; + dctx->maxDecompressedSize = maxDecompressedSize; + + dctx->ibase = src; + dctx->ip = (const BYTE *)src; + dctx->iend = dctx->ip + dctx->compressedSize; + dctx->op = dst; + dctx->oend = dctx->op + dctx->maxDecompressedSize; +} + +size_t LDM_decompress(const void *src, size_t compressedSize, + void *dst, size_t maxDecompressedSize) { + LDM_DCtx dctx; + LDM_initializeDCtx(&dctx, src, compressedSize, dst, maxDecompressedSize); + + while (dctx.ip < dctx.iend) { + BYTE *cpy; + const BYTE *match; + size_t length, offset; + + /* Get the literal length. */ + const unsigned token = *(dctx.ip)++; + if ((length = (token >> ML_BITS)) == RUN_MASK) { + unsigned s; + do { + s = *(dctx.ip)++; + length += s; + } while (s == 255); + } + + /* Copy the literals. */ + cpy = dctx.op + length; + memcpy(dctx.op, dctx.ip, length); + dctx.ip += length; + dctx.op = cpy; + + //TODO : dynamic offset size + offset = MEM_read32(dctx.ip); + dctx.ip += LDM_OFFSET_SIZE; + match = dctx.op - offset; + + /* Get the match length. */ + length = token & ML_MASK; + if (length == ML_MASK) { + unsigned s; + do { + s = *(dctx.ip)++; + length += s; + } while (s == 255); + } + length += LDM_MIN_MATCH_LENGTH; + + /* Copy match. */ + cpy = dctx.op + length; + + // Inefficient for now. + while (match < cpy - offset && dctx.op < dctx.oend) { + *(dctx.op)++ = *match++; + } + } + return dctx.op - (BYTE *)dst; +} + +// TODO: implement and test hash function +void LDM_test(void) { +} + +/* +void LDM_test(const void *src, size_t srcSize, + void *dst, size_t maxDstSize) { + const BYTE *ip = (const BYTE *)src + 1125; + U32 sum = getChecksum((const char *)ip, LDM_HASH_LENGTH); + U32 sum2; + ++ip; + for (; ip < (const BYTE *)src + 1125 + 100; ip++) { + sum2 = updateChecksum(sum, LDM_HASH_LENGTH, + ip[-1], ip[LDM_HASH_LENGTH - 1]); + sum = getChecksum((const char *)ip, LDM_HASH_LENGTH); + printf("TEST HASH: %zu %u %u\n", ip - (const BYTE *)src, sum, sum2); + } +} +*/ + +