From: Stella Lau Date: Wed, 19 Jul 2017 23:56:28 +0000 (-0700) Subject: Minor refactoring X-Git-Tag: v1.3.1^2~12^2~7^2~17 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2427a154cb6a5af622bdbe679f4b4c5b906b4821;p=thirdparty%2Fzstd.git Minor refactoring --- diff --git a/contrib/long_distance_matching/basic_table.c b/contrib/long_distance_matching/basic_table.c deleted file mode 100644 index 30c548d2a..000000000 --- a/contrib/long_distance_matching/basic_table.c +++ /dev/null @@ -1,109 +0,0 @@ -#include -#include - -#include "ldm.h" -#include "ldm_hashtable.h" -#include "mem.h" - -#define LDM_HASHLOG ((LDM_MEMORY_USAGE) - 4) - -struct LDM_hashTable { - U32 size; - LDM_hashEntry *entries; - const BYTE *offsetBase; -}; - -LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) { - LDM_hashTable *table = malloc(sizeof(LDM_hashTable)); - table->size = size; - table->entries = calloc(size, sizeof(LDM_hashEntry)); - table->offsetBase = offsetBase; - return table; -} - -void HASH_initializeTable(LDM_hashTable *table, U32 size) { - table->size = size; - table->entries = calloc(size, sizeof(LDM_hashEntry)); -} - -LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) { - return table->entries + hash; -} - -LDM_hashEntry *HASH_getEntryFromHash( - const LDM_hashTable *table, const hash_t hash, const U32 checksum) { - (void)checksum; - return getBucket(table, hash); -} - -static int isValidMatch(const BYTE *pIn, const BYTE *pMatch, - U32 minMatchLength, U32 maxWindowSize) { - U32 lengthLeft = minMatchLength; - const BYTE *curIn = pIn; - const BYTE *curMatch = pMatch; - - if (pIn - pMatch > maxWindowSize) { - return 0; - } - - for (; lengthLeft >= 4; lengthLeft -= 4) { - if (MEM_read32(curIn) != MEM_read32(curMatch)) { - return 0; - } - curIn += 4; - curMatch += 4; - } - return 1; -} - -LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, - const hash_t hash, - const U32 checksum, - const BYTE *pIn, - const BYTE *pEnd, 
- U32 minMatchLength, - U32 maxWindowSize, - U32 *matchLength) { - LDM_hashEntry *entry = getBucket(table, hash); - (void)checksum; - (void)pEnd; - (void)matchLength; - // TODO: Count the entire forward match length rather than check if valid. - if (isValidMatch(pIn, entry->offset + table->offsetBase, - minMatchLength, maxWindowSize)) { - - return entry; - } - return NULL; -} - -hash_t HASH_hashU32(U32 value) { - return ((value * 2654435761U) >> (32 - LDM_HASHLOG)); -} - -void HASH_insert(LDM_hashTable *table, - const hash_t hash, const LDM_hashEntry entry) { - *getBucket(table, hash) = entry; -} - -U32 HASH_getSize(const LDM_hashTable *table) { - return table->size; -} - -void HASH_destroyTable(LDM_hashTable *table) { - free(table->entries); - free(table); -} - -void HASH_outputTableOccupancy(const LDM_hashTable *hashTable) { - U32 i = 0; - U32 ctr = 0; - for (; i < HASH_getSize(hashTable); i++) { - if (getBucket(hashTable, i)->offset == 0) { - ctr++; - } - } - printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n", - HASH_getSize(hashTable), ctr, - 100.0 * (double)(ctr) / (double)HASH_getSize(hashTable)); -} diff --git a/contrib/long_distance_matching/circular_buffer_table.c b/contrib/long_distance_matching/circular_buffer_table.c index 9b7ad088c..9429fbcde 100644 --- a/contrib/long_distance_matching/circular_buffer_table.c +++ b/contrib/long_distance_matching/circular_buffer_table.c @@ -5,22 +5,19 @@ #include "ldm_hashtable.h" #include "mem.h" -//TODO: move def somewhere else. // Number of elements per hash bucket. // HASH_BUCKET_SIZE_LOG defined in ldm.h -#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now #define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG)) // TODO: rename. Number of hash buckets. #define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG) -#define ZSTD_SKIP -//#define TMP_TST +//#define ZSTD_SKIP struct LDM_hashTable { - U32 size; // Number of buckets - U32 maxEntries; // Rename... - LDM_hashEntry *entries; // 1-D array for now. 
+ U32 numBuckets; + U32 numEntries; + LDM_hashEntry *entries; BYTE *bucketOffsets; // Pointer to current insert position. // Position corresponding to offset=0 in LDM_hashEntry. @@ -32,8 +29,8 @@ struct LDM_hashTable { LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase, U32 minMatchLength, U32 maxWindowSize) { LDM_hashTable *table = malloc(sizeof(LDM_hashTable)); - table->size = size >> HASH_BUCKET_SIZE_LOG; - table->maxEntries = size; + table->numBuckets = size >> HASH_BUCKET_SIZE_LOG; + table->numEntries = size; table->entries = calloc(size, sizeof(LDM_hashEntry)); table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE)); table->offsetBase = offsetBase; @@ -46,7 +43,6 @@ static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) { return table->entries + (hash << HASH_BUCKET_SIZE_LOG); } -#if TMP_ZSTDTOGGLE static unsigned ZSTD_NbCommonBytes (register size_t val) { if (MEM_isLittleEndian()) { @@ -159,26 +155,22 @@ U32 countBackwardsMatch(const BYTE *pIn, const BYTE *pAnchor, return matchLength; } -LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, - const hash_t hash, - const U32 checksum, - const BYTE *pIn, - const BYTE *pEnd, - U32 *matchLength, - U32 *backwardsMatchLength, - const BYTE *pAnchor) { +LDM_hashEntry *HASH_getBestEntry(const LDM_hashTable *table, + const hash_t hash, + const U32 checksum, + const BYTE *pIn, + const BYTE *pEnd, + const BYTE *pAnchor, + U32 *pForwardMatchLength, + U32 *pBackwardMatchLength) { LDM_hashEntry *bucket = getBucket(table, hash); LDM_hashEntry *cur = bucket; LDM_hashEntry *bestEntry = NULL; U32 bestMatchLength = 0; - U32 forwardMatch = 0; - U32 backwardMatch = 0; -#ifdef TMP_TST - U32 numBetter = 0; -#endif for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) { - // Check checksum for faster check. const BYTE *pMatch = cur->offset + table->offsetBase; + + // Check checksum for faster check. 
if (cur->checksum == checksum && pIn - pMatch <= table->maxWindowSize) { U32 forwardMatchLength = ZSTD_count(pIn, pMatch, pEnd); U32 backwardMatchLength, totalMatchLength; @@ -193,105 +185,27 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, if (totalMatchLength >= bestMatchLength) { bestMatchLength = totalMatchLength; - forwardMatch = forwardMatchLength; - backwardMatch = backwardMatchLength; + *pForwardMatchLength = forwardMatchLength; + *pBackwardMatchLength = backwardMatchLength; + bestEntry = cur; -#ifdef TMP_TST - numBetter++; -#endif #ifdef ZSTD_SKIP - *matchLength = forwardMatchLength; - *backwardsMatchLength = backwardMatchLength; - return cur; #endif -// *matchLength = forwardMatchLength; -// return cur; } } } - if (bestEntry != NULL && bestMatchLength > table->minMatchLength) { -#ifdef TMP_TST - printf("Num better %u\n", numBetter - 1); -#endif - *matchLength = forwardMatch; - *backwardsMatchLength = backwardMatch; + if (bestEntry != NULL) { return bestEntry; } return NULL; } -#else - -static int isValidMatch(const BYTE *pIn, const BYTE *pMatch, - U32 minMatchLength, U32 maxWindowSize) { - printf("HERE\n"); - U32 lengthLeft = minMatchLength; - const BYTE *curIn = pIn; - const BYTE *curMatch = pMatch; - - if (pIn - pMatch > maxWindowSize) { - return 0; - } - - for (; lengthLeft >= 4; lengthLeft -= 4) { - if (MEM_read32(curIn) != MEM_read32(curMatch)) { - return 0; - } - curIn += 4; - curMatch += 4; - } - return 1; -} - -//TODO: clean up function call. This is not at all decoupled from LDM. -LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, - const hash_t hash, - const U32 checksum, - const BYTE *pIn, - const BYTE *pEnd, - U32 *matchLength, - U32 *backwardsMatchLength, - const BYTE *pAnchor) { - LDM_hashEntry *bucket = getBucket(table, hash); - LDM_hashEntry *cur = bucket; - (void)matchLength; - (void)backwardsMatchLength; - (void)pAnchor; for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) { - // Check checksum for faster check. 
- const BYTE *pMatch = cur->offset + table->offsetBase; - (void)pEnd; - - if (cur->checksum == checksum && - isValidMatch(pIn, pMatch, table->minMatchLength, table->maxWindowSize)) { - return cur; - } - } - return NULL; -} - -#endif hash_t HASH_hashU32(U32 value) { return ((value * 2654435761U) >> (32 - LDM_HASHLOG)); } - -LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table, - const hash_t hash, - const U32 checksum) { - // Loop through bucket. - // TODO: in order of recency??? - LDM_hashEntry *bucket = getBucket(table, hash); - LDM_hashEntry *cur = bucket; - for(; cur < bucket + HASH_BUCKET_SIZE; ++cur) { - if (cur->checksum == checksum) { - return cur; - } - } - return NULL; -} - void HASH_insert(LDM_hashTable *table, const hash_t hash, const LDM_hashEntry entry) { *(getBucket(table, hash) + table->bucketOffsets[hash]) = entry; @@ -300,7 +214,7 @@ void HASH_insert(LDM_hashTable *table, } U32 HASH_getSize(const LDM_hashTable *table) { - return table->size; + return table->numBuckets; } void HASH_destroyTable(LDM_hashTable *table) { @@ -312,15 +226,16 @@ void HASH_destroyTable(LDM_hashTable *table) { void HASH_outputTableOccupancy(const LDM_hashTable *table) { U32 ctr = 0; LDM_hashEntry *cur = table->entries; - LDM_hashEntry *end = table->entries + (table->size * HASH_BUCKET_SIZE); + LDM_hashEntry *end = table->entries + (table->numBuckets * HASH_BUCKET_SIZE); for (; cur < end; ++cur) { if (cur->offset == 0) { ctr++; } } - printf("Num buckets, bucket size: %d, %d\n", table->size, HASH_BUCKET_SIZE); + printf("Num buckets, bucket size: %d, %d\n", + table->numBuckets, HASH_BUCKET_SIZE); printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n", - table->maxEntries, ctr, - 100.0 * (double)(ctr) / table->maxEntries); + table->numEntries, ctr, + 100.0 * (double)(ctr) / table->numEntries); } diff --git a/contrib/long_distance_matching/ldm.c b/contrib/long_distance_matching/ldm.c index a116af70a..6e9addf73 100644 --- a/contrib/long_distance_matching/ldm.c 
+++ b/contrib/long_distance_matching/ldm.c @@ -14,7 +14,6 @@ #define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE) - 4)) #define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1) - #define ML_BITS 4 #define ML_MASK ((1U<windowSizeLog, stats->hashTableSizeLog); printf("num matches, total match length, %% matched: %u, %llu, %.3f\n", @@ -191,7 +188,6 @@ int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) { */ static hash_t checksumToHash(U32 sum) { return HASH_hashU32(sum); -// return ((sum * 2654435761U) >> (32 - LDM_HASHLOG)); } /** @@ -455,22 +451,14 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match, if (cctx->ip > cctx->imatchLimit) { return 1; } -#ifdef HASH_CHECK - entry = HASH_getEntryFromHash(cctx->hashTable, h, sum); -#else - entry = HASH_getValidEntry(cctx->hashTable, h, sum, - cctx->ip, cctx->iend, - matchLength, backwardMatchLength, - cctx->anchor); -#endif + + entry = HASH_getBestEntry(cctx->hashTable, h, sum, + cctx->ip, cctx->iend, + cctx->anchor, + matchLength, backwardMatchLength); if (entry != NULL) { *match = entry->offset + cctx->ibase; -#ifdef HASH_CHECK - if (!LDM_isValidMatch(cctx->ip, *match)) { - entry = NULL; - } -#endif } putHashOfCurrentPositionFromHash(cctx, h, sum); } @@ -563,21 +551,8 @@ size_t LDM_compress(const void *src, size_t srcSize, cctx.stats.numMatches++; #endif -#if TMP_RECOMPUTE_LENGTHS - backwardsMatchLength = 0; - /** - * Catch up: look back to extend the match backwards from the found match. 
- */ - while (cctx.ip > cctx.anchor && match > cctx.ibase && - cctx.ip[-1] == match[-1]) { - cctx.ip--; - match--; - backwardsMatchLength++; - } -#else cctx.ip -= backwardsMatchLength; match -= backwardsMatchLength; -#endif /** * Write current block (literals, literal length, match offset, match @@ -586,16 +561,9 @@ size_t LDM_compress(const void *src, size_t srcSize, { const U32 literalLength = cctx.ip - cctx.anchor; const U32 offset = cctx.ip - match; -#if TMP_RECOMPUTE_LENGTHS - const U32 matchLength = LDM_countMatchLength( - cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLength, - match + LDM_MIN_MATCH_LENGTH + backwardsMatchLength, - cctx.ihashLimit) + backwardsMatchLength; -#else const U32 matchLength = forwardMatchLength + backwardsMatchLength - LDM_MIN_MATCH_LENGTH; -#endif LDM_outputBlock(&cctx, literalLength, offset, matchLength); diff --git a/contrib/long_distance_matching/ldm.h b/contrib/long_distance_matching/ldm.h index 2396227d2..1d5b2f13b 100644 --- a/contrib/long_distance_matching/ldm.h +++ b/contrib/long_distance_matching/ldm.h @@ -11,21 +11,21 @@ #define LDM_OFFSET_SIZE 4 // Defines the size of the hash table. +// Note that this is not the number of buckets. // Currently this should be less than WINDOW_SIZE_LOG + 4? #define LDM_MEMORY_USAGE 23 +#define HASH_BUCKET_SIZE_LOG 3 // MAX is 4 for now -//#define LDM_LAG (1 << 20) -#define LDM_LAG (0) +// Defines the lag in inserting elements into the hash table. +#define LDM_LAG 0 #define LDM_WINDOW_SIZE_LOG 28 #define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG)) //These should be multiples of four (and perhaps set to the same value?). 
-#define LDM_MIN_MATCH_LENGTH 1024 -#define LDM_HASH_LENGTH 1024 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_LENGTH 64 -#define TMP_ZSTDTOGGLE 1 -#define TMP_RECOMPUTE_LENGTHS (!(TMP_ZSTDTOGGLE)) typedef struct LDM_compressStats LDM_compressStats; typedef struct LDM_CCtx LDM_CCtx; diff --git a/contrib/long_distance_matching/ldm_hashtable.h b/contrib/long_distance_matching/ldm_hashtable.h index 51d825258..df9dcd789 100644 --- a/contrib/long_distance_matching/ldm_hashtable.h +++ b/contrib/long_distance_matching/ldm_hashtable.h @@ -14,37 +14,17 @@ typedef struct LDM_hashEntry { typedef struct LDM_hashTable LDM_hashTable; -/** - * Create a hash table with size hash buckets. - * LDM_hashEntry.offset is added to offsetBase to calculate pMatch in - * HASH_getValidEntry. - */ LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase, U32 minMatchLength, U32 maxWindowSize); -/** - * Returns an LDM_hashEntry from the table that matches the checksum. - * Returns NULL if one does not exist. - */ -LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table, - const hash_t hash, - const U32 checksum); - -/** - * Gets a valid entry that matches the checksum. A valid entry is defined by - * *isValid. - * - * The function finds an entry matching the checksum, computes pMatch as - * offset + table.offsetBase, and calls isValid. 
- */ -LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, - const hash_t hash, - const U32 checksum, - const BYTE *pIn, - const BYTE *pEnd, - U32 *matchLength, - U32 *backwardsMatchLength, - const BYTE *pAnchor); +LDM_hashEntry *HASH_getBestEntry(const LDM_hashTable *table, + const hash_t hash, + const U32 checksum, + const BYTE *pIn, + const BYTE *pEnd, + const BYTE *pAnchor, + U32 *matchLength, + U32 *backwardsMatchLength); hash_t HASH_hashU32(U32 value); diff --git a/contrib/long_distance_matching/ldm_with_table.c b/contrib/long_distance_matching/ldm_with_table.c index 68a33d0ff..5919d588c 100644 --- a/contrib/long_distance_matching/ldm_with_table.c +++ b/contrib/long_distance_matching/ldm_with_table.c @@ -4,6 +4,8 @@ #include #include +#include "ldm.h" + #define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE)) //#define LDM_HASH_ENTRY_SIZE 4 #define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2) @@ -14,7 +16,6 @@ #define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1) /* Hash table stuff. */ -#define HASH_BUCKET_SIZE_LOG 3 // MAX is 4 for now #define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG)) #define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG) @@ -32,18 +33,15 @@ //#define RUN_CHECKS -#include "ldm.h" - /* Hash table stuff */ typedef U32 hash_t; typedef struct LDM_hashEntry { - U32 offset; // TODO: Replace with pointer? + U32 offset; U32 checksum; } LDM_hashEntry; -// TODO: Scanning speed // TODO: Memory usage struct LDM_compressStats { U32 windowSizeLog, hashTableSizeLog; @@ -110,18 +108,22 @@ struct LDM_CCtx { }; struct LDM_hashTable { - U32 size; // Number of buckets - U32 maxEntries; // Rename... - LDM_hashEntry *entries; // 1-D array for now. + U32 numBuckets; // Number of buckets + U32 numEntries; // Rename... + LDM_hashEntry *entries; BYTE *bucketOffsets; // Position corresponding to offset=0 in LDM_hashEntry. }; +/** + * Create a hash table that can contain size elements. 
+ * The number of buckets is determined by size >> HASH_BUCKET_SIZE_LOG. + */ LDM_hashTable *HASH_createTable(U32 size) { LDM_hashTable *table = malloc(sizeof(LDM_hashTable)); - table->size = size >> HASH_BUCKET_SIZE_LOG; - table->maxEntries = size; + table->numBuckets = size >> HASH_BUCKET_SIZE_LOG; + table->numEntries = size; table->entries = calloc(size, sizeof(LDM_hashEntry)); table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE)); return table; @@ -131,10 +133,7 @@ static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) { return table->entries + (hash << HASH_BUCKET_SIZE_LOG); } - - -static unsigned ZSTD_NbCommonBytes (register size_t val) -{ +static unsigned ZSTD_NbCommonBytes (register size_t val) { if (MEM_isLittleEndian()) { if (MEM_64bits()) { # if defined(_MSC_VER) && defined(_WIN64) @@ -234,6 +233,11 @@ static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, return (size_t)(pIn - pStart); } +/** + * Count number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch > pBase and pIn > pAnchor. + */ U32 countBackwardsMatch(const BYTE *pIn, const BYTE *pAnchor, const BYTE *pMatch, const BYTE *pBase) { U32 matchLength = 0; @@ -245,20 +249,32 @@ U32 countBackwardsMatch(const BYTE *pIn, const BYTE *pAnchor, return matchLength; } -LDM_hashEntry *HASH_getValidEntry(const LDM_CCtx *cctx, - const hash_t hash, - const U32 checksum, - U32 *matchLength, - U32 *backwardsMatchLength) { +/** + * Returns a pointer to the entry in the hash table matching the hash and + * checksum with the "longest match length" as defined below. The forward and + * backward match lengths are written to *pForwardMatchLength and + * *pBackwardMatchLength. + * + * The match length is defined based on cctx->ip and the entry's offset. + * The forward match is computed from cctx->ip and entry->offset + cctx->ibase. 
+ * The backward match is computed backwards from cctx->ip and + * cctx->ibase only if the forward match is longer than LDM_MIN_MATCH_LENGTH. + * + */ +LDM_hashEntry *HASH_getBestEntry(const LDM_CCtx *cctx, + const hash_t hash, + const U32 checksum, + U32 *pForwardMatchLength, + U32 *pBackwardMatchLength) { LDM_hashTable *table = cctx->hashTable; LDM_hashEntry *bucket = getBucket(table, hash); LDM_hashEntry *cur = bucket; LDM_hashEntry *bestEntry = NULL; U32 bestMatchLength = 0; for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) { - // Check checksum for faster check. const BYTE *pMatch = cur->offset + cctx->ibase; + // Check checksum for faster check. if (cur->checksum == checksum && cctx->ip - pMatch <= LDM_WINDOW_SIZE) { U32 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend); @@ -279,8 +295,8 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_CCtx *cctx, if (totalMatchLength >= bestMatchLength && totalMatchLength >= LDM_MIN_MATCH_LENGTH) { bestMatchLength = totalMatchLength; - *matchLength = forwardMatchLength; - *backwardsMatchLength = backwardMatchLength; + *pForwardMatchLength = forwardMatchLength; + *pBackwardMatchLength = backwardMatchLength; bestEntry = cur; #ifdef ZSTD_SKIP @@ -303,7 +319,7 @@ void HASH_insert(LDM_hashTable *table, } U32 HASH_getSize(const LDM_hashTable *table) { - return table->size; + return table->numBuckets; } void HASH_destroyTable(LDM_hashTable *table) { @@ -315,20 +331,20 @@ void HASH_destroyTable(LDM_hashTable *table) { void HASH_outputTableOccupancy(const LDM_hashTable *table) { U32 ctr = 0; LDM_hashEntry *cur = table->entries; - LDM_hashEntry *end = table->entries + (table->size * HASH_BUCKET_SIZE); + LDM_hashEntry *end = table->entries + (table->numBuckets * HASH_BUCKET_SIZE); for (; cur < end; ++cur) { if (cur->offset == 0) { ctr++; } } - printf("Num buckets, bucket size: %d, %d\n", table->size, HASH_BUCKET_SIZE); + printf("Num buckets, bucket size: %d, %d\n", + table->numBuckets, HASH_BUCKET_SIZE); printf("Hash table size, 
empty slots, %% empty: %u, %u, %.3f\n", - table->maxEntries, ctr, - 100.0 * (double)(ctr) / table->maxEntries); + table->numEntries, ctr, + 100.0 * (double)(ctr) / table->numEntries); } - // TODO: This can be done more efficiently (but it is not that important as it // is only used for computing stats). static int intLog2(U32 x) { @@ -339,7 +355,7 @@ static int intLog2(U32 x) { return ret; } -// TODO: Maybe we would eventually prefer to have linear rather than +// Maybe we would eventually prefer to have linear rather than // exponential buckets. /** void HASH_outputTableOffsetHistogram(const LDM_CCtx *cctx) { @@ -369,7 +385,6 @@ void LDM_printCompressStats(const LDM_compressStats *stats) { int i = 0; printf("=====================\n"); printf("Compression statistics\n"); - //TODO: compute percentage matched? printf("Window size, hash table size (bytes): 2^%u, 2^%u\n", stats->windowSizeLog, stats->hashTableSizeLog); printf("num matches, total match length, %% matched: %u, %llu, %.3f\n", @@ -429,7 +444,6 @@ hash_t HASH_hashU32(U32 value) { */ static hash_t checksumToHash(U32 sum) { return HASH_hashU32(sum); -// return ((sum * 2654435761U) >> (32 - LDM_HASHLOG)); } /** @@ -672,10 +686,10 @@ void LDM_destroyCCtx(LDM_CCtx *cctx) { * Returns 0 if successful and 1 otherwise (i.e. no match can be found * in the remaining input that is long enough). * - * matchLength contains the forward length of the match. + * forwardMatchLength contains the forward length of the match. 
*/ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match, - U32 *matchLength, U32 *backwardMatchLength) { + U32 *forwardMatchLength, U32 *backwardMatchLength) { LDM_hashEntry *entry = NULL; cctx->nextIp = cctx->ip + cctx->step; @@ -693,8 +707,8 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match, return 1; } - entry = HASH_getValidEntry(cctx, h, sum, - matchLength, backwardMatchLength); + entry = HASH_getBestEntry(cctx, h, sum, + forwardMatchLength, backwardMatchLength); if (entry != NULL) { *match = entry->offset + cctx->ibase; diff --git a/contrib/long_distance_matching/main-ldm.c b/contrib/long_distance_matching/main-ldm.c index a43ec0002..96db0c220 100644 --- a/contrib/long_distance_matching/main-ldm.c +++ b/contrib/long_distance_matching/main-ldm.c @@ -29,6 +29,7 @@ static int compress(const char *fname, const char *oname) { size_t maxCompressedSize, compressedSize; struct timeval tv1, tv2; + double timeTaken; /* Open the input file. */ if ((fdin = open(fname, O_RDONLY)) < 0) { @@ -53,18 +54,7 @@ static int compress(const char *fname, const char *oname) { // The compress function should check before writing or buffer writes. maxCompressedSize += statbuf.st_size / 255; - /* Go to the location corresponding to the last byte. */ - /* TODO: fallocate? */ - if (lseek(fdout, maxCompressedSize - 1, SEEK_SET) == -1) { - perror("lseek error"); - return 1; - } - - /* Write a dummy byte at the last location. */ - if (write(fdout, "", 1) != 1) { - perror("write error"); - return 1; - } + ftruncate(fdout, maxCompressedSize); /* mmap the input file. 
*/ if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0)) @@ -103,12 +93,12 @@ static int compress(const char *fname, const char *oname) { (unsigned)statbuf.st_size, (unsigned)compressedSize, oname, (double)compressedSize / (statbuf.st_size) * 100); + timeTaken = (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + + (double) (tv2.tv_sec - tv1.tv_sec), + printf("Total compress time = %.3f seconds, Average compression speed: %.3f MB/s\n", - (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + - (double) (tv2.tv_sec - tv1.tv_sec), - ((double)statbuf.st_size / (double) (1 << 20)) / - ((double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + - (double) (tv2.tv_sec - tv1.tv_sec))); + timeTaken, + ((double)statbuf.st_size / (double) (1 << 20)) / timeTaken); // Close files. @@ -156,17 +146,7 @@ static int decompress(const char *fname, const char *oname) { /* Read the header. */ LDM_readHeader(src, &compressedSize, &decompressedSize); - /* Go to the location corresponding to the last byte. */ - if (lseek(fdout, decompressedSize - 1, SEEK_SET) == -1) { - perror("lseek error"); - return 1; - } - - /* write a dummy byte at the last location */ - if (write(fdout, "", 1) != 1) { - perror("write error"); - return 1; - } + ftruncate(fdout, decompressedSize); /* mmap the output file */ if ((dst = mmap(0, decompressedSize, PROT_READ | PROT_WRITE,