# Default goal: delegates to `all`, which builds both binaries listed there.
default: all
-all: main-basic main-chaining
+all: main-basic main-circular-buffer
# Each binary is linked from one hash-table source plus ldm.c and main-ldm.c;
# $^ expands to all prerequisites and $@ to the target name.
main-basic : basic_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
-main-chaining : chaining_table.c ldm.c main-ldm.c
+main-circular-buffer: circular_buffer_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
# Remove core, object files, tmp*/result*/*.ldm/*.ldm.dec files and the built
# binaries. The leading @ keeps make from echoing the commands.
clean:
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
- main-basic main-chaining
+ main-basic main-circular-buffer
@echo Cleaning completed
return table->entries + hash;
}
+
// Returns the table slot that getBucket() selects for `hash`.
// `checksum` is part of the prototype but is not consulted in this
// implementation.
LDM_hashEntry *HASH_getEntryFromHash(
const LDM_hashTable *table, const hash_t hash, const U32 checksum) {
(void)checksum;  // explicitly marked unused
return getBucket(table, hash);
}
+LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,  // returns the slot for `hash` if isValid accepts it, else NULL
+ const hash_t hash,
+ const U32 checksum,  // not consulted in this implementation
+ const BYTE *pIn,  // forwarded to isValid as its first argument
+ int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
+ LDM_hashEntry *entry = getBucket(table, hash);
+ (void)checksum;
+ if ((*isValid)(pIn, entry->offset + table->offsetBase)) {  // candidate pointer = offsetBase + stored offset
+ return entry;
+ } else {
+ return NULL;
+ }
+}
+
+
+
void HASH_insert(LDM_hashTable *table,
const hash_t hash, const LDM_hashEntry entry) {
*getBucket(table, hash) = entry;
// refactor code to scale the number of elements appropriately.
// Number of elements per hash bucket.
-#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now
+#define HASH_BUCKET_SIZE_LOG 1 // MAX is 4 for now
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
struct LDM_hashTable {
}
*/
+LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,  // returns the first bucket entry passing both checks, else NULL
+ const hash_t hash,
+ const U32 checksum,  // compared with each entry's stored checksum
+ const BYTE *pIn,  // forwarded to isValid as its first argument
+ int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
+ LDM_hashEntry *bucket = getBucket(table, hash);
+ LDM_hashEntry *cur = bucket;
+ // TODO: in order of recency?
+ for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {  // visit every slot of the bucket in storage order
+ // Compare checksums first so that isValid is only called when they agree.
+ if (cur->checksum == checksum &&
+ (*isValid)(pIn, cur->offset + table->offsetBase)) {  // candidate pointer = offsetBase + stored offset
+ return cur;
+ }
+ }
+ return NULL;
+}
+
+
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum) {
#include <string.h>
// Insert only every (HASH_ONLY_EVERY + 1)-th position into the hash table.
-#define HASH_ONLY_EVERY 0
+#define HASH_ONLY_EVERY 31
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
U32 numCollisions;
U32 numHashInserts;
-// U64 numInvalidHashes, numValidHashes; // tmp
-
U32 offsetHistogram[32];
};
(double) stats->numMatches);
}
printf("\n");
-
- /*
- printf("Num invalid hashes, num valid hashes, %llu %llu\n",
- stats->numInvalidHashes, stats->numValidHashes);
- */
- /*
- printf("num collisions, num hash inserts, %% collisions: %u, %u, %.3f\n",
- stats->numCollisions, stats->numHashInserts,
- stats->numHashInserts == 0 ?
- 1.0 : (100.0 * (double)stats->numCollisions) /
- (double)stats->numHashInserts);
- */
printf("=====================\n");
}
// Returns 1 when pMatch is an acceptable match for pIn and 0 otherwise.
int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
- /*
- if (memcmp(pIn, pMatch, LDM_MIN_MATCH_LENGTH) == 0) {
- return 1;
- }
- return 0;
- */
-
- //TODO: This seems to be faster for some reason?
-
U32 lengthLeft = LDM_MIN_MATCH_LENGTH;
const BYTE *curIn = pIn;
const BYTE *curMatch = pMatch;
- for (; lengthLeft >= 8; lengthLeft -= 8) {
- if (MEM_read64(curIn) != MEM_read64(curMatch)) {
+ if (pIn - pMatch > LDM_WINDOW_SIZE) {  // reject candidates farther back than the window size
+ return 0;
+ }
+
+ for (; lengthLeft >= 4; lengthLeft -= 4) {  // compare LDM_MIN_MATCH_LENGTH bytes, four at a time
+ if (MEM_read32(curIn) != MEM_read32(curMatch)) {
return 0;
}
- curIn += 8;
- curMatch += 8;
- }
- if (lengthLeft > 0) {
- return (MEM_read32(curIn) == MEM_read32(curMatch));
+ curIn += 4;
+ curMatch += 4;
}
return 1;
}
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
// Note: this works only when cctx->step is 1.
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
+ /**
const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
MEM_read32(cctx->ip) };
+ */
+ const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
HASH_insert(cctx->hashTable, hash, entry);
}
LDM_hashEntry *entry = NULL;
cctx->nextIp = cctx->ip + cctx->step;
- do {
+ while (entry == NULL) {
hash_t h;
U32 sum;
setNextHash(cctx);
return 1;
}
- entry = HASH_getEntryFromHash(cctx->hashTable, h, MEM_read32(cctx->ip));
+ entry = HASH_getValidEntry(cctx->hashTable, h, sum, cctx->ip,
+ &LDM_isValidMatch);
if (entry != NULL) {
*match = entry->offset + cctx->ibase;
}
-
putHashOfCurrentPositionFromHash(cctx, h, sum);
-
- } while (entry == NULL ||
- (cctx->ip - *match > LDM_WINDOW_SIZE ||
- !LDM_isValidMatch(cctx->ip, *match)));
+ }
setNextHash(cctx);
return 0;
}
#define LDM_OFFSET_SIZE 4
// Defines the size of the hash table.
-#define LDM_MEMORY_USAGE 16
+#define LDM_MEMORY_USAGE 20  // LDM_HASHTABLESIZE is defined as 1 << LDM_MEMORY_USAGE
-#define LDM_WINDOW_SIZE_LOG 25
+#define LDM_WINDOW_SIZE_LOG 30  // log2 of the largest pIn - pMatch distance LDM_isValidMatch accepts
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))  // NOTE(review): int shift; assuming a 32-bit int, a LOG of 31 or more would overflow
//These should be multiples of four.
-#define LDM_MIN_MATCH_LENGTH 1024
-#define LDM_HASH_LENGTH 1024
+#define LDM_MIN_MATCH_LENGTH 64  // number of bytes LDM_isValidMatch compares, four at a time
+#define LDM_HASH_LENGTH 64
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;
void LDM_printCompressStats(const LDM_compressStats *stats);
/**
* Checks whether the LDM_MIN_MATCH_LENGTH bytes from pIn are the same as the
- * LDM_MIN_MATCH_LENGTH bytes from match.
+ * LDM_MIN_MATCH_LENGTH bytes from pMatch, and also whether
+ * pIn - pMatch <= LDM_WINDOW_SIZE.
*
* This assumes LDM_MIN_MATCH_LENGTH is a multiple of four.
*
// A single hash-table entry: an offset plus a checksum.
typedef struct LDM_hashEntry {
U32 offset;  // added to the table's offsetBase to form the candidate match pointer
- U32 checksum; // Not needed?
+ U32 checksum;  // compared with the caller's checksum before the full validity check
} LDM_hashEntry;
// Opaque hash table type; each table implementation defines the struct.
typedef struct LDM_hashTable LDM_hashTable;
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase);
// Looks up an entry by hash without validating it against the input.
+//TODO: unneeded?
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum);
+LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,  // returns an entry for `hash` that isValid accepts, or NULL
+ const hash_t hash,
+ const U32 checksum,
+ const BYTE *pIn,  // passed to isValid as its first argument
+ int (*isValid)(const BYTE *pIn, const BYTE *pMatch));  // second argument is offsetBase + the entry's offset
+
// Stores `entry` in the table under `hash`.
void HASH_insert(LDM_hashTable *table, const hash_t hash,
const LDM_hashEntry entry);