default: all
-all: main-ldm
+all: main-basic main-chaining
-main-ldm : basic_table.c ldm.c main-ldm.c
+main-basic : basic_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+main-chaining : chaining_table.c ldm.c main-ldm.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+
clean:
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
- main main-ldm
+ main-basic main-chaining
@echo Cleaning completed
#include <stdio.h>
#include "ldm_hashtable.h"
+#include "mem.h"
struct LDM_hashTable {
U32 size;
LDM_hashEntry *entries;
+ const BYTE *offsetBase; // Base pointer passed to HASH_createTable(); presumably the position that entry offset 0 refers to -- TODO confirm.
};
-LDM_hashTable *HASH_createTable(U32 size) {
+LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
LDM_hashTable *table = malloc(sizeof(LDM_hashTable));
table->size = size;
table->entries = calloc(size, sizeof(LDM_hashEntry));
+ table->offsetBase = offsetBase; // Pointer is stored as passed in; the bytes it points to are not copied.
return table;
}
table->entries = calloc(size, sizeof(LDM_hashEntry));
}
+// Returns the slot for `hash`. This table keeps exactly one entry per hash
+// value, so the "bucket" is a single element of table->entries.
+// File-local helper: it has internal linkage so it does not leak into the
+// global namespace.
+static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
+  return table->entries + hash;
+}
LDM_hashEntry *HASH_getEntryFromHash(
- const LDM_hashTable *table, const hash_t hash) {
- return &(table->entries[hash]);
+ const LDM_hashTable *table, const hash_t hash, const U32 checksum) {
+ (void)checksum; // Unused here: each hash maps to exactly one slot, so there is nothing to disambiguate.
+ return getBucket(table, hash); // Never NULL as long as table->entries is valid.
}
void HASH_insert(LDM_hashTable *table,
const hash_t hash, const LDM_hashEntry entry) {
- *HASH_getEntryFromHash(table, hash) = entry;
+ *getBucket(table, hash) = entry; // Overwrites any entry previously stored for this hash.
}
U32 HASH_getSize(const LDM_hashTable *table) {
U32 i = 0;
U32 ctr = 0;
for (; i < HASH_getSize(hashTable); i++) {
- if (HASH_getEntryFromHash(hashTable, i)->offset == 0) {
+ if (getBucket(hashTable, i)->offset == 0) {
ctr++;
}
}
HASH_getSize(hashTable), ctr,
100.0 * (double)(ctr) / (double)HASH_getSize(hashTable));
}
-
-
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "ldm_hashtable.h"
+#include "mem.h"
+
+//TODO: move def somewhere else.
+//TODO: memory usage is currently no longer LDM_MEMORY_USAGE.
+// refactor code to scale the number of elements appropriately.
+
+// Number of elements per hash bucket.
+#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now
+#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
+
+struct LDM_hashTable {
+ U32 size; // Number of buckets (not the number of entries).
+ LDM_hashEntry *entries; // 1-D array of size * HASH_BUCKET_SIZE entries; bucket h starts at index h << HASH_BUCKET_SIZE_LOG.
+
+ // Position corresponding to offset=0 in LDM_hashEntry.
+ const BYTE *offsetBase;
+ BYTE *bucketOffsets; // One byte per bucket: index within that bucket of the next insert slot.
+ // The index wraps modulo HASH_BUCKET_SIZE, so the last insert into
+ // bucket h was at (bucketOffsets[h] - 1) & (HASH_BUCKET_SIZE - 1).
+};
+
+// Allocates a table with `size` buckets of HASH_BUCKET_SIZE zero-initialized
+// entries each, plus one zero-initialized insert index per bucket.
+// `offsetBase` is stored as the position corresponding to offset 0 in an entry.
+// Returns NULL if an allocation fails; otherwise the caller releases the
+// result with HASH_destroyTable().
+LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
+  LDM_hashTable *table = malloc(sizeof(*table));
+  if (table == NULL) {
+    return NULL;
+  }
+
+  table->size = size;
+  // Compute the slot count in size_t rather than in 32 bits, so that the
+  // allocation is not smaller than the range getBucket() can index.
+  table->entries = calloc((size_t)size * HASH_BUCKET_SIZE,
+                          sizeof(LDM_hashEntry));
+  table->bucketOffsets = calloc(size, sizeof(BYTE));
+  table->offsetBase = offsetBase;
+
+  // calloc(0, ...) may legitimately return NULL, so only treat NULL as a
+  // failure when something was actually requested.
+  if (size != 0 &&
+      (table->entries == NULL || table->bucketOffsets == NULL)) {
+    free(table->entries);
+    free(table->bucketOffsets);
+    free(table);
+    return NULL;
+  }
+  return table;
+}
+
+// Returns a pointer to the first slot of the bucket selected by `hash`.
+// Buckets are laid out back to back, HASH_BUCKET_SIZE entries apiece.
+static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
+  return &table->entries[hash << HASH_BUCKET_SIZE_LOG];
+}
+
+/*
+static LDM_hashEntry *getLastInsertFromHash(const LDM_hashTable *table,
+ const hash_t hash) {
+ LDM_hashEntry *bucket = getBucket(table, hash);
+ BYTE offset = (table->bucketOffsets[hash] - 1) & (HASH_BUCKET_SIZE - 1);
+ return bucket + offset;
+}
+*/
+
+// Walks the bucket selected by `hash` from its first slot to its last and
+// returns the first entry whose stored checksum equals `checksum`.
+// Returns NULL when no slot in the bucket matches.
+// TODO: visit the slots in order of recency instead?
+LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
+                                     const hash_t hash,
+                                     const U32 checksum) {
+  LDM_hashEntry *const bucket = getBucket(table, hash);
+  int slot;
+  for (slot = 0; slot < HASH_BUCKET_SIZE; slot++) {
+    if (bucket[slot].checksum == checksum) {
+      return &bucket[slot];
+    }
+  }
+  return NULL;
+}
+
+// Writes `entry` into the bucket for `hash` at that bucket's current insert
+// index, then advances the index, wrapping to 0 after the last slot. Once a
+// bucket is full, each insert therefore replaces its oldest entry.
+void HASH_insert(LDM_hashTable *table,
+                 const hash_t hash, const LDM_hashEntry entry) {
+  BYTE *const cursor = &table->bucketOffsets[hash];
+  getBucket(table, hash)[*cursor] = entry;
+  *cursor = (BYTE)((*cursor + 1) & (HASH_BUCKET_SIZE - 1));
+}
+
+// Total number of entry slots: bucket count times slots per bucket.
+U32 HASH_getSize(const LDM_hashTable *table) {
+  const U32 numBuckets = table->size;
+  return numBuckets * HASH_BUCKET_SIZE;
+}
+
+// Frees the table together with the entry array and the per-bucket insert
+// indices it owns. Passing NULL is a no-op, mirroring free(NULL).
+void HASH_destroyTable(LDM_hashTable *table) {
+  if (table == NULL) {
+    return;
+  }
+  free(table->entries);
+  free(table->bucketOffsets);
+  free(table);
+}
+
+// Prints the total slot count, the number of slots whose offset is 0
+// (those are counted as empty), and that count as a percentage of the total.
+void HASH_outputTableOccupancy(const LDM_hashTable *table) {
+  const U32 numSlots = HASH_getSize(table);
+  U32 numEmpty = 0;
+  U32 i;
+  for (i = 0; i < numSlots; i++) {
+    if (table->entries[i].offset == 0) {
+      numEmpty++;
+    }
+  }
+
+  printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n",
+         numSlots, numEmpty,
+         100.0 * (double)(numEmpty) / (double)numSlots);
+}
#define CHECKSUM_CHAR_OFFSET 10
//#define RUN_CHECKS
//#define LDM_DEBUG
-//
-#include "ldm.h"
+#include "ldm.h"
#include "ldm_hashtable.h"
// TODO: Scanning speed
// TODO: Maybe we would eventually prefer to have linear rather than
// exponential buckets.
+/**
void HASH_outputTableOffsetHistogram(const LDM_CCtx *cctx) {
U32 i = 0;
int buckets[32] = { 0 };
}
printf("\n");
}
+*/
void LDM_printCompressStats(const LDM_compressStats *stats) {
int i = 0;
//TODO: compute percentage matched?
printf("Window size, hash table size (bytes): 2^%u, 2^%u\n",
stats->windowSizeLog, stats->hashTableSizeLog);
- printf("num matches, total match length: %u, %llu\n",
+ printf("num matches, total match length, %% matched: %u, %llu, %.3f\n",
stats->numMatches,
- stats->totalMatchLength);
+ stats->totalMatchLength,
+ 100.0 * (double)stats->totalMatchLength /
+ (double)(stats->totalMatchLength + stats->totalLiteralLength));
printf("avg match length: %.1f\n", ((double)stats->totalMatchLength) /
(double)stats->numMatches);
printf("avg literal length, total literalLength: %.1f, %llu\n",
printf("Num invalid hashes, num valid hashes, %llu %llu\n",
stats->numInvalidHashes, stats->numValidHashes);
*/
+ /*
printf("num collisions, num hash inserts, %% collisions: %u, %u, %.3f\n",
stats->numCollisions, stats->numHashInserts,
stats->numHashInserts == 0 ?
1.0 : (100.0 * (double)stats->numCollisions) /
(double)stats->numHashInserts);
+ */
printf("=====================\n");
}
*/
//TODO: This seems to be faster for some reason?
+
U32 lengthLeft = LDM_MIN_MATCH_LENGTH;
const BYTE *curIn = pIn;
const BYTE *curMatch = pMatch;
static void putHashOfCurrentPositionFromHash(
LDM_CCtx *cctx, hash_t hash, U32 sum) {
+ /*
#ifdef COMPUTE_STATS
- if (cctx->stats.numHashInserts < LDM_HASHTABLESIZE_U32) {
+ if (cctx->stats.numHashInserts < HASH_getSize(cctx->hashTable)) {
U32 offset = HASH_getEntryFromHash(cctx->hashTable, hash)->offset;
cctx->stats.numHashInserts++;
if (offset != 0 && !LDM_isValidMatch(cctx->ip, offset + cctx->ibase)) {
}
}
#endif
+*/
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
// Note: this works only when cctx->step is 1.
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
- const LDM_hashEntry entry = { cctx->ip - cctx->ibase };
+ const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
+ MEM_read32(cctx->ip) };
HASH_insert(cctx->hashTable, hash, entry);
}
cctx->anchor = cctx->ibase;
memset(&(cctx->stats), 0, sizeof(cctx->stats));
- cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U32);
+ cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U32, cctx->ibase);
//HASH_initializeTable(cctx->hashTable, LDM_HASHTABLESIZE_U32);
*
*/
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
+
+ LDM_hashEntry *entry = NULL;
cctx->nextIp = cctx->ip + cctx->step;
do {
hash_t h;
U32 sum;
- LDM_hashEntry *entry;
setNextHash(cctx);
h = cctx->nextHash;
sum = cctx->nextSum;
return 1;
}
- entry = HASH_getEntryFromHash(cctx->hashTable, h);
- *match = entry->offset + cctx->ibase;
+ entry = HASH_getEntryFromHash(cctx->hashTable, h, MEM_read32(cctx->ip));
+
+ if (entry != NULL) {
+ *match = entry->offset + cctx->ibase;
+ }
putHashOfCurrentPositionFromHash(cctx, h, sum);
- } while (cctx->ip - *match > LDM_WINDOW_SIZE ||
- !LDM_isValidMatch(cctx->ip, *match));
+ } while (entry == NULL ||
+ (cctx->ip - *match > LDM_WINDOW_SIZE ||
+ !LDM_isValidMatch(cctx->ip, *match)));
setNextHash(cctx);
return 0;
}
size_t LDM_compress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize) {
LDM_CCtx cctx;
- const BYTE *match;
+ const BYTE *match = NULL;
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
/* Hash the first position and put it into the hash table. */
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
//These should be multiples of four.
-#define LDM_MIN_MATCH_LENGTH 4
-#define LDM_HASH_LENGTH 4
+#define LDM_MIN_MATCH_LENGTH 1024
+#define LDM_HASH_LENGTH 1024
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;
typedef struct LDM_hashEntry {
U32 offset;
+ U32 checksum; // MEM_read32() of the inserted position (set in putHashOfCurrentPositionFromHash); compared on lookup by chaining_table.c, ignored by basic_table.c.
} LDM_hashEntry;
typedef struct LDM_hashTable LDM_hashTable;
// TODO: rename functions
// TODO: comments
-LDM_hashTable *HASH_createTable(U32 size);
+LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase);
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
- const hash_t hash);
+ const hash_t hash,
+ const U32 checksum);
void HASH_insert(LDM_hashTable *table, const hash_t hash,
const LDM_hashEntry entry);