default: all
-all: main-basic main-circular-buffer
+all: main-basic main-circular-buffer main-lag
main-basic : basic_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
main-circular-buffer: circular_buffer_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+main-lag: lag_table.c ldm.c main-ldm.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
clean:
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
- main-basic main-circular-buffer
+ main-basic main-circular-buffer main-lag
@echo Cleaning completed
return table->entries + hash;
}
-
LDM_hashEntry *HASH_getEntryFromHash(
const LDM_hashTable *table, const hash_t hash, const U32 checksum) {
(void)checksum;
(void)checksum;
if ((*isValid)(pIn, entry->offset + table->offsetBase)) {
return entry;
- } else {
- return NULL;
}
+ return NULL;
}
-
-
void HASH_insert(LDM_hashTable *table,
const hash_t hash, const LDM_hashEntry entry) {
*getBucket(table, hash) = entry;
// refactor code to scale the number of elements appropriately.
// Number of elements per hash bucket.
-#define HASH_BUCKET_SIZE_LOG 1 // MAX is 4 for now
+#define HASH_BUCKET_SIZE_LOG 0 // MAX is 4 for now
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
struct LDM_hashTable {
// Position corresponding to offset=0 in LDM_hashEntry.
const BYTE *offsetBase;
BYTE *bucketOffsets; // Pointer to current insert position.
+
// Last insert was at bucketOffsets - 1?
};
return table->entries + (hash << HASH_BUCKET_SIZE_LOG);
}
-/*
-static LDM_hashEntry *getLastInsertFromHash(const LDM_hashTable *table,
- const hash_t hash) {
- LDM_hashEntry *bucket = getBucket(table, hash);
- BYTE offset = (table->bucketOffsets[hash] - 1) & (HASH_BUCKET_SIZE - 1);
- return bucket + offset;
-}
-*/
-
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
LDM_hashEntry *cur = bucket;
// TODO: in order of recency?
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
- // CHeck checksum for faster check.
+ /*
+ if (cur->checksum == 0 && cur->offset == 0) {
+ return NULL;
+ }
+ */
+ // Compare checksums first as a cheap filter before calling isValid.
if (cur->checksum == checksum &&
(*isValid)(pIn, cur->offset + table->offsetBase)) {
return cur;
return NULL;
}
-
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum) {
#include <string.h>
// Insert every (HASH_ONLY_EVERY + 1) into the hash table.
-#define HASH_ONLY_EVERY 31
+#define HASH_ONLY_EVERY 15
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
#define COMPUTE_STATS
#define CHECKSUM_CHAR_OFFSET 10
+
+#define LAG 0
+
+//#define HASH_CHECK
//#define RUN_CHECKS
//#define LDM_DEBUG
unsigned step; // ip step, should be 1.
+ const BYTE *lagIp;
+ hash_t lagHash;
+ U32 lagSum;
+
// DEBUG
const BYTE *DEBUG_setNextHash;
};
cctx->nextPosHashed = cctx->nextIp;
cctx->nextHash = checksumToHash(cctx->nextSum);
+#if LAG
+ if (cctx->ip - cctx->ibase > LAG) {
+// printf("LAG %zu\n", cctx->ip - cctx->lagIp);
+ cctx->lagSum = updateChecksum(
+ cctx->lagSum, LDM_HASH_LENGTH,
+ cctx->lagIp[0], cctx->lagIp[LDM_HASH_LENGTH]);
+ cctx->lagIp++;
+ cctx->lagHash = checksumToHash(cctx->lagSum);
+ }
+#endif
+
#ifdef RUN_CHECKS
check = getChecksum(cctx->nextIp, LDM_HASH_LENGTH);
static void putHashOfCurrentPositionFromHash(
LDM_CCtx *cctx, hash_t hash, U32 sum) {
- /*
-#ifdef COMPUTE_STATS
- if (cctx->stats.numHashInserts < HASH_getSize(cctx->hashTable)) {
- U32 offset = HASH_getEntryFromHash(cctx->hashTable, hash)->offset;
- cctx->stats.numHashInserts++;
- if (offset != 0 && !LDM_isValidMatch(cctx->ip, offset + cctx->ibase)) {
- cctx->stats.numCollisions++;
- }
- }
-#endif
-*/
-
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
// Note: this works only when cctx->step is 1.
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
MEM_read32(cctx->ip) };
*/
+#if LAG
+ // TODO: this is off by 1; left as-is for now.
+ if (cctx->lagIp - cctx->ibase > 0) {
+ const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, cctx->lagSum };
+ HASH_insert(cctx->hashTable, cctx->lagHash, entry);
+ } else {
+ const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
+ HASH_insert(cctx->hashTable, hash, entry);
+ }
+#else
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
HASH_insert(cctx->hashTable, hash, entry);
+#endif
}
cctx->lastPosHashed = cctx->ip;
putHashOfCurrentPositionFromHash(cctx, hash, sum);
}
-/**
- * Returns the position of the entry at hashTable[hash].
- */
-/*
-static const BYTE *getPositionOnHash(const LDM_CCtx *cctx, const hash_t hash) {
- return HASH_getEntryFromHash(cctx->hashTable, hash)->offset + cctx->ibase;
-}
-*/
-
U32 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch,
const BYTE *pInLimit) {
const BYTE * const pStart = pIn;
if (cctx->ip > cctx->imatchLimit) {
return 1;
}
-
+#ifdef HASH_CHECK
+ entry = HASH_getEntryFromHash(cctx->hashTable, h, sum);
+#else
entry = HASH_getValidEntry(cctx->hashTable, h, sum, cctx->ip,
&LDM_isValidMatch);
+#endif
if (entry != NULL) {
*match = entry->offset + cctx->ibase;
+#ifdef HASH_CHECK
+ if (!LDM_isValidMatch(cctx->ip, *match)) {
+ entry = NULL;
+ }
+#endif
}
putHashOfCurrentPositionFromHash(cctx, h, sum);
}
/* Hash the first position and put it into the hash table. */
LDM_putHashOfCurrentPosition(&cctx);
+#if LAG
+ cctx.lagIp = cctx.ip;
+ cctx.lagHash = cctx.lastHash;
+ cctx.lagSum = cctx.lastSum;
+#endif
+
/**
* Find a match.
* If no more matches can be found (i.e. the length of the remaining input
/* Encode the last literals (no more matches). */
{
- const size_t lastRun = cctx.iend - cctx.anchor;
+ const U32 lastRun = cctx.iend - cctx.anchor;
BYTE *pToken = cctx.op++;
LDM_encodeLiteralLengthAndLiterals(&cctx, pToken, lastRun);
}
#define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE))
#define LDM_OFFSET_SIZE 4
-// Defines the size of the hash table.
-#define LDM_MEMORY_USAGE 20
+// Defines the log2 of the hash table size (currently the number of elements).
+#define LDM_MEMORY_USAGE 12
#define LDM_WINDOW_SIZE_LOG 30
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
#include "mem.h"
+// TODO: clean up comments
+
typedef U32 hash_t;
typedef struct LDM_hashEntry {
- U32 offset;
+ U32 offset; // TODO: Replace with pointer?
U32 checksum;
} LDM_hashEntry;
typedef struct LDM_hashTable LDM_hashTable;
-// TODO: rename functions
-// TODO: comments
-
+/**
+ * Create a hash table containing `size` hash buckets.
+ * LDM_hashEntry.offset is added to offsetBase to calculate pMatch in
+ * HASH_getValidEntry.
+ */
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase);
-//TODO: unneeded?
+/**
+ * Returns an LDM_hashEntry from the table that matches the checksum.
+ * Returns NULL if one does not exist.
+ */
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum);
+/**
+ * Gets a valid entry that matches the checksum. A valid entry is defined by
+ * *isValid.
+ *
+ * The function finds an entry matching the checksum, computes pMatch as
+ * offset + table.offsetBase, and calls isValid.
+ */
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
int (*isValid)(const BYTE *pIn, const BYTE *pMatch));
+/**
+ * Insert an LDM_hashEntry into the bucket corresponding to hash.
+ */
void HASH_insert(LDM_hashTable *table, const hash_t hash,
- const LDM_hashEntry entry);
+ const LDM_hashEntry entry);
+/**
+ * Return the number of distinct hash buckets.
+ */
U32 HASH_getSize(const LDM_hashTable *table);
void HASH_destroyTable(LDM_hashTable *table);
outSize = LDM_decompress(
src + LDM_HEADER_SIZE, statbuf.st_size - LDM_HEADER_SIZE,
dst, decompressedSize);
-
printf("Ret size out: %zu\n", outSize);
ftruncate(fdout, outSize);