default: all
-all: main-basic main-circular-buffer main-lag
+all: main-basic main-circular-buffer
main-basic : basic_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
main-circular-buffer: circular_buffer_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
-main-lag: lag_table.c ldm.c main-ldm.c
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
-
clean:
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
- main-basic main-circular-buffer main-lag
+ main-basic main-circular-buffer
@echo Cleaning completed
#include <stdlib.h>
#include <stdio.h>
+#include "ldm.h"
#include "ldm_hashtable.h"
#include "mem.h"
+#define LDM_HASHLOG ((LDM_MEMORY_USAGE) - 4)
+
struct LDM_hashTable {
U32 size;
LDM_hashEntry *entries;
return NULL;
}
+/* Hashes a 32-bit value: multiplies it by 2654435761U and shifts the product
+ * right by (32 - LDM_HASHLOG). */
+hash_t HASH_hashU32(U32 value) {
+  const U32 product = value * 2654435761U;
+  return product >> (32 - LDM_HASHLOG);
+}
+
void HASH_insert(LDM_hashTable *table,
const hash_t hash, const LDM_hashEntry entry) {
*getBucket(table, hash) = entry;
#include <stdlib.h>
#include <stdio.h>
+#include "ldm.h"
#include "ldm_hashtable.h"
#include "mem.h"
//TODO: move def somewhere else.
-//TODO: memory usage is currently no longer LDM_MEMORY_USAGE.
-// refactor code to scale the number of elements appropriately.
// Number of elements per hash bucket.
+// NOTE(review): HASH_BUCKET_SIZE_LOG is still #defined right below; confirm
+// whether ldm.h is meant to become its single definition.
#define HASH_BUCKET_SIZE_LOG 0 // MAX is 4 for now
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
+#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
+
struct LDM_hashTable {
- U32 size;
+ U32 size; // Number of buckets
+ U32 maxEntries; // Total number of entry slots allocated in `entries` (TODO: rename).
LDM_hashEntry *entries; // 1-D array for now.
// Position corresponding to offset=0 in LDM_hashEntry.
const BYTE *offsetBase;
BYTE *bucketOffsets; // Pointer to current insert position.
-
// Last insert was at bucketOffsets - 1?
};
+/**
+ * Allocates a hash table with room for `size` entries in total, grouped into
+ * (size >> HASH_BUCKET_SIZE_LOG) buckets. offsetBase is stored as the position
+ * that entry offsets are relative to.
+ *
+ * Returns NULL if any allocation fails; nothing is leaked in that case.
+ */
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
LDM_hashTable *table = malloc(sizeof(LDM_hashTable));
+ // malloc can fail; bail out before the field writes below dereference NULL.
+ if (table == NULL) return NULL;
- table->size = size;
- table->entries = calloc(size * HASH_BUCKET_SIZE, sizeof(LDM_hashEntry));
- table->bucketOffsets = calloc(size, sizeof(BYTE));
+ table->size = size >> HASH_BUCKET_SIZE_LOG;
+ table->maxEntries = size;
+ table->entries = calloc(size, sizeof(LDM_hashEntry));
+ table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE));
+ // Either array may have failed to allocate; free(NULL) is a no-op, so the
+ // cleanup is safe whichever one is missing.
+ if (table->entries == NULL || table->bucketOffsets == NULL) {
+   free(table->entries);
+   free(table->bucketOffsets);
+   free(table);
+   return NULL;
+ }
table->offsetBase = offsetBase;
return table;
}
LDM_hashEntry *cur = bucket;
// TODO: in order of recency?
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
- /*
- if (cur->checksum == 0 && cur->offset == 0) {
- return NULL;
- }
- */
// Check checksum for faster check.
if (cur->checksum == checksum &&
(*isValid)(pIn, cur->offset + table->offsetBase)) {
return NULL;
}
+/* Multiplicative hash of a 32-bit value; the product is shifted right by
+ * (32 - LDM_HASHLOG) to form the returned hash. */
+hash_t HASH_hashU32(U32 value) {
+  const U32 multiplier = 2654435761U;
+  return (value * multiplier) >> (32 - LDM_HASHLOG);
+}
+
+
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum) {
}
U32 HASH_getSize(const LDM_hashTable *table) {
- return table->size * HASH_BUCKET_SIZE;
+ return table->size;
}
void HASH_destroyTable(LDM_hashTable *table) {
}
}
+ printf("Num buckets, bucket size: %d, %d\n", table->size, HASH_BUCKET_SIZE);
printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n",
- HASH_getSize(table), ctr,
- 100.0 * (double)(ctr) / (double)HASH_getSize(table));
+ table->maxEntries, ctr,
+ 100.0 * (double)(ctr) / table->maxEntries);
}
#include <stdlib.h>
#include <string.h>
-// Insert every (HASH_ONLY_EVERY + 1) into the hash table.
-#define HASH_ONLY_EVERY 15
-#define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
+//#define LDM_HASH_ENTRY_SIZE 4
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
+#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 4)
+
+// Insert every (HASH_ONLY_EVERY + 1) into the hash table.
+#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE) - 4))
+#define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1)
+
#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_MASK ((1U<<RUN_BITS)-1)
#define COMPUTE_STATS
+#define OUTPUT_CONFIGURATION
#define CHECKSUM_CHAR_OFFSET 10
-#define LAG 0
+//#define LDM_LAG 0
//#define HASH_CHECK
//#define RUN_CHECKS
-//#define LDM_DEBUG
#include "ldm.h"
#include "ldm_hashtable.h"
* of the hash table.
*/
static hash_t checksumToHash(U32 sum) {
- return ((sum * 2654435761U) >> (32 - LDM_HASHLOG));
+ // Delegate to the hash table's HASH_hashU32: LDM_HASHLOG is no longer
+ // defined in this file, so the table implementation owns the hash width.
+ return HASH_hashU32(sum);
}
/**
cctx->nextPosHashed = cctx->nextIp;
cctx->nextHash = checksumToHash(cctx->nextSum);
-#if LAG
- if (cctx->ip - cctx->ibase > LAG) {
-// printf("LAG %zu\n", cctx->ip - cctx->lagIp);
+#if LDM_LAG
+// printf("LDM_LAG %zu\n", cctx->ip - cctx->lagIp);
+ if (cctx->ip - cctx->ibase > LDM_LAG) {
cctx->lagSum = updateChecksum(
cctx->lagSum, LDM_HASH_LENGTH,
cctx->lagIp[0], cctx->lagIp[LDM_HASH_LENGTH]);
const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
MEM_read32(cctx->ip) };
*/
-#if LAG
+#if LDM_LAG
// TODO: off by 1, but whatever
if (cctx->lagIp - cctx->ibase > 0) {
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, cctx->lagSum };
return (U32)(pIn - pStart);
}
+/**
+ * Prints the compile-time LDM settings (window size log, minimum match and
+ * hash lengths, memory usage, hash insertion interval and lag) to stdout,
+ * framed by separator lines.
+ */
+void LDM_outputConfiguration(void) {
+  static const char separator[] = "=====================\n";
+
+  fputs(separator, stdout);
+  fputs("Configuration\n", stdout);
+  printf("Window size log: %d\n", LDM_WINDOW_SIZE_LOG);
+  printf("Min match, hash length: %d, %d\n",
+         LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH);
+  printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
+  printf("HASH_ONLY_EVERY: %d\n", HASH_ONLY_EVERY);
+  printf("LDM_LAG %d\n", LDM_LAG);
+  fputs(separator, stdout);
+}
+
void LDM_readHeader(const void *src, U64 *compressedSize,
U64 *decompressedSize) {
const BYTE *ip = (const BYTE *)src;
cctx->anchor = cctx->ibase;
memset(&(cctx->stats), 0, sizeof(cctx->stats));
- cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U32, cctx->ibase);
-
- //HASH_initializeTable(cctx->hashTable, LDM_HASHTABLESIZE_U32);
+ cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U64, cctx->ibase);
-// calloc(LDM_HASHTABLESIZE_U32, sizeof(LDM_hashEntry));
-// memset(cctx->hashTable, 0, sizeof(cctx->hashTable));
cctx->stats.minOffset = UINT_MAX;
cctx->stats.windowSizeLog = LDM_WINDOW_SIZE_LOG;
cctx->stats.hashTableSizeLog = LDM_MEMORY_USAGE;
void *dst, size_t maxDstSize) {
LDM_CCtx cctx;
const BYTE *match = NULL;
+// printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64);
+ printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
+
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
/* Hash the first position and put it into the hash table. */
LDM_putHashOfCurrentPosition(&cctx);
-#if LAG
+#if LDM_LAG
cctx.lagIp = cctx.ip;
cctx.lagHash = cctx.lastHash;
cctx.lagSum = cctx.lastSum;
#endif
-
/**
* Find a match.
* If no more matches can be found (i.e. the length of the remaining input
cctx.stats.numMatches++;
#endif
+// printf("HERE %zu\n", cctx.ip - cctx.ibase);
/**
* Catch up: look back to extend the match backwards from the found match.
*/
#define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE))
#define LDM_OFFSET_SIZE 4
-// Defines the size of the hash table (currently the number of elements).
-#define LDM_MEMORY_USAGE 12
+// Defines the size of the hash table.
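+// Must be at most LDM_WINDOW_SIZE_LOG + 4: HASH_ONLY_EVERY_LOG is computed as
+// LDM_WINDOW_SIZE_LOG - (LDM_MEMORY_USAGE - 4) and is used as a shift count,
+// so it must not go negative.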
+#define LDM_MEMORY_USAGE 24
-#define LDM_WINDOW_SIZE_LOG 30
+//#define LDM_LAG (1 << 23)
+//#define LDM_LAG (1 << 20)
+#define LDM_LAG 0
+
+#define LDM_WINDOW_SIZE_LOG 28
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
-//These should be multiples of four.
-#define LDM_MIN_MATCH_LENGTH 64
-#define LDM_HASH_LENGTH 64
+//These should be multiples of four (and perhaps set to the same values?).
+#define LDM_MIN_MATCH_LENGTH 512
+#define LDM_HASH_LENGTH 512
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;
* The lower four bits of the token encode the match length. With additional
* bytes added similarly to the additional literal length bytes after the offset.
*
- * The last sequence is incomplete and stops right after the lieterals.
+ * The last sequence is incomplete and stops right after the literals.
*
*/
size_t LDM_compress(const void *src, size_t srcSize,
void LDM_readHeader(const void *src, U64 *compressedSize,
U64 *decompressedSize);
+void LDM_outputConfiguration(void);
+
void LDM_test(void);
#endif /* LDM_H */
const BYTE *pIn,
int (*isValid)(const BYTE *pIn, const BYTE *pMatch));
+hash_t HASH_hashU32(U32 value);
+
/**
* Insert an LDM_hashEntry into the bucket corresponding to hash.
*/
*/
void HASH_outputTableOccupancy(const LDM_hashTable *hashTable);
-
#endif /* LDM_HASHTABLE_H */
/* Compress file given by fname and output to oname.
* Returns 0 if successful, error code otherwise.
*
- * TODO: This currently seg faults if the compressed size is > the decompress
+ * TODO: This might seg fault if the compressed size is > the decompress
* size due to the mmapping and output file size allocated to be the input size.
* The compress function should check before writing or buffer writes.
*/
char *src, *dst;
size_t maxCompressedSize, compressedSize;
+ struct timeval tv1, tv2;
+
/* Open the input file. */
if ((fdin = open(fname, O_RDONLY)) < 0) {
perror("Error in file opening");
return 1;
}
- maxCompressedSize = statbuf.st_size + LDM_HEADER_SIZE;
+ maxCompressedSize = (statbuf.st_size + LDM_HEADER_SIZE);
+ // Handle case where compressed size is > decompressed size.
+ // The compress function should check before writing or buffer writes.
+ maxCompressedSize += statbuf.st_size / 255;
/* Go to the location corresponding to the last byte. */
/* TODO: fallocate? */
perror("mmap error for output");
return 1;
}
+ gettimeofday(&tv1, NULL);
compressedSize = LDM_HEADER_SIZE +
LDM_compress(src, statbuf.st_size,
dst + LDM_HEADER_SIZE, maxCompressedSize);
+ gettimeofday(&tv2, NULL);
// Write compress and decompress size to header
// TODO: should depend on LDM_DECOMPRESS_SIZE write32
(unsigned)statbuf.st_size, (unsigned)compressedSize, oname,
(double)compressedSize / (statbuf.st_size) * 100);
+ printf("Total compress time = %.3f seconds, Average compression speed: %.3f MB/s\n",
+ (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
+ (double) (tv2.tv_sec - tv1.tv_sec),
+ ((double)statbuf.st_size / (double) (1 << 20)) /
+ ((double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
+ (double) (tv2.tv_sec - tv1.tv_sec)));
+
+
// Close files.
close(fdin);
close(fdout);
/* Compress */
{
- struct timeval tv1, tv2;
- gettimeofday(&tv1, NULL);
if (compress(inpFilename, ldmFilename)) {
printf("Compress error");
return 1;
}
- gettimeofday(&tv2, NULL);
- printf("Total compress time = %f seconds\n",
- (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
- (double) (tv2.tv_sec - tv1.tv_sec));
}
/* Decompress */