const BYTE *lastPosHashed; /* Last position hashed */
hash_t lastHash; /* Hash corresponding to lastPosHashed */
- U32 lastSum;
+ U64 lastSum;
const BYTE *nextIp; // TODO: this is redundant (ip + step)
const BYTE *nextPosHashed;
U64 nextSum;
-// hash_t nextHash; /* Hash corresponding to nextPosHashed */
-// U32 nextSum;
-
unsigned step; // ip step, should be 1.
const BYTE *lagIp;
U64 lagSum;
-// hash_t lagHash;
-// U32 lagSum;
U64 numHashInserts;
// DEBUG
LDM_hashEntry *bucket = getBucket(table, hash);
LDM_hashEntry *cur = bucket;
LDM_hashEntry *bestEntry = NULL;
- U32 bestMatchLength = 0;
+ U64 bestMatchLength = 0;
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
const BYTE *pMatch = cur->offset + cctx->ibase;
// Check checksum for faster check.
if (cur->checksum == checksum &&
cctx->ip - pMatch <= LDM_WINDOW_SIZE) {
- U32 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend);
- U32 backwardMatchLength, totalMatchLength;
+ U64 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend);
+ U64 backwardMatchLength, totalMatchLength;
// For speed.
if (forwardMatchLength < LDM_MIN_MATCH_LENGTH) {
return NULL;
}
+#ifdef TMP_EVICTION
+
void HASH_insert(LDM_hashTable *table,
const hash_t hash, const LDM_hashEntry entry) {
*(getBucket(table, hash) + table->bucketOffsets[hash]) = entry;
table->bucketOffsets[hash] &= HASH_BUCKET_SIZE - 1;
}
+#else
+
+void HASH_insert(LDM_hashTable *table,
+ const hash_t hash, const LDM_hashEntry entry) {
+ *(getBucket(table, hash) + table->bucketOffsets[hash]) = entry;
+ table->bucketOffsets[hash]++;
+ table->bucketOffsets[hash] &= HASH_BUCKET_SIZE - 1;
+}
+#endif // TMP_EVICTION
+
+
U32 HASH_getSize(const LDM_hashTable *table) {
return table->numBuckets;
}
// TODO: This can be done more efficiently (but it is not that important as it
// is only used for computing stats).
-static int intLog2(U32 x) {
+static int intLog2(U64 x) {
int ret = 0;
while (x >>= 1) {
ret++;
return ret;
}
-// Maybe we would eventually prefer to have linear rather than
-// exponential buckets.
-/**
-void HASH_outputTableOffsetHistogram(const LDM_CCtx *cctx) {
- U32 i = 0;
- int buckets[32] = { 0 };
-
- printf("\n");
- printf("Hash table histogram\n");
- for (; i < HASH_getSize(cctx->hashTable); i++) {
- int offset = (cctx->ip - cctx->ibase) -
- HASH_getEntryFromHash(cctx->hashTable, i)->offset;
- buckets[intLog2(offset)]++;
- }
-
- i = 0;
- for (; i < 32; i++) {
- printf("2^%*d: %10u %6.3f%%\n", 2, i,
- buckets[i],
- 100.0 * (double) buckets[i] /
- (double) HASH_getSize(cctx->hashTable));
- }
- printf("\n");
-}
-*/
-
void LDM_printCompressStats(const LDM_compressStats *stats) {
int i = 0;
printf("=====================\n");
return 1;
}
-#if 0
-hash_t HASH_hashU32(U32 value) {
- return ((value * 2654435761U) >> (32 - LDM_HASHLOG));
-}
-#endif
-
-/**
- * Convert a sum computed from getChecksum to a hash value in the range
- * of the hash table.
- */
-#if 0
-static hash_t checksumToHash(U32 sum) {
- return HASH_hashU32(sum);
-}
-#endif
-
// Upper LDM_HASH_LOG bits.
static hash_t checksumToHash(U64 sum) {
return sum >> (64 - LDM_HASHLOG);
return (hfHash >> (64 - 32 - LDM_HASHLOG)) & 0xFFFFFFFF;
}
-#if 0
-/**
- * Computes a checksum based on rsync's checksum.
- *
- * a(k,l) = \sum_{i = k}^l x_i (mod M)
- * b(k,l) = \sum_{i = k}^l ((l - i + 1) * x_i) (mod M)
- * checksum(k,l) = a(k,l) + 2^{16} * b(k,l)
- */
-static U32 getChecksum(const BYTE *buf, U32 len) {
- U32 i;
- U32 s1, s2;
-
- s1 = s2 = 0;
- for (i = 0; i < (len - 4); i += 4) {
- s2 += (4 * (s1 + buf[i])) + (3 * buf[i + 1]) +
- (2 * buf[i + 2]) + (buf[i + 3]) +
- (10 * CHECKSUM_CHAR_OFFSET);
- s1 += buf[i] + buf[i + 1] + buf[i + 2] + buf[i + 3] +
- + (4 * CHECKSUM_CHAR_OFFSET);
-
- }
- for(; i < len; i++) {
- s1 += buf[i] + CHECKSUM_CHAR_OFFSET;
- s2 += s1;
- }
- return (s1 & 0xffff) + (s2 << 16);
-}
-#endif
-
static U64 getChecksum(const BYTE *buf, U32 len) {
static const U64 prime8bytes = 11400714785074694791ULL;
-// static const U64 prime8bytes = 5;
U64 ret = 0;
U32 i;
for (i = 0; i < len; i++) {
ret *= prime8bytes;
ret += buf[i] + CHECKSUM_CHAR_OFFSET;
-// printf("HERE %llu\n", ret);
}
return ret;
}
-#if 0
-/**
- * Update a checksum computed from getChecksum(data, len).
- *
- * The checksum can be updated along its ends as follows:
- * a(k+1, l+1) = (a(k,l) - x_k + x_{l+1}) (mod M)
- * b(k+1, l+1) = (b(k,l) - (l-k+1)*x_k + (a(k+1,l+1)) (mod M)
- *
- * Thus toRemove should correspond to data[0].
- */
-static U32 updateChecksum(U32 sum, U32 len,
- BYTE toRemove, BYTE toAdd) {
- U32 s1 = (sum & 0xffff) - toRemove + toAdd;
- U32 s2 = (sum >> 16) - ((toRemove + CHECKSUM_CHAR_OFFSET) * len) + s1;
-
- return (s1 & 0xffff) + (s2 << 16);
-}
-#endif
-
static U64 ipow(U64 base, U64 exp) {
U64 ret = 1;
while (exp) {
// TODO: deduplicate.
static const U64 prime8bytes = 11400714785074694791ULL;
+ // TODO: relying on compiler optimization here.
+ // The exponential can be calculated explicitly.
sum -= ((toRemove + CHECKSUM_CHAR_OFFSET) *
ipow(prime8bytes, len - 1));
sum *= prime8bytes;
*/
static void setNextHash(LDM_CCtx *cctx) {
#ifdef RUN_CHECKS
- U32 check;
+ U64 check;
if ((cctx->nextIp - cctx->ibase != 1) &&
(cctx->nextIp - cctx->DEBUG_setNextHash != 1)) {
printf("CHECK debug fail: %zu %zu\n", cctx->nextIp - cctx->ibase,
cctx->DEBUG_setNextHash = cctx->nextIp;
#endif
-// cctx->nextSum = getChecksum((const char *)cctx->nextIp, LDM_HASH_LENGTH);
cctx->nextSum = updateChecksum(
cctx->lastSum, LDM_HASH_LENGTH,
cctx->lastPosHashed[0],
cctx->lastPosHashed[LDM_HASH_LENGTH]);
cctx->nextPosHashed = cctx->nextIp;
-#if 0
- cctx->nextHash = checksumToHash(cctx->nextSum);
-#endif
-
#if LDM_LAG
// printf("LDM_LAG %zu\n", cctx->ip - cctx->lagIp);
cctx->lagSum, LDM_HASH_LENGTH,
cctx->lagIp[0], cctx->lagIp[LDM_HASH_LENGTH]);
cctx->lagIp++;
-#if 0
- cctx->lagHash = checksumToHash(cctx->lagSum);
-#endif
}
#endif
check = getChecksum(cctx->nextIp, LDM_HASH_LENGTH);
if (check != cctx->nextSum) {
- printf("CHECK: setNextHash failed %u %u\n", check, cctx->nextSum);
+ printf("CHECK: setNextHash failed %llu %llu\n", check, cctx->nextSum);
}
if ((cctx->nextIp - cctx->lastPosHashed) != 1) {
static void putHashOfCurrentPositionFromHash(LDM_CCtx *cctx, U64 hfHash) {
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
// Note: this works only when cctx->step is 1.
- U32 hash = checksumToHash(hfHash);
- U32 sum = checksumFromHfHash(hfHash);
// printf("TMP %u %u %llu\n", hash, sum, hfHash);
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
#if LDM_LAG
// TODO: off by 1, but whatever
if (cctx->lagIp - cctx->ibase > 0) {
- const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, cctx->lagSum };
- HASH_insert(cctx->hashTable, cctx->lagHash, entry);
+ U32 hash = checksumToHash(cctx->lagSum);
+ U32 sum = checksumFromHfHash(cctx->lagSum);
+ const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, sum };
+ HASH_insert(cctx->hashTable, hash, entry);
} else {
+ U32 hash = checksumToHash(hfHash);
+ U32 sum = checksumFromHfHash(hfHash);
+
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
HASH_insert(cctx->hashTable, hash, entry);
}
#else
+ U32 hash = checksumToHash(hfHash);
+ U32 sum = checksumFromHfHash(hfHash);
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
HASH_insert(cctx->hashTable, hash, entry);
#endif
}
cctx->lastPosHashed = cctx->ip;
-#if 0
- cctx->lastHash = hash;
-#endif
cctx->lastSum = hfHash;
}
printf("CHECK failed: updateLastHashFromNextHash %zu\n",
cctx->ip - cctx->ibase);
}
-#endif
-#if 0
- putHashOfCurrentPositionFromHash(cctx, cctx->nextHash, cctx->nextSum);
#endif
putHashOfCurrentPositionFromHash(cctx, cctx->nextSum);
}
* Insert hash of the current position into the hash table.
*/
static void LDM_putHashOfCurrentPosition(LDM_CCtx *cctx) {
- U32 sum = getChecksum(cctx->ip, LDM_HASH_LENGTH);
-#if 0
- hash_t hash = checksumToHash(sum);
-#endif
+ U64 sum = getChecksum(cctx->ip, LDM_HASH_LENGTH);
#ifdef RUN_CHECKS
if (cctx->nextPosHashed != cctx->ip && (cctx->ip != cctx->ibase)) {
cctx->ip - cctx->ibase);
}
#endif
-#if 0
- putHashOfCurrentPositionFromHash(cctx, hash, sum);
-#endif
+
putHashOfCurrentPositionFromHash(cctx, sum);
}
-U32 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch,
+U64 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch,
const BYTE *pInLimit) {
const BYTE * const pStart = pIn;
while (pIn < pInLimit - 1) {
pMatch++;
continue;
}
- return (U32)(pIn - pStart);
+ return (U64)(pIn - pStart);
}
- return (U32)(pIn - pStart);
+ return (U64)(pIn - pStart);
}
void LDM_outputConfiguration(void) {
U64 hash;
U32 sum;
setNextHash(cctx);
-#if 0
- h = cctx->nextHash;
- sum = cctx->nextSum;
-#endif
hash = cctx->nextSum;
h = checksumToHash(hash);
sum = checksumFromHfHash(hash);