git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Avoid recounting match lengths with ZSTD_count
author Stella Lau <laus@fb.com>
Wed, 19 Jul 2017 01:35:25 +0000 (18:35 -0700)
committer Stella Lau <laus@fb.com>
Wed, 19 Jul 2017 01:35:25 +0000 (18:35 -0700)
contrib/long_distance_matching/basic_table.c
contrib/long_distance_matching/circular_buffer_table.c
contrib/long_distance_matching/ldm.c
contrib/long_distance_matching/ldm_hashtable.h

index 6c12b50879298dd9fc76921555ea9c70064deca7..30c548d2a172cc1e0e4235d19d30981d44f4096c 100644 (file)
@@ -62,12 +62,16 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
                                   const BYTE *pIn,
                                   const BYTE *pEnd,
                                   U32 minMatchLength,
-                                  U32 maxWindowSize) {
+                                  U32 maxWindowSize,
+                                  U32 *matchLength) {
   LDM_hashEntry *entry = getBucket(table, hash);
   (void)checksum;
   (void)pEnd;
+  (void)matchLength;
+  // TODO: Count the entire forward match length rather than check if valid.
   if (isValidMatch(pIn, entry->offset + table->offsetBase,
                    minMatchLength, maxWindowSize)) {
+
     return entry;
   }
   return NULL;
index 653d9e51bf3ebb58cd5cc7b99c590f19cb125089..104d1b33961fa95697633341de557c8f056f1d86 100644 (file)
 // TODO: rename. Number of hash buckets.
 #define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
 
-//#define TMP_ZSTDTOGGLE
+#define TMP_ZSTDTOGGLE
 
 struct LDM_hashTable {
   U32 size;  // Number of buckets
   U32 maxEntries;  // Rename...
   LDM_hashEntry *entries;  // 1-D array for now.
+  BYTE *bucketOffsets;     // Pointer to current insert position.
 
   // Position corresponding to offset=0 in LDM_hashEntry.
   const BYTE *offsetBase;
-  BYTE *bucketOffsets;     // Pointer to current insert position.
-                           // Last insert was at bucketOffsets - 1?
 };
 
 LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
@@ -174,7 +173,8 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
                                   const BYTE *pIn,
                                   const BYTE *pEnd,
                                   U32 minMatchLength,
-                                  U32 maxWindowSize) {
+                                  U32 maxWindowSize,
+                                  U32 *matchLength) {
   LDM_hashEntry *bucket = getBucket(table, hash);
   LDM_hashEntry *cur = bucket;
   // TODO: in order of recency?
@@ -183,8 +183,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
     const BYTE *pMatch = cur->offset + table->offsetBase;
 #ifdef TMP_ZSTDTOGGLE
     if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) {
-      U32 matchLength = ZSTD_count(pIn, pMatch, pEnd);
-      if (matchLength >= minMatchLength) {
+      U32 forwardMatchLength = ZSTD_count(pIn, pMatch, pEnd);
+      if (forwardMatchLength >= minMatchLength) {
+        *matchLength = forwardMatchLength;
         return cur;
       }
     }
index 56b22d288db6f752e756f1431d57ef65f0d9a889..1512ab8c5db5e80a250a409152c39461a405af66 100644 (file)
@@ -28,6 +28,7 @@
 
 //#define HASH_CHECK
 //#define RUN_CHECKS
+//#define TMP_RECOMPUTE_LENGTHS
 
 #include "ldm.h"
 #include "ldm_hashtable.h"
@@ -435,8 +436,10 @@ void LDM_destroyCCtx(LDM_CCtx *cctx) {
  * Returns 0 if successful and 1 otherwise (i.e. no match can be found
  * in the remaining input that is long enough).
  *
+ * matchLength contains the forward length of the match.
  */
-static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
+static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
+                             U32 *matchLength) {
 
   LDM_hashEntry *entry = NULL;
   cctx->nextIp = cctx->ip + cctx->step;
@@ -459,7 +462,8 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
     entry = HASH_getValidEntry(cctx->hashTable, h, sum,
                                cctx->ip, cctx->iend,
                                LDM_MIN_MATCH_LENGTH,
-                               LDM_WINDOW_SIZE);
+                               LDM_WINDOW_SIZE,
+                               matchLength);
 #endif
 
     if (entry != NULL) {
@@ -535,8 +539,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
                     void *dst, size_t maxDstSize) {
   LDM_CCtx cctx;
   const BYTE *match = NULL;
-//  printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64);
-//  printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
+  U32 forwardMatchLength = 0;
 
   LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
   LDM_outputConfiguration();
@@ -555,7 +558,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
    * is less than the minimum match length), then stop searching for matches
    * and encode the final literals.
    */
-  while (LDM_findBestMatch(&cctx, &match) == 0) {
+  while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength) == 0) {
     U32 backwardsMatchLen = 0;
 #ifdef COMPUTE_STATS
     cctx.stats.numMatches++;
@@ -577,10 +580,15 @@ size_t LDM_compress(const void *src, size_t srcSize,
     {
       const U32 literalLength = cctx.ip - cctx.anchor;
       const U32 offset = cctx.ip - match;
+#ifdef TMP_RECOMPUTE_LENGTHS
       const U32 matchLength = LDM_countMatchLength(
           cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
           match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
           cctx.ihashLimit) + backwardsMatchLen;
+#else
+      const U32 matchLength = forwardMatchLength + backwardsMatchLen -
+                              LDM_MIN_MATCH_LENGTH;
+#endif
 
       LDM_outputBlock(&cctx, literalLength, offset, matchLength);
 
index 7566751dcc7edefab84fbb1efba535c0333e6c14..2ea159f7169feba5171d0a213432d9ec117d9beb 100644 (file)
@@ -41,8 +41,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
                                   const U32 checksum,
                                   const BYTE *pIn,
                                   const BYTE *pEnd,
-                                  U32 minMatchLength,
-                                  U32 maxWindowSize);
+                                  const U32 minMatchLength,
+                                  const U32 maxWindowSize,
+                                  U32 *matchLength);
 
 hash_t HASH_hashU32(U32 value);