]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Refactor existing functions to use posInSequence
author senhuang42 <senhuang96@fb.com>
Thu, 1 Oct 2020 14:12:21 +0000 (10:12 -0400)
committer senhuang42 <senhuang96@fb.com>
Wed, 7 Oct 2020 17:56:25 +0000 (13:56 -0400)
lib/compress/zstd_opt.c

index 5f7466b9b931abed47614eed0751466c10fafa37..bafff82610a569eb8c05fd0e24993224322eae4a 100644 (file)
@@ -768,6 +768,53 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
 *  LDM helper functions
 *********************************/
 
+/* Skips past bytesToSkip bytes in an ldm seqstore, trimming the consumed sequences in place */
+static void ldm_skipBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t bytesToSkip) {
+    while (bytesToSkip > 0 && ldmSeqStore->pos < ldmSeqStore->size) {
+        rawSeq* seq = ldmSeqStore->seq + ldmSeqStore->pos;
+        if (bytesToSkip <= seq->litLength) {
+            /* Skip ends within this sequence's literals: shorten them and stop */
+            seq->litLength -= (U32)bytesToSkip;
+            return;
+        }
+        bytesToSkip -= seq->litLength;
+        seq->litLength = 0;
+        if (bytesToSkip < seq->matchLength) {  /* skip ends within the match: shorten it and stop */
+            seq->matchLength -= (U32)bytesToSkip;
+            return;
+        }
+        bytesToSkip -= seq->matchLength;
+        seq->matchLength = 0;
+        ldmSeqStore->pos++;  /* sequence fully consumed: move on to the next one */
+    }
+}
+
+/* Returns a copy of the sequence at pos, adjusted if it extends past remainingBytes, and updates
+ * the seq store accordingly. Pretty much the same function as maybeSplitSequence() in zstd_ldm.c
+ */
+static rawSeq ldm_splitSequenceAndUpdateSeqStore(rawSeqStore_t* ldmSeqStore, U32 remainingBytes) {
+    rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos];
+    /* Whole sequence fits within remainingBytes: no split, just advance pos */
+    if (remainingBytes >= currSeq.litLength + currSeq.matchLength) {
+        ldmSeqStore->pos++;
+        return currSeq;
+    }
+    /* Need a split */
+    if (remainingBytes <= currSeq.litLength) {
+        currSeq.offset = 0;  /* boundary is within the literals; presumably offset 0 tells the caller there is no match - confirm */
+    } else if (remainingBytes < currSeq.litLength + currSeq.matchLength) {  /* always true here: the >= case returned above */
+        currSeq.matchLength = remainingBytes - currSeq.litLength;  /* keep only the match bytes before the boundary */
+    }
+
+    /* After deriving currSeq which is the sequence before the block boundary,
+     * we now must skip past the remaining number of bytes unaccounted for,
+     * and update the entry at pos in the seqStore, which represents the second half
+     * of the sequence after the block boundary
+     */
+    ldm_skipBytesInSeqStore(ldmSeqStore, remainingBytes);
+    return currSeq;
+}
+
 /* Moves forward in rawSeqStore by nbBytes bytes, which will updating the fields
  * 'pos' and 'posInSequence' accordingly.
  */
@@ -780,6 +827,7 @@ static void ldm_moveForwardBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t nb
             ldmSeqStore->posInSequence += nbBytes;
             return;
         } else {
+            ldmSeqStore->posInSequence += currSeq.litLength;
             nbBytes -= currSeq.litLength;
         }
 
@@ -788,38 +836,49 @@ static void ldm_moveForwardBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t nb
             return;
         } else {
             nbBytes -= currSeq.matchLength;
+            /* We have moved through this entire sequence - move the read pos
+               forward to the next sequence, and reset posInSequence */
             ldmSeqStore->pos++;
             ldmSeqStore->posInSequence = 0;
         }
     }
 }
 
+/* Calculates the beginning and end of a match, and updates ldmSeqStore as
+ * necessary.
+ * posInSequence can be either within the literals section, or within a match.
+ * If the remaining literals fill the rest of the block, both positions are set to UINT_MAX.
+ */
 static void ldm_calculateMatchRange(rawSeqStore_t* ldmSeqStore,
                             U32* matchStartPosInBlock, U32* matchEndPosInBlock,
                             U32* matchOffset, U32 currPosInBlock,
-                            U32 remainingBytes, U32 currBlockEndPos) {
+                            U32 blockBytesRemaining) {
     rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos];
-    U32 literalsBytesLeft = (ldmSeqStore->posInSequence < currSeq.litLength) ?
+    U32 currBlockEndPos = currPosInBlock + blockBytesRemaining;
+    U32 literalsBytesRemaining = (ldmSeqStore->posInSequence < currSeq.litLength) ?
                                     currSeq.litLength - ldmSeqStore->posInSequence :
                                     0;
+
     /* In this case, the match is further in the block than currPosInBlock, and we are
        currently in the literals section of the LDM */
-    if (literalsBytesLeft) {
-        if (literalsBytesLeft >= remainingBytes) {
+    if (literalsBytesRemaining) {
+        if (literalsBytesRemaining >= blockBytesRemaining) {
             /* If there are more literal bytes than bytes remaining in block, no ldm */
             *matchStartPosInBlock = UINT_MAX;
             *matchEndPosInBlock = UINT_MAX;
-            ldm_moveForwardBytesInSeqStore(ldmSeqStore, remainingBytes);
+            ldm_moveForwardBytesInSeqStore(ldmSeqStore, blockBytesRemaining);
             return;
         }  
     }
 
-    *matchStartPosInBlock = currPosInBlock + currSeq.litLength;
+    /* Matches may be < MINMATCH by this process. In that case, we will reject them
+       when we are deciding whether or not to add the ldm */
+    *matchStartPosInBlock = currPosInBlock + literalsBytesRemaining;
     *matchEndPosInBlock = *matchStartPosInBlock + currSeq.matchLength;
     *matchOffset = currSeq.offset;
 
-    /* Match ends after the block ends, we can't use the whole match */
     if (*matchEndPosInBlock > currBlockEndPos) {
+        /* Match ends after the block ends, we can't use the whole match */
         *matchEndPosInBlock = currBlockEndPos;
         ldm_moveForwardBytesInSeqStore(ldmSeqStore, currBlockEndPos - currPosInBlock);
     } else {
@@ -829,53 +888,6 @@ static void ldm_calculateMatchRange(rawSeqStore_t* ldmSeqStore,
     }
 }
 
-/* Skips past srcSize bytes in an ldm seqstore */
-static void ldm_skipBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t bytesToSkip) {
-    while (bytesToSkip > 0 && ldmSeqStore->pos < ldmSeqStore->size) {
-        rawSeq* seq = ldmSeqStore->seq + ldmSeqStore->pos;
-        if (bytesToSkip <= seq->litLength) {
-            /* Skip past srcSize literals */
-            seq->litLength -= (U32)bytesToSkip;
-            return;
-        }
-        bytesToSkip -= seq->litLength;
-        seq->litLength = 0;
-        if (bytesToSkip < seq->matchLength) {
-            seq->matchLength -= (U32)bytesToSkip;
-            return;
-        }
-        bytesToSkip -= seq->matchLength;
-        seq->matchLength = 0;
-        ldmSeqStore->pos++;
-    }
-}
-
-/* Splits a sequence if it's across the boundary. May update pos in the seq store too
- * Pretty much the same function as maybeSplitSequence() in zstd_ldm.c
- */
-static rawSeq ldm_splitSequenceAndUpdateSeqStore(rawSeqStore_t* ldmSeqStore, U32 remainingBytes) {
-    rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos];
-    /* Case where don't split the match*/
-    if (remainingBytes >= currSeq.litLength + currSeq.matchLength) {
-        ldmSeqStore->pos++;
-        return currSeq;
-    }
-    /* Need a split */
-    if (remainingBytes <= currSeq.litLength) {
-        currSeq.offset = 0;
-    } else if (remainingBytes < currSeq.litLength + currSeq.matchLength) {
-        currSeq.matchLength = remainingBytes - currSeq.litLength;
-    }
-
-    /* After deriving currSeq which is the sequence before the block boundary,
-     * we now must skip past the remaining number of bytes unaccounted for,
-     * and update the entry at pos in the seqStore, which represents the second half
-     * of the sequence after the block boundary
-     */
-    ldm_skipBytesInSeqStore(ldmSeqStore, remainingBytes);
-    return currSeq;
-}
-
 /* Fetch the next match in the ldm seq store */
 static void ldm_getNextMatch(rawSeqStore_t* ldmSeqStore,
                             U32* matchStartPosInBlock, U32* matchEndPosInBlock,
@@ -896,10 +908,7 @@ static void ldm_getNextMatch(rawSeqStore_t* ldmSeqStore,
         return;
     }*/
 
-    ldm_calculateMatchRange(ldmSeqStore, matchStartPosInBlock, matchEndPosInBlock, matchOffset, remainingBytes);
-    *matchStartPosInBlock = currPosInBlock + seq.litLength;
-    *matchEndPosInBlock = *matchStartPosInBlock + seq.matchLength;
-    *matchOffset = seq.offset;
+    ldm_calculateMatchRange(ldmSeqStore, matchStartPosInBlock, matchEndPosInBlock, matchOffset, currPosInBlock, remainingBytes);
     return;
 }
 
@@ -970,8 +979,8 @@ static void ldm_handleLdm(rawSeqStore_t* ldmSeqStore, ZSTD_match_t* matches, U32
             ldm_moveForwardBytesInSeqStore(ldmSeqStore, posOvershoot);
         } 
         ldm_getNextMatch(ldmSeqStore, matchStartPosInBlock,
-                                          matchEndPosInBlock, matchOffset,
-                                          currPosInBlock, remainingBytes);
+                         matchEndPosInBlock, matchOffset,
+                         currPosInBlock, remainingBytes);
     }
     ldm_maybeAddLdm(matches, nbMatches, *matchStartPosInBlock, *matchEndPosInBlock, *matchOffset, currPosInBlock);
 }
@@ -1034,16 +1043,16 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
     U32 ldmEndPosInBlock = 0;
     U32 ldmOffset = 0;
     
-    /*if (ms->ldmSeqStore.size > 0 && ms->ldmSeqStore.pos != ms->ldmSeqStore.size) {
-        if (ms->ldmSeqStore.base != base) {
+    if (ms->ldmSeqStore.size > 0 && ms->ldmSeqStore.pos != ms->ldmSeqStore.size) {
+        /*if (ms->ldmSeqStore.base != base) {
             int baseDiff = (int)(ms->ldmSeqStore.base - base);
             ms->ldmSeqStore.seq[ms->ldmSeqStore.pos].litLength += baseDiff;
             ms->ldmSeqStore.base = base;
-        }
+        }*/
         ldm_getNextMatch(&ms->ldmSeqStore, &ldmStartPosInBlock,
                          &ldmEndPosInBlock, &ldmOffset,
                          (U32)(ip-istart), (U32)(iend-ip));
-    }*/
+    }
     /* init */
     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
@@ -1304,7 +1313,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
     if (ldmEndPosInBlock < srcSize) {
         /* This can occur if after adding the final match in an ldm seq store within this block,
         ip goes to the end of the block without activating a check for ldm_getNextMatch */
-        ldm_moveForwardBytesInSeqStore(ms->ldmSeqStore, srcSize - ldmEndPosInBlock);
+        ldm_moveForwardBytesInSeqStore(&ms->ldmSeqStore, srcSize - ldmEndPosInBlock);
     }
     /* Return the last literals size */
     return (size_t)(iend - anchor);