From: senhuang42 Date: Thu, 1 Oct 2020 14:12:21 +0000 (-0400) Subject: Refactor existing functions to use posInSequence X-Git-Tag: v1.4.7~57^2~21 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0718aa70df196b5854ce4e40c4d5abf8e846ad19;p=thirdparty%2Fzstd.git Refactor existing functions to use posInSequence --- diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 5f7466b9b..bafff8261 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -768,6 +768,53 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( * LDM helper functions *********************************/ +/* Skips past srcSize bytes in an ldm seqstore */ +static void ldm_skipBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t bytesToSkip) { + while (bytesToSkip > 0 && ldmSeqStore->pos < ldmSeqStore->size) { + rawSeq* seq = ldmSeqStore->seq + ldmSeqStore->pos; + if (bytesToSkip <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)bytesToSkip; + return; + } + bytesToSkip -= seq->litLength; + seq->litLength = 0; + if (bytesToSkip < seq->matchLength) { + seq->matchLength -= (U32)bytesToSkip; + return; + } + bytesToSkip -= seq->matchLength; + seq->matchLength = 0; + ldmSeqStore->pos++; + } +} + +/* Splits a sequence if it's across the boundary. May update pos in the seq store too + * Pretty much the same function as maybeSplitSequence() in zstd_ldm.c + */ +static rawSeq ldm_splitSequenceAndUpdateSeqStore(rawSeqStore_t* ldmSeqStore, U32 remainingBytes) { + rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos]; + /* Case where don't split the match*/ + if (remainingBytes >= currSeq.litLength + currSeq.matchLength) { + ldmSeqStore->pos++; + return currSeq; + } + /* Need a split */ + if (remainingBytes <= currSeq.litLength) { + currSeq.offset = 0; + } else if (remainingBytes < currSeq.litLength + currSeq.matchLength) { + currSeq.matchLength = remainingBytes - currSeq.litLength; + } + + /* After deriving currSeq which is the sequence before the block boundary, + * we now must skip past the remaining number of bytes unaccounted for, + * and update the entry at pos in the seqStore, which represents the second half + * of the sequence after the block boundary + */ + ldm_skipBytesInSeqStore(ldmSeqStore, remainingBytes); + return currSeq; +} + /* Moves forward in rawSeqStore by nbBytes bytes, which will updating the fields * 'pos' and 'posInSequence' accordingly. */ @@ -780,6 +827,7 @@ static void ldm_moveForwardBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t nb ldmSeqStore->posInSequence += nbBytes; return; } else { + ldmSeqStore->posInSequence += currSeq.litLength; nbBytes -= currSeq.litLength; } @@ -788,38 +836,49 @@ static void ldm_moveForwardBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t nb return; } else { nbBytes -= currSeq.matchLength; + /* We have moved through this entire sequence - move the read pos + forward to the next sequence, and reset posInSequence */ ldmSeqStore->pos++; ldmSeqStore->posInSequence = 0; } } } +/* Calculates the beginning and end of a match, and updates ldmSeqStore as + * necessary. + * posInSequence can be either within the literals section, or within a match. + * If + */ static void ldm_calculateMatchRange(rawSeqStore_t* ldmSeqStore, U32* matchStartPosInBlock, U32* matchEndPosInBlock, U32* matchOffset, U32 currPosInBlock, - U32 remainingBytes, U32 currBlockEndPos) { + U32 blockBytesRemaining) { rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos]; - U32 literalsBytesLeft = (ldmSeqStore->posInSequence < currSeq.litLength) ? + U32 currBlockEndPos = currPosInBlock + blockBytesRemaining; + U32 literalsBytesRemaining = (ldmSeqStore->posInSequence < currSeq.litLength) ? currSeq.litLength - ldmSeqStore->posInSequence : 0; + /* In this case, the match is further in the block than currPosInBlock, and we are currently in the literals section of the LDM */ - if (literalsBytesLeft) { - if (literalsBytesLeft >= remainingBytes) { + if (literalsBytesRemaining) { + if (literalsBytesRemaining >= blockBytesRemaining) { /* If there are more literal bytes than bytes remaining in block, no ldm */ *matchStartPosInBlock = UINT_MAX; *matchEndPosInBlock = UINT_MAX; - ldm_moveForwardBytesInSeqStore(ldmSeqStore, remainingBytes); + ldm_moveForwardBytesInSeqStore(ldmSeqStore, blockBytesRemaining); return; } } - *matchStartPosInBlock = currPosInBlock + currSeq.litLength; + /* Matches may be < MINMATCH by this process. In that case, we will reject them + when we are deciding whether or not to add the ldm */ + *matchStartPosInBlock = currPosInBlock + literalsBytesRemaining; *matchEndPosInBlock = *matchStartPosInBlock + currSeq.matchLength; *matchOffset = currSeq.offset; - /* Match ends after the block ends, we can't use the whole match */ if (*matchEndPosInBlock > currBlockEndPos) { + /* Match ends after the block ends, we can't use the whole match */ *matchEndPosInBlock = currBlockEndPos; ldm_moveForwardBytesInSeqStore(ldmSeqStore, currBlockEndPos - currPosInBlock); } else { @@ -829,53 +888,6 @@ static void ldm_calculateMatchRange(rawSeqStore_t* ldmSeqStore, } } -/* Skips past srcSize bytes in an ldm seqstore */ -static void ldm_skipBytesInSeqStore(rawSeqStore_t* ldmSeqStore, size_t bytesToSkip) { - while (bytesToSkip > 0 && ldmSeqStore->pos < ldmSeqStore->size) { - rawSeq* seq = ldmSeqStore->seq + ldmSeqStore->pos; - if (bytesToSkip <= seq->litLength) { - /* Skip past srcSize literals */ - seq->litLength -= (U32)bytesToSkip; - return; - } - bytesToSkip -= seq->litLength; - seq->litLength = 0; - if (bytesToSkip < seq->matchLength) { - seq->matchLength -= (U32)bytesToSkip; - return; - } - bytesToSkip -= seq->matchLength; - seq->matchLength = 0; - ldmSeqStore->pos++; - } -} - -/* Splits a sequence if it's across the boundary. May update pos in the seq store too - * Pretty much the same function as maybeSplitSequence() in zstd_ldm.c - */ -static rawSeq ldm_splitSequenceAndUpdateSeqStore(rawSeqStore_t* ldmSeqStore, U32 remainingBytes) { - rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos]; - /* Case where don't split the match*/ - if (remainingBytes >= currSeq.litLength + currSeq.matchLength) { - ldmSeqStore->pos++; - return currSeq; - } - /* Need a split */ - if (remainingBytes <= currSeq.litLength) { - currSeq.offset = 0; - } else if (remainingBytes < currSeq.litLength + currSeq.matchLength) { - currSeq.matchLength = remainingBytes - currSeq.litLength; - } - - /* After deriving currSeq which is the sequence before the block boundary, - * we now must skip past the remaining number of bytes unaccounted for, - * and update the entry at pos in the seqStore, which represents the second half - * of the sequence after the block boundary - */ - ldm_skipBytesInSeqStore(ldmSeqStore, remainingBytes); - return currSeq; -} - /* Fetch the next match in the ldm seq store */ static void ldm_getNextMatch(rawSeqStore_t* ldmSeqStore, U32* matchStartPosInBlock, U32* matchEndPosInBlock, @@ -896,10 +908,7 @@ static void ldm_getNextMatch(rawSeqStore_t* ldmSeqStore, return; }*/ - ldm_calculateMatchRange(ldmSeqStore, matchStartPosInBlock, matchEndPosInBlock, matchOffset, remainingBytes); - *matchStartPosInBlock = currPosInBlock + seq.litLength; - *matchEndPosInBlock = *matchStartPosInBlock + seq.matchLength; - *matchOffset = seq.offset; + ldm_calculateMatchRange(ldmSeqStore, matchStartPosInBlock, matchEndPosInBlock, matchOffset, currPosInBlock, remainingBytes); return; } @@ -970,8 +979,8 @@ static void ldm_handleLdm(rawSeqStore_t* ldmSeqStore, ZSTD_match_t* matches, U32 ldm_moveForwardBytesInSeqStore(ldmSeqStore, posOvershoot); } ldm_getNextMatch(ldmSeqStore, matchStartPosInBlock, - matchEndPosInBlock, matchOffset, - currPosInBlock, remainingBytes); + matchEndPosInBlock, matchOffset, + currPosInBlock, remainingBytes); } ldm_maybeAddLdm(matches, nbMatches, *matchStartPosInBlock, *matchEndPosInBlock, *matchOffset, currPosInBlock); } @@ -1034,16 +1043,16 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 ldmEndPosInBlock = 0; U32 ldmOffset = 0; - /*if (ms->ldmSeqStore.size > 0 && ms->ldmSeqStore.pos != ms->ldmSeqStore.size) { - if (ms->ldmSeqStore.base != base) { + if (ms->ldmSeqStore.size > 0 && ms->ldmSeqStore.pos != ms->ldmSeqStore.size) { + /*if (ms->ldmSeqStore.base != base) { int baseDiff = (int)(ms->ldmSeqStore.base - base); ms->ldmSeqStore.seq[ms->ldmSeqStore.pos].litLength += baseDiff; ms->ldmSeqStore.base = base; - } + }*/ ldm_getNextMatch(&ms->ldmSeqStore, &ldmStartPosInBlock, &ldmEndPosInBlock, &ldmOffset, (U32)(ip-istart), (U32)(iend-ip)); - }*/ + } /* init */ DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); @@ -1304,7 +1313,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ if (ldmEndPosInBlock < srcSize) { /* This can occur if after adding the final match in an ldm seq store within this block, ip goes to the end of the block without activating a check for ldm_getNextMatch */ - ldm_moveForwardBytesInSeqStore(ms->ldmSeqStore, srcSize - ldmEndPosInBlock); + ldm_moveForwardBytesInSeqStore(&ms->ldmSeqStore, srcSize - ldmEndPosInBlock); } /* Return the last literals size */ return (size_t)(iend - anchor);