From 97aabc496e57821cb46893bd0a73f48ff6bc20b7 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 9 May 2022 17:17:11 -0400 Subject: [PATCH] Correct and clarify repcode offset history logic --- lib/compress/zstd_double_fast.c | 19 +++++++++++-------- lib/compress/zstd_fast.c | 32 ++++++++++++++++++++------------ lib/compress/zstd_lazy.c | 17 +++++++++++------ 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 610a1b3ec..d619f119b 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -67,7 +67,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; + U32 offsetSaved1 = 0, offsetSaved2 = 0; size_t mLength; U32 offset; @@ -100,8 +100,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( U32 const current = (U32)(ip - base); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; } /* Outer Loop: one iteration per match found and stored */ @@ -175,9 +175,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic( } while (ip1 <= ilimit); _cleanup: + /* If rep_offset1 started invalid (offsetSaved1 > 0) and became valid (rep_offset1 > 0), + * rotate saved offsets. */ + offsetSaved2 = ((offsetSaved1 > 0) & (offset_1 > 0)) ? offsetSaved1 : offsetSaved2; + /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; + rep[0] = offset_1 ? offset_1 : offsetSaved1; + rep[1] = offset_2 ? offset_2 : offsetSaved2; /* Return the last literals size */ return (size_t)(iend - anchor); @@ -275,7 +279,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; const ZSTD_matchState_t* const dms = ms->dictMatchState; const ZSTD_compressionParameters* const dictCParams = &dms->cParams; @@ -461,8 +464,8 @@ _match_stored: } /* while (ip < ilimit) */ /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; + rep[0] = offset_1; + rep[1] = offset_2; /* Return the last literals size */ return (size_t)(iend - anchor); diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 198ccaee3..36ea4f4e1 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -117,7 +117,7 @@ ZSTD_compressBlock_fast_noDict_generic( U32 rep_offset1 = rep[0]; U32 rep_offset2 = rep[1]; - U32 offsetSaved = 0; + U32 offsetSaved1 = 0, offsetSaved2 = 0; size_t hash0; /* hash for ip0 */ size_t hash1; /* hash for ip1 */ @@ -141,8 +141,8 @@ ZSTD_compressBlock_fast_noDict_generic( { U32 const curr = (U32)(ip0 - base); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); U32 const maxRep = curr - windowLow; - if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0; - if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0; + if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0; + if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0; } /* start each op */ @@ -254,9 +254,13 @@ _cleanup: * However, it seems to be a meaningful performance hit to try to search * them. So let's not. */ + /* If rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), + * rotate saved offsets. */ + offsetSaved2 = ((offsetSaved1 > 0) & (rep_offset1 > 0)) ? offsetSaved1 : offsetSaved2; + /* save reps for next block */ - rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; - rep[1] = rep_offset2 ? rep_offset2 : offsetSaved; + rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1; + rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2; /* Return the last literals size */ return (size_t)(iend - anchor); @@ -388,7 +392,6 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; const ZSTD_matchState_t* const dms = ms->dictMatchState; const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; @@ -545,8 +548,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( _cleanup: /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; + rep[0] = offset_1; + rep[1] = offset_2; /* Return the last literals size */ return (size_t)(iend - anchor); @@ -603,6 +606,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved1 = 0, offsetSaved2 = 0; const BYTE* ip0 = istart; const BYTE* ip1; @@ -635,8 +639,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( { U32 const curr = (U32)(ip0 - base); U32 const maxRep = curr - dictStartIndex; - if (offset_2 >= maxRep) offset_2 = 0; - if (offset_1 >= maxRep) offset_1 = 0; + if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0; + if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0; } /* start each op */ @@ -758,9 +762,13 @@ _cleanup: * However, it seems to be a meaningful performance hit to try to search * them. So let's not. */ + /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), + * rotate saved offsets. */ + offsetSaved2 = ((offsetSaved1 > 0) & (offset_1 > 0)) ? offsetSaved1 : offsetSaved2; + /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : rep[0]; - rep[1] = offset_2 ? offset_2 : rep[1]; + rep[0] = offset_1 ? offset_1 : offsetSaved1; + rep[1] = offset_2 ? offset_2 : offsetSaved2; /* Return the last literals size */ return (size_t)(iend - anchor); diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 7f97e12a5..9814943d1 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -1461,7 +1461,8 @@ ZSTD_compressBlock_lazy_generic( const BYTE* const prefixLowest = base + prefixLowestIndex; searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, dictMode)->searchMax; - U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + U32 offset_1 = rep[0], offset_2 = rep[1]; + U32 offsetSaved1 = 0, offsetSaved2 = 0; const int isDMS = dictMode == ZSTD_dictMatchState; const int isDDS = dictMode == ZSTD_dedicatedDictSearch; @@ -1484,8 +1485,8 @@ ZSTD_compressBlock_lazy_generic( U32 const curr = (U32)(ip - base); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); U32 const maxRep = curr - windowLow; - if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; - if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; } if (isDxS) { /* dictMatchState repCode checks don't currently handle repCode == 0 @@ -1681,9 +1682,13 @@ _storeSequence: continue; /* faster when present ... (?) */ } } } - /* Save reps for next block */ - rep[0] = offset_1 ? offset_1 : savedOffset; - rep[1] = offset_2 ? offset_2 : savedOffset; + /* If offset_1 started invalid (offsetSaved1 > 0) and became valid (offset_1 > 0), + * rotate saved offsets. */ + offsetSaved2 = ((offsetSaved1 > 0) & (offset_1 > 0)) ? offsetSaved1 : offsetSaved2; + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved1; + rep[1] = offset_2 ? offset_2 : offsetSaved2; /* Return the last literals size */ return (size_t)(iend - anchor); -- 2.47.2