From: Federico Maresca <62501138+Federico-Maresca@users.noreply.github.com> Date: Wed, 29 May 2024 16:35:24 +0000 (+0200) Subject: Refactor dictionary matchfinder index safety check (#4039) X-Git-Tag: v1.5.7^2~119 X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=5e9a6c2fe4e4bfabeef750642871e3edcf6c6d79;p=thirdparty%2Fzstd.git Refactor dictionary matchfinder index safety check (#4039) --- diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index e41d7b78e..f8ec1c82e 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -1315,6 +1315,13 @@ MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, return matchLowest; } +/* index_safety_check: + * intentional underflow : ensure repIndex isn't overlapping dict + prefix + * @return 1 if values are not overlapping, + * 0 otherwise */ +MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) { + return ((U32)((prefixLowestIndex-1) - repIndex) >= 3); +} /* debug functions */ diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index a4e9c50d3..1b5f64f20 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -392,7 +392,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ /* check repcode */ - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; @@ -513,7 +513,7 @@ _match_stored: const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? dictBase + repIndex2 - dictIndexDelta : base + repIndex2; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex2)) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; @@ -651,7 +651,7 @@ size_t ZSTD_compressBlock_doubleFast_extDict_generic( size_t mLength; hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ - if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + if (((ZSTD_index_overlap_check(prefixStartIndex, repIndex)) & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; @@ -719,7 +719,7 @@ size_t ZSTD_compressBlock_doubleFast_extDict_generic( U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 <= current2 - dictStartIndex)) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 6c4554cfc..c6baa49d4 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -546,8 +546,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls); hashTable[hash0] = curr; /* update hash table */ - if (((U32) ((prefixStartIndex - 1) - repIndex) >= - 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex)) && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) { const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4; @@ -631,7 +630,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase - dictIndexDelta + repIndex2 : base + repIndex2; - if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + if ( (ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) && (MEM_read32(repMatch2) == MEM_read32(ip0))) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; @@ -925,7 +924,7 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */ while (ip0 <= ilimit) { U32 const repIndex2 = (U32)(ip0-base) - offset_2; const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */ + if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 > 0)) && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 67dd55fdb..9efaf4b25 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -1590,7 +1590,7 @@ size_t ZSTD_compressBlock_lazy_generic( && repIndex < prefixLowestIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; @@ -1642,7 +1642,7 @@ size_t ZSTD_compressBlock_lazy_generic( const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) && (MEM_read32(repMatch) == MEM_read32(ip)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; @@ -1678,7 +1678,7 @@ size_t ZSTD_compressBlock_lazy_generic( const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) && (MEM_read32(repMatch) == MEM_read32(ip)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; @@ -1740,7 +1740,7 @@ _storeSequence: const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase - dictIndexDelta + repIndex : base + repIndex; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) && (MEM_read32(repMatch) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; @@ -1986,7 +1986,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(curr+1 - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */ if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ @@ -2027,7 +2027,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ @@ -2059,7 +2059,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(curr - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ @@ -2113,7 +2113,7 @@ _storeSequence: const U32 repIndex = repCurrent - offset_2; const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index e63073e5a..8a4345b31 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -664,13 +664,13 @@ ZSTD_insertBtAndGetAllMatches ( assert(curr >= windowLow); if ( dictMode == ZSTD_extDict && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ - & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + & (ZSTD_index_overlap_check(dictLimit, repIndex)) ) && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; } if (dictMode == ZSTD_dictMatchState && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ - & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (ZSTD_index_overlap_check(dictLimit, repIndex)) ) && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; } }