From: Christian Brabandt Date: Wed, 11 Mar 2026 13:16:29 +0000 (+0100) Subject: patch 9.2.0137: [security]: crash with composing char in collection range X-Git-Tag: v9.2.0137^0 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=36d6e87542cf823d833e451e09a90ee429899cec;p=thirdparty%2Fvim.git patch 9.2.0137: [security]: crash with composing char in collection range Problem: Using a composing character as the end of a range inside a collection may corrupt the NFA postfix stack (Nathan Mills, after v9.1.0011) Solution: When a character is used as the endpoint of a range, do not emit its composing characters separately. Range handling only uses the base codepoint. supported by AI Github Advisory: https://github.com/vim/vim/security/advisories/GHSA-9phh-423r-778r Signed-off-by: Christian Brabandt --- diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index 6a9581d993..807bc203c7 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -1765,6 +1765,7 @@ collection: if (*endp == ']') { int plen; + bool range_endpoint; /* * Try to reverse engineer character classes. For example, * recognize that [0-9] stands for \d and [A-Za-z_] for \h, @@ -1812,6 +1813,7 @@ collection: while (regparse < endp) { int oldstartc = startc; + range_endpoint = false; startc = -1; got_coll_char = FALSE; @@ -1975,6 +1977,7 @@ collection: if (emit_range) { int endc = startc; + range_endpoint = true; startc = oldstartc; if (startc > endc) @@ -2053,7 +2056,14 @@ collection: } } - if (enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse)))) + // + // If this character was consumed as the end of a range, do not emit its + // composing characters separately. Range handling only uses the base + // codepoint; emitting the composing part again would duplicate the + // character in the postfix stream and corrupt the NFA stack. 
+ // + if (!range_endpoint && enc_utf8 && + (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse)))) { int i = utf_ptr2len(regparse); @@ -3187,7 +3197,10 @@ nfa_max_width(nfa_state_T *startstate, int depth) ++len; if (state->c != NFA_ANY) { - // skip over the characters + // Skip over the compiled collection. + // malformed NFAs must not crash width estimation. + if (state->out1 == NULL || state->out1->out == NULL) + return -1; state = state->out1->out; continue; } diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim index 6eb6bf6fa3..d33c03c421 100644 --- a/src/testdir/test_regexp_utf8.vim +++ b/src/testdir/test_regexp_utf8.vim @@ -633,4 +633,22 @@ func Test_replace_multibyte_match_in_multi_lines() set ignorecase&vim re&vim endfun +func Test_regex_collection_range_with_composing_crash() + " Regression test: composing char in collection range caused NFA crash/E874 + new + call setline(1, ['00', '0ֻ', '01']) + let patterns = [ '0[0-0ֻ]\@", 'E486:') + endfor + endfor + + bwipe! +endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c index e58c12baa5..79a30caba3 100644 --- a/src/version.c +++ b/src/version.c @@ -734,6 +734,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 137, /**/ 136, /**/