patch 9.2.0137: [security]: crash with composing char in collection range

author Christian Brabandt <cb@256bit.org>

Wed, 11 Mar 2026 13:16:29 +0000 (14:16 +0100)

committer Christian Brabandt <cb@256bit.org>

Wed, 11 Mar 2026 18:52:56 +0000 (18:52 +0000)
author Christian Brabandt <cb@256bit.org>
Wed, 11 Mar 2026 13:16:29 +0000 (14:16 +0100)
committer Christian Brabandt <cb@256bit.org>
Wed, 11 Mar 2026 18:52:56 +0000 (18:52 +0000)
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c

index 6a9581d99356978130a8c5aabc8a428b37d9ce15..807bc203c738e5f751172482fdc1838cd6147e38 100644 (file)
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -1765,6 +1765,7 @@ collection:
             if (*endp == ']')
             {
                 int plen;
+               bool range_endpoint;
                 /*
                  * Try to reverse engineer character classes. For example,
                  * recognize that [0-9] stands for \d and [A-Za-z_] for \h,
@@ -1812,6 +1813,7 @@ collection:
                 while (regparse < endp)
                 {
                     int     oldstartc = startc;
+                   range_endpoint = false;
  
                     startc = -1;
                     got_coll_char = FALSE;
@@ -1975,6 +1977,7 @@ collection:
                     if (emit_range)
                     {
                         int     endc = startc;
+                       range_endpoint = true;
  
                         startc = oldstartc;
                         if (startc > endc)
@@ -2053,7 +2056,14 @@ collection:
                         }
                     }
  
-                   if (enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))))
+                   //
+                   // If this character was consumed as the end of a range, do not emit its
+                   // composing characters separately.  Range handling only uses the base
+                   // codepoint; emitting the composing part again would duplicate the
+                   // character in the postfix stream and corrupt the NFA stack.
+                   //
+                   if (!range_endpoint && enc_utf8 &&
+                           (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))))
                     {
                         int i = utf_ptr2len(regparse);
  
@@ -3187,7 +3197,10 @@ nfa_max_width(nfa_state_T *startstate, int depth)
                     ++len;
                 if (state->c != NFA_ANY)
                 {
-                   // skip over the characters
+                   // Skip over the compiled collection.
+                   // Malformed NFAs (NULL out1 or out1->out) must not crash width estimation.
+                   if (state->out1 == NULL || state->out1->out == NULL)
+                       return -1;
                     state = state->out1->out;
                     continue;
                 }
diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim

index 6eb6bf6fa3e2e0df97e5d1eccd4c1b4332fb2411..d33c03c421032b1a79d79b4d4424cafffb38db8f 100644 (file)
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -633,4 +633,22 @@ func Test_replace_multibyte_match_in_multi_lines()
    set ignorecase&vim re&vim
  endfun
  
+func Test_regex_collection_range_with_composing_crash()
+  " Regression test: composing char in collection range caused NFA crash/E874
+  new
+  call setline(1, ['00', '0ֻ', '01'])
+  let patterns = [ '0[0-0ֻ]\@<!','0[0ֻ]\@<!']
+
+  for pat in patterns
+    " Must compile and run without crashing; the only expected error is E486
+    for re in range(3)
+      let regex = '\%#=' .. re .. pat
+      call search(regex)
+      call assert_fails($"/{regex}\<cr>", 'E486:')
+    endfor
+  endfor
+
+  bwipe!
+endfunc
+
  " vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c

index e58c12baa56965f996a8eb5987eeae66978ef518..79a30caba318ffe9e827d12aa2248d50984d340d 100644 (file)
--- a/src/version.c
+++ b/src/version.c
@@ -734,6 +734,8 @@ static char *(features[]) =
  
  static int included_patches[] =
  {   /* Add new patch number below this line */
+/**/
+    137,
  /**/
      136,
  /**/
author	Christian Brabandt <cb@256bit.org>
	Wed, 11 Mar 2026 13:16:29 +0000 (14:16 +0100)
committer	Christian Brabandt <cb@256bit.org>
	Wed, 11 Mar 2026 18:52:56 +0000 (18:52 +0000)
src/regexp_nfa.c		patch \| blob \| blame \| history
src/testdir/test_regexp_utf8.vim		patch \| blob \| blame \| history
src/version.c		patch \| blob \| blame \| history