]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
libstdc++/regex: Defer _M_current restoration to backtracking nodes [PR86164]
authorPatrick Palka <ppalka@redhat.com>
Fri, 30 Jan 2026 17:29:17 +0000 (12:29 -0500)
committerPatrick Palka <ppalka@redhat.com>
Fri, 30 Jan 2026 17:29:17 +0000 (12:29 -0500)
The incrementing of the current input string position (_M_current) is
done by _M_handle_match, which also makes sure to restore it afterwards,
via a restore_current frame.  But restoring _M_current is naturally only
necessary when backtracking is involved, not after every single match.

So this patch moves the responsibility of saving/restoring _M_current
from _M_handle_match to the branching nodes _M_handle_alternative and
_M_handle_repeat.  This is done by storing _M_current within the
fallback_next, fallback_rep_once_more and posix_alternative frames.
In turn we can get rid of the now unused restore_current frame kind.

This reduces the maximum size of the _M_frames stack by 15% for

  regex_match(string(200000, 'a'), "(a|b|c)*")

PR libstdc++/86164

libstdc++-v3/ChangeLog:

* include/bits/regex_executor.tcc (__detail::_ExecutorFrameOpcode):
Remove _S_fopcode_restore_current.
(__detail::_Executor::_M_handle_repeat): Pass _M_current when
pushing a fallback_next or fallback_rep_once_more frame.
(__detail::_Executor::_M_handle_match): Don't push a
restore_current frame.
(__detail::_Executor::_M_handle_backref): Likewise and simplify
accordingly.
(__detail::_Executor::_M_handle_alternative): Pass _M_current when
pushing a fallback_next or posix_alternative frame.
(__detail::_Executor::_M_dfs) <case _S_fopcode_fallback_next>:
Restore _M_current.
<case _S_fopcode_fallback_rep_once_more>: Likewise.
<case _S_fopcode_posix_alternative>: Likewise.
<case _S_fopcode_restore_current>: Remove.

Reviewed-by: Tomasz KamiƄski <tkaminsk@redhat.com>
Reviewed-by: Jonathan Wakely <jwakely@redhat.com>
libstdc++-v3/include/bits/regex_executor.tcc

index b76f1dce952c2afd9cf59cb7ea4819ba81848aa8..ccdec934b49cbb3ff961569a213d738e13808a38 100644 (file)
@@ -64,7 +64,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
     _S_fopcode_fallback_rep_once_more,
     _S_fopcode_posix_alternative,
     _S_fopcode_merge_sol,
-    _S_fopcode_restore_current,
     _S_fopcode_restore_cur_results,
     _S_fopcode_restore_rep_count,
     _S_fopcode_decrement_rep_count,
@@ -294,7 +293,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        {
          if constexpr (__dfs_mode)
            // If it's DFS executor and already accepted, we're done.
-           _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
+           _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
+                                  _M_current);
          else
            _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
          _M_frames.emplace_back(_S_fopcode_rep_once_more, __i);
@@ -304,7 +304,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
          if constexpr (__dfs_mode)
            {
              // vice-versa.
-             _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i);
+             _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i,
+                                    _M_current);
              _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
            }
          else
@@ -410,7 +411,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        {
          if (__state._M_matches(*_M_current))
            {
-             _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current);
              ++_M_current;
              _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
            }
@@ -493,14 +493,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
              _M_re._M_automaton->_M_traits)._M_apply(
                  __submatch.first, __submatch.second, _M_current, __last))
        {
-         if (__last != _M_current)
-           {
-             _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current);
-             _M_current = __last;
-             _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
-           }
-         else
-           _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
+         _M_current = __last;
+         _M_frames.emplace_back(_S_fopcode_next, __state._M_next);
        }
     }
 
@@ -568,14 +562,16 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        {
          // TODO: Fix BFS support. It is wrong.
          // Pick lhs if it matches. Only try rhs if it doesn't.
-         _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next);
+         _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next,
+                                _M_current);
          _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
        }
       else
        {
          // Try both and compare the result.
          // See "case _S_opcode_accept:" handling above.
-         _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next);
+         _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next,
+                                _M_current);
          _M_frames.emplace_back(_S_fopcode_next, __state._M_alt);
        }
     }
@@ -638,6 +634,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
            case _S_fopcode_fallback_next:
              if (_M_has_sol)
                break;
+             if constexpr (__dfs_mode)
+               _M_current = __frame._M_pos;
              [[__fallthrough__]];
            case _S_fopcode_next:
              _M_node(__match_mode, __frame._M_state_id);
@@ -646,6 +644,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
            case _S_fopcode_fallback_rep_once_more:
              if (_M_has_sol)
                break;
+             if constexpr (__dfs_mode)
+               _M_current = __frame._M_pos;
              [[__fallthrough__]];
            case _S_fopcode_rep_once_more:
              _M_rep_once_more(__match_mode, __frame._M_state_id);
@@ -654,6 +654,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
            case _S_fopcode_posix_alternative:
              _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol);
              _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id);
+             if constexpr (__dfs_mode)
+               _M_current = __frame._M_pos;
              _M_has_sol = false;
              break;
 
@@ -661,10 +663,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
              _M_has_sol |= __frame._M_val;
              break;
 
-           case _S_fopcode_restore_current:
-             _M_current = __frame._M_pos;
-             break;
-
            case _S_fopcode_restore_cur_results:
              if (!__frame._M_end)
                _M_cur_results[__frame._M_state_id].first = __frame._M_pos;