From: Patrick Palka Date: Fri, 30 Jan 2026 17:29:17 +0000 (-0500) Subject: libstdc++/regex: Defer _M_current restoration to backtracking nodes [PR86164] X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ead66d5f229edda244d1a91125524eca9abdb9b9;p=thirdparty%2Fgcc.git libstdc++/regex: Defer _M_current restoration to backtracking nodes [PR86164] The incrementing of the current input string position (_M_current) is done by _M_handle_match, which also makes sure to restore it afterwards, via a restore_current frame. But restoring _M_current is naturally only necessary when backtracking is involved, not after every single match. So this patch moves the responsibility of saving/restoring _M_current from _M_handle_match to the branching nodes _M_handle_alternative and _M_handle_repeat. This is done by storing _M_current within the fallback_next, fallback_rep_once_more and posix_alternative frames. In turn we can get rid of the now unused restore_current frame kind. This reduces the maximum size of the _M_frames stack by 15% for regex_match(string(200000, 'a'), "(a|b|c)*") PR libstdc++/86164 libstdc++-v3/ChangeLog: * include/bits/regex_executor.tcc (__detail::_ExecutorFrameOpcode): Remove _S_fopcode_restore_current. (__detail::_Executor::_M_handle_repeat): Pass _M_current when pushing a fallback_next or fallback_rep_once_more frame. (__detail::_Executor::_M_handle_match): Don't push a restore_current frame. (__detail::_Executor::_M_handle_backref): Likewise and simplify accordingly. (__detail::_Executor::_M_handle_alternative): Pass _M_current when pushing a fallback_next or posix_alternative frame. (__detail::_Executor::_M_dfs) : Restore _M_current. : Likewise. : Likewise. : Remove. Reviewed-by: Tomasz KamiƄski Reviewed-by: Jonathan Wakely --- diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index b76f1dce952..ccdec934b49 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -64,7 +64,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) _S_fopcode_fallback_rep_once_more, _S_fopcode_posix_alternative, _S_fopcode_merge_sol, - _S_fopcode_restore_current, _S_fopcode_restore_cur_results, _S_fopcode_restore_rep_count, _S_fopcode_decrement_rep_count, @@ -294,7 +293,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) { if constexpr (__dfs_mode) // If it's DFS executor and already accepted, we're done. - _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next); + _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next, + _M_current); else _M_frames.emplace_back(_S_fopcode_next, __state._M_next); _M_frames.emplace_back(_S_fopcode_rep_once_more, __i); @@ -304,7 +304,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) if constexpr (__dfs_mode) { // vice-versa. - _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i); + _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i, + _M_current); _M_frames.emplace_back(_S_fopcode_next, __state._M_next); } else @@ -410,7 +411,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) { if (__state._M_matches(*_M_current)) { - _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current); ++_M_current; _M_frames.emplace_back(_S_fopcode_next, __state._M_next); } @@ -493,14 +493,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) _M_re._M_automaton->_M_traits)._M_apply( __submatch.first, __submatch.second, _M_current, __last)) { - if (__last != _M_current) - { - _M_frames.emplace_back(_S_fopcode_restore_current, 0, _M_current); - _M_current = __last; - _M_frames.emplace_back(_S_fopcode_next, __state._M_next); - } - else - _M_frames.emplace_back(_S_fopcode_next, __state._M_next); + _M_current = __last; + _M_frames.emplace_back(_S_fopcode_next, __state._M_next); } } @@ -568,14 +562,16 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) { // TODO: Fix BFS support. It is wrong. // Pick lhs if it matches. Only try rhs if it doesn't. - _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next); + _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next, + _M_current); _M_frames.emplace_back(_S_fopcode_next, __state._M_alt); } else { // Try both and compare the result. // See "case _S_opcode_accept:" handling above. - _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next); + _M_frames.emplace_back(_S_fopcode_posix_alternative, __state._M_next, + _M_current); _M_frames.emplace_back(_S_fopcode_next, __state._M_alt); } } @@ -638,6 +634,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_fopcode_fallback_next: if (_M_has_sol) break; + if constexpr (__dfs_mode) + _M_current = __frame._M_pos; [[__fallthrough__]]; case _S_fopcode_next: _M_node(__match_mode, __frame._M_state_id); @@ -646,6 +644,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_fopcode_fallback_rep_once_more: if (_M_has_sol) break; + if constexpr (__dfs_mode) + _M_current = __frame._M_pos; [[__fallthrough__]]; case _S_fopcode_rep_once_more: _M_rep_once_more(__match_mode, __frame._M_state_id); @@ -654,6 +654,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_fopcode_posix_alternative: _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol); _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id); + if constexpr (__dfs_mode) + _M_current = __frame._M_pos; _M_has_sol = false; break; @@ -661,10 +663,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) _M_has_sol |= __frame._M_val; break; - case _S_fopcode_restore_current: - _M_current = __frame._M_pos; - break; - case _S_fopcode_restore_cur_results: if (!__frame._M_end) _M_cur_results[__frame._M_state_id].first = __frame._M_pos;