From: Patrick Palka Date: Tue, 10 Feb 2026 01:04:00 +0000 (-0500) Subject: libstdc++/regex: Replace __dfs_mode template parameter with run-time flag X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c49ce07bf09504d1eca66ac7359ca92eab63f106;p=thirdparty%2Fgcc.git libstdc++/regex: Replace __dfs_mode template parameter with run-time flag That _Executor's __dfs_mode flag is a template parameter means __regex_algo_impl has to instantiate two different (but largely the same) versions of _Executor even though the BFS version is rarely used. And it seems the compiler has trouble DCEing the unused version of _Executor, which needlessly increases code size and burdens the optimizer. This patch replaces the template parameter with a run-time data member _M_search_mode. We in turn need to inline the _Executor::_State_info member functions and data members into _Executor so that they depend on the run-time instead of compile-time flag. This means _Executor is three pointers bigger in DFS mode (due to the unused _M_match_queue and _M_visited_states members), which we can reduce to one pointer if we really want to, but that doesn't seem ver worthwhile. The __dfs_mode template parameter is now unused but not yet removed, that'll be done in the next patch. After this patch, both code size and run time for the microbenchmark for (int i = 0; i < 10000; i++) regex_match(string(200, 'a'), regex("(a|b|c)*")); decreases by about 15% each (with -O2). Compile time/memory use decreases by 5-10%. libstdc++-v3/ChangeLog: * include/bits/regex.tcc (__regex_algo_impl): Pass __use_dfs parameter to _Executor's constructor. * include/bits/regex_executor.h (_Executor::_Search_mode): New. (_Executor::_Executor): Add __use_dfs parameter and initialize _M_search_mode. Adjust after inlining _State_info members into _Executor. (_Executor::~_Executor): Free _M_visted_states. (_Executor::_M_main): Adjust after renaming _M_main_dispatch overloads to _M_main_dfs and _M_main_bfs. (_Executor::_State_info): Remove. (_Executor::_M_visited): Inlined from _State_info. (_Executor::_M_get_sol_pos): Likewise. (_Executor::_M_states): Remove. (_Executor::_M_start): Inlined from _State_info. (_Executor::_M_sol_pos): Likewise. (_Executor::_M_match_queue): Likewise. (_Executor::_M_search_mode): New. * include/bits/regex_executor.tcc (_Executor::_M_main_dispatch): Renamed to... (_Executor::_M_main_dfs, _Executor::_M_main_bfs): ... these. (_Executor::_M_*): Adjust after _M_states removal. (_Executor::_M_lookhead): Also adjust _Executor constructor call. Reviewed-by: Tomasz KamiƄski Reviewed-by: Jonathan Wakely --- diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc index 061b60042ec..d16cfd5baf0 100644 --- a/libstdc++-v3/include/bits/regex.tcc +++ b/libstdc++-v3/include/bits/regex.tcc @@ -61,26 +61,19 @@ namespace __detail __m._M_resize(__re._M_automaton->_M_sub_count()); bool __ret; + bool __use_dfs = true; if ((__re.flags() & regex_constants::__polynomial) || (__policy == _RegexExecutorPolicy::_S_alternate && !__re._M_automaton->_M_has_backref)) - { - _Executor<_BiIter, _Alloc, _TraitsT, false> - __executor(__s, __e, __res, __re, __flags); - if (__match_mode) - __ret = __executor._M_match(); - else - __ret = __executor._M_search(); - } + __use_dfs = false; + + _Executor<_BiIter, _Alloc, _TraitsT, true> + __executor(__s, __e, __res, __re, __flags, __use_dfs); + if (__match_mode) + __ret = __executor._M_match(); else - { - _Executor<_BiIter, _Alloc, _TraitsT, true> - __executor(__s, __e, __res, __re, __flags); - if (__match_mode) - __ret = __executor._M_match(); - else - __ret = __executor._M_search(); - } + __ret = __executor._M_search(); + if (__ret) { for (auto& __it : __res) diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index 94048e1f632..2638df4d5af 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -55,10 +55,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) bool __dfs_mode> class _Executor { - using __search_mode = integral_constant; - using __dfs = true_type; - using __bfs = false_type; - + enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 }; enum class _Match_mode : unsigned char { _Exact, _Prefix }; public: @@ -74,7 +71,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) _BiIter __end, _ResultsVec& __results, const _RegexT& __re, - _FlagT __flags) + _FlagT __flags, + bool __use_dfs) : _M_cur_results(__results.get_allocator()), _M_begin(__begin), _M_end(__end), @@ -82,14 +80,21 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) _M_nfa(*__re._M_automaton), _M_results(__results), _M_rep_count(_M_nfa.size()), - _M_states(_M_nfa._M_start(), _M_nfa.size()), - _M_flags(__flags) + _M_start(_M_nfa._M_start()), + _M_visited_states(nullptr), + _M_flags(__flags), + _M_search_mode(__use_dfs ? _Search_mode::_DFS : _Search_mode::_BFS) { using namespace regex_constants; if (__flags & match_prev_avail) // ignore not_bol and not_bow _M_flags &= ~(match_not_bol | match_not_bow); + if (_M_search_mode == _Search_mode::_BFS) + _M_visited_states = new bool[_M_nfa.size()]; } + ~_Executor() + { delete[] _M_visited_states; } + // Set matched when string exactly matches the pattern. bool _M_match() @@ -154,13 +159,18 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) bool _M_main(_Match_mode __match_mode) - { return _M_main_dispatch(__match_mode, __search_mode{}); } + { + if (_M_search_mode == _Search_mode::_DFS) + return _M_main_dfs(__match_mode); + else + return _M_main_bfs(__match_mode); + } bool - _M_main_dispatch(_Match_mode __match_mode, __dfs); + _M_main_dfs(_Match_mode __match_mode); bool - _M_main_dispatch(_Match_mode __match_mode, __bfs); + _M_main_bfs(_Match_mode __match_mode); bool _M_is_word(_CharT __ch) const @@ -239,62 +249,19 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) return (_M_re._M_automaton->_M_options() & __m) == __m; } - // Holds additional information used in BFS-mode. - template - struct _State_info; - - template - struct _State_info<__bfs, _ResultsVec> - { - explicit - _State_info(_StateIdT __start, size_t __n) - : _M_visited_states(new bool[__n]()), _M_start(__start) - { } - - ~_State_info() { delete[] _M_visited_states; } - - _State_info(const _State_info&) = delete; - _State_info& operator=(const _State_info&) = delete; - - bool _M_visited(_StateIdT __i) + bool + _M_visited(_StateIdT __i) + { + if (_M_visited_states) { if (_M_visited_states[__i]) return true; _M_visited_states[__i] = true; - return false; } + return false; + } - void _M_queue(_StateIdT __i, const _ResultsVec& __res) - { _M_match_queue.emplace_back(__i, __res); } - - // Dummy implementations for BFS mode. - _BiIter* _M_get_sol_pos() { return nullptr; } - - // Saves states that need to be considered for the next character. - _GLIBCXX_STD_C::vector> _M_match_queue; - // Indicates which states are already visited. - bool* _M_visited_states; - // To record current solution. - _StateIdT _M_start; - }; - - template - struct _State_info<__dfs, _ResultsVec> - { - explicit - _State_info(_StateIdT __start, size_t) : _M_start(__start) - { } - - // Dummy implementations for DFS mode. - bool _M_visited(_StateIdT) const { return false; } - void _M_queue(_StateIdT, const _ResultsVec&) { } - - _BiIter* _M_get_sol_pos() { return &_M_sol_pos; } - - // To record current solution. - _StateIdT _M_start; - _BiIter _M_sol_pos; - }; + _BiIter* _M_get_sol_pos() { return &_M_sol_pos; } public: _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames; @@ -306,8 +273,15 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) const _NFAT& _M_nfa; _ResultsVec& _M_results; _GLIBCXX_STD_C::vector> _M_rep_count; - _State_info<__search_mode, _ResultsVec> _M_states; + // To record current solution. + _StateIdT _M_start; + _BiIter _M_sol_pos; + // (BFS only) Saves states that need to be considered for the next character. + _GLIBCXX_STD_C::vector> _M_match_queue; + // (BFS only) Indicates which states are already visited. + bool* _M_visited_states; _FlagT _M_flags; + const _Search_mode _M_search_mode; // Do we have a solution so far? bool _M_has_sol; }; diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 19b5ad27df4..0ae7bdf6839 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -138,8 +138,8 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) }; // The _M_main function operates in different modes, DFS mode or BFS mode, - // indicated by template parameter __dfs_mode, and dispatches to one of the - // _M_main_dispatch overloads. + // indicated by _M_search_mode, and dispatches to either _M_main_dfs or + // _M_main_bfs. // // ------------------------------------------------------------ // @@ -163,12 +163,12 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) template bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_main_dispatch(_Match_mode __match_mode, __dfs) + _M_main_dfs(_Match_mode __match_mode) { _M_has_sol = false; - *_M_states._M_get_sol_pos() = _BiIter(); + *_M_get_sol_pos() = _BiIter(); _M_cur_results = _M_results; - _M_dfs(__match_mode, _M_states._M_start); + _M_dfs(__match_mode, _M_start); return _M_has_sol; } @@ -182,9 +182,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) // It first computes epsilon closure (states that can be achieved without // consuming characters) for every state that's still matching, // using the same DFS algorithm, but doesn't re-enter states (using - // _M_states._M_visited to check), nor follow _S_opcode_match. + // _M_visited to check), nor follow _S_opcode_match. // - // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue) + // Then apply DFS using every _S_opcode_match (in _M_match_queue) // as the start state. // // It significantly reduces potential duplicate states, so has a better @@ -197,17 +197,17 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) template bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: - _M_main_dispatch(_Match_mode __match_mode, __bfs) + _M_main_bfs(_Match_mode __match_mode) { - _M_states._M_queue(_M_states._M_start, _M_results); + _M_match_queue.emplace_back(_M_start, _M_results); bool __ret = false; while (1) { _M_has_sol = false; - if (_M_states._M_match_queue.empty()) + if (_M_match_queue.empty()) break; - std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false); - auto __old_queue = std::move(_M_states._M_match_queue); + std::fill_n(_M_visited_states, _M_nfa.size(), false); + auto __old_queue = std::move(_M_match_queue); auto __alloc = _M_cur_results.get_allocator(); for (auto& __task : __old_queue) { @@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) } if (__match_mode == _Match_mode::_Exact) __ret = _M_has_sol; - _M_states._M_match_queue.clear(); + _M_match_queue.clear(); return __ret; } @@ -236,8 +236,9 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) // We may want to make this faster by not copying, // but let's not be clever prematurely. _ResultsVec __what(_M_cur_results); - _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags); - __sub._M_states._M_start = __next; + _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags, + bool(_M_search_mode)); + __sub._M_start = __next; if (__sub._M_search_from_first()) { for (size_t __i = 0; __i < __what.size(); __i++) @@ -294,7 +295,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) // Greedy. if (!__state._M_neg) { - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) // If it's DFS executor and already accepted, we're done. _M_frames.emplace_back(_S_fopcode_fallback_next, __state._M_next, _M_current); @@ -304,7 +305,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) } else // Non-greedy mode { - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) { // vice-versa. _M_frames.emplace_back(_S_fopcode_fallback_rep_once_more, __i, @@ -409,7 +410,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) const auto& __state = _M_nfa[__i]; if (_M_current == _M_end) return; - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) { if (__state._M_matches(*_M_current)) { @@ -419,7 +420,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) } else if (__state._M_matches(*_M_current)) - _M_states._M_queue(__state._M_next, _M_cur_results); + _M_match_queue.emplace_back(__state._M_next, _M_cur_results); } template @@ -479,7 +480,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_backref(_Match_mode __match_mode, _StateIdT __i) { - static_assert(__dfs_mode, "this should never be instantiated"); + __glibcxx_assert(_M_search_mode == _Search_mode::_DFS); const auto& __state = _M_nfa[__i]; auto& __submatch = _M_cur_results[__state._M_backref_index]; @@ -505,7 +506,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_accept(_Match_mode __match_mode, _StateIdT) { - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) { __glibcxx_assert(!_M_has_sol); if (__match_mode == _Match_mode::_Exact) @@ -521,7 +522,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) _M_results = _M_cur_results; else // POSIX { - __glibcxx_assert(_M_states._M_get_sol_pos()); + __glibcxx_assert(_M_get_sol_pos()); // Here's POSIX's logic: match the longest one. However // we never know which one (lhs or rhs of "|") is longer // unless we try both of them and compare the results. @@ -529,12 +530,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) // position of the last successful match. It's better // to be larger, because POSIX regex is always greedy. // TODO: This could be slow. - if (*_M_states._M_get_sol_pos() == _BiIter() - || std::distance(_M_begin, - *_M_states._M_get_sol_pos()) + if (*_M_get_sol_pos() == _BiIter() + || std::distance(_M_begin, *_M_get_sol_pos()) < std::distance(_M_begin, _M_current)) { - *_M_states._M_get_sol_pos() = _M_current; + *_M_get_sol_pos() = _M_current; _M_results = _M_cur_results; } } @@ -586,7 +586,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_node(_Match_mode __match_mode, _StateIdT __i) { - if (_M_states._M_visited(__i)) + if (_M_visited(__i)) return; switch (_M_nfa[__i]._M_opcode()) @@ -608,7 +608,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_opcode_match: _M_handle_match(__match_mode, __i); break; case _S_opcode_backref: - if constexpr (__dfs_mode) + if (_M_search_mode == _Search_mode::_DFS) _M_handle_backref(__match_mode, __i); else __builtin_unreachable(); @@ -623,10 +623,11 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) } template - void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: + bool __dummy> + void _Executor<_BiIter, _Alloc, _TraitsT, __dummy>:: _M_dfs(_Match_mode __match_mode, _StateIdT __start) { + const bool __dfs_mode = (_M_search_mode == _Search_mode::_DFS); _M_frames.emplace_back(_S_fopcode_next, __start); while (!_M_frames.empty()) @@ -639,7 +640,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_fopcode_fallback_next: if (_M_has_sol) break; - if constexpr (__dfs_mode) + if (__dfs_mode) _M_current = __frame._M_pos; [[__fallthrough__]]; case _S_fopcode_next: @@ -649,7 +650,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_fopcode_fallback_rep_once_more: if (_M_has_sol) break; - if constexpr (__dfs_mode) + if (__dfs_mode) _M_current = __frame._M_pos; [[__fallthrough__]]; case _S_fopcode_rep_once_more: @@ -659,7 +660,7 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2) case _S_fopcode_posix_alternative: _M_frames.emplace_back(_S_fopcode_merge_sol, 0, _M_has_sol); _M_frames.emplace_back(_S_fopcode_next, __frame._M_state_id); - if constexpr (__dfs_mode) + if (__dfs_mode) _M_current = __frame._M_pos; _M_has_sol = false; break;