]>
Commit | Line | Data |
---|---|---|
6cb784b6 TS |
1 | // class template regex -*- C++ -*- |
2 | ||
83ffe9cd | 3 | // Copyright (C) 2013-2023 Free Software Foundation, Inc. |
6cb784b6 TS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | /** | |
26 | * @file bits/regex_automaton.h | |
27 | * This is an internal header file, included by other library headers. | |
28 | * Do not attempt to use it directly. @headername{regex} | |
29 | */ | |
30 | ||
5bcb66df TS |
31 | // This macro defines the maximal state number a NFA can have. |
32 | #ifndef _GLIBCXX_REGEX_STATE_LIMIT | |
33 | #define _GLIBCXX_REGEX_STATE_LIMIT 100000 | |
34 | #endif | |
35 | ||
6cb784b6 TS |
36 | namespace std _GLIBCXX_VISIBILITY(default) |
37 | { | |
6cb784b6 TS |
38 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
39 | ||
4a15d842 FD |
40 | namespace __detail |
41 | { | |
6cb784b6 TS |
42 | /** |
43 | * @defgroup regex-detail Base and Implementation Classes | |
44 | * @ingroup regex | |
45 | * @{ | |
46 | */ | |
47 | ||
6cb43087 | 48 | typedef long _StateIdT; |
e3b10249 | 49 | _GLIBCXX17_INLINE constexpr _StateIdT _S_invalid_state_id = -1; |
6cb784b6 TS |
50 | |
51 | template<typename _CharT> | |
52 | using _Matcher = std::function<bool (_CharT)>; | |
53 | ||
54 | /// Operation codes that define the type of transitions within the base NFA | |
55 | /// that represents the regular expression. | |
6cb43087 | 56 | enum _Opcode : int |
6cb784b6 | 57 | { |
7b86458e TS |
58 | _S_opcode_unknown, |
59 | _S_opcode_alternative, | |
a670a9bb | 60 | _S_opcode_repeat, |
7b86458e TS |
61 | _S_opcode_backref, |
62 | _S_opcode_line_begin_assertion, | |
63 | _S_opcode_line_end_assertion, | |
f054ff5b | 64 | _S_opcode_word_boundary, |
7b86458e TS |
65 | _S_opcode_subexpr_lookahead, |
66 | _S_opcode_subexpr_begin, | |
67 | _S_opcode_subexpr_end, | |
68 | _S_opcode_dummy, | |
69 | _S_opcode_match, | |
70 | _S_opcode_accept, | |
6cb784b6 TS |
71 | }; |
72 | ||
7d9d2185 JW |
73 | struct _State_base |
74 | { | |
81b7ff07 | 75 | protected: |
7d9d2185 | 76 | _Opcode _M_opcode; // type of outgoing transition |
81b7ff07 TS |
77 | |
78 | public: | |
7d9d2185 JW |
79 | _StateIdT _M_next; // outgoing transition |
80 | union // Since they are mutually exclusive. | |
6cb784b6 | 81 | { |
7d9d2185 JW |
82 | size_t _M_subexpr; // for _S_opcode_subexpr_* |
83 | size_t _M_backref_index; // for _S_opcode_backref | |
84 | struct | |
ce645eb0 | 85 | { |
a670a9bb TS |
86 | // for _S_opcode_alternative, _S_opcode_repeat and |
87 | // _S_opcode_subexpr_lookahead | |
7d9d2185 JW |
88 | _StateIdT _M_alt; |
89 | // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or | |
90 | // quantifiers (ungreedy if set true) | |
91 | bool _M_neg; | |
ce645eb0 | 92 | }; |
81b7ff07 TS |
93 | // For _S_opcode_match |
94 | __gnu_cxx::__aligned_membuf<_Matcher<char>> _M_matcher_storage; | |
7d9d2185 | 95 | }; |
6cb784b6 | 96 | |
81b7ff07 | 97 | protected: |
df0dd04b | 98 | explicit _State_base(_Opcode __opcode) noexcept |
7d9d2185 JW |
99 | : _M_opcode(__opcode), _M_next(_S_invalid_state_id) |
100 | { } | |
101 | ||
7d9d2185 | 102 | public: |
81b7ff07 | 103 | bool |
df0dd04b | 104 | _M_has_alt() const noexcept |
81b7ff07 TS |
105 | { |
106 | return _M_opcode == _S_opcode_alternative | |
107 | || _M_opcode == _S_opcode_repeat | |
108 | || _M_opcode == _S_opcode_subexpr_lookahead; | |
109 | } | |
110 | ||
6cb784b6 | 111 | #ifdef _GLIBCXX_DEBUG |
7d9d2185 JW |
112 | std::ostream& |
113 | _M_print(std::ostream& ostr) const; | |
6cb784b6 | 114 | |
7d9d2185 JW |
115 | // Prints graphviz dot commands for state. |
116 | std::ostream& | |
117 | _M_dot(std::ostream& __ostr, _StateIdT __id) const; | |
6cb784b6 | 118 | #endif |
7d9d2185 | 119 | }; |
6cb784b6 | 120 | |
81b7ff07 | 121 | template<typename _Char_type> |
7d9d2185 | 122 | struct _State : _State_base |
6cb784b6 | 123 | { |
81b7ff07 TS |
124 | typedef _Matcher<_Char_type> _MatcherT; |
125 | static_assert(sizeof(_MatcherT) == sizeof(_Matcher<char>), | |
126 | "std::function<bool(T)> has the same size as " | |
127 | "std::function<bool(char)>"); | |
128 | static_assert(alignof(_MatcherT) == alignof(_Matcher<char>), | |
129 | "std::function<bool(T)> has the same alignment as " | |
130 | "std::function<bool(char)>"); | |
131 | ||
132 | explicit | |
df0dd04b | 133 | _State(_Opcode __opcode) noexcept : _State_base(__opcode) |
81b7ff07 TS |
134 | { |
135 | if (_M_opcode() == _S_opcode_match) | |
136 | new (this->_M_matcher_storage._M_addr()) _MatcherT(); | |
137 | } | |
6cb784b6 | 138 | |
81b7ff07 TS |
139 | _State(const _State& __rhs) : _State_base(__rhs) |
140 | { | |
141 | if (__rhs._M_opcode() == _S_opcode_match) | |
142 | new (this->_M_matcher_storage._M_addr()) | |
143 | _MatcherT(__rhs._M_get_matcher()); | |
144 | } | |
6cb784b6 | 145 | |
df0dd04b | 146 | _State(_State&& __rhs) noexcept : _State_base(__rhs) |
81b7ff07 TS |
147 | { |
148 | if (__rhs._M_opcode() == _S_opcode_match) | |
149 | new (this->_M_matcher_storage._M_addr()) | |
150 | _MatcherT(std::move(__rhs._M_get_matcher())); | |
151 | } | |
152 | ||
153 | _State& | |
154 | operator=(const _State&) = delete; | |
155 | ||
156 | ~_State() | |
157 | { | |
158 | if (_M_opcode() == _S_opcode_match) | |
159 | _M_get_matcher().~_MatcherT(); | |
160 | } | |
161 | ||
162 | // Since correct ctor and dtor rely on _M_opcode, it's better not to | |
163 | // change it over time. | |
164 | _Opcode | |
df0dd04b | 165 | _M_opcode() const noexcept |
81b7ff07 TS |
166 | { return _State_base::_M_opcode; } |
167 | ||
168 | bool | |
169 | _M_matches(_Char_type __char) const | |
170 | { return _M_get_matcher()(__char); } | |
171 | ||
172 | _MatcherT& | |
df0dd04b | 173 | _M_get_matcher() noexcept |
81b7ff07 TS |
174 | { return *static_cast<_MatcherT*>(this->_M_matcher_storage._M_addr()); } |
175 | ||
176 | const _MatcherT& | |
df0dd04b | 177 | _M_get_matcher() const noexcept |
81b7ff07 TS |
178 | { |
179 | return *static_cast<const _MatcherT*>( | |
180 | this->_M_matcher_storage._M_addr()); | |
181 | } | |
7d9d2185 | 182 | }; |
6cb784b6 | 183 | |
7d9d2185 JW |
184 | struct _NFA_base |
185 | { | |
7d9d2185 JW |
186 | typedef regex_constants::syntax_option_type _FlagT; |
187 | ||
188 | explicit | |
df0dd04b | 189 | _NFA_base(_FlagT __f) noexcept |
7d9d2185 | 190 | : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), |
a670a9bb | 191 | _M_has_backref(false) |
7d9d2185 JW |
192 | { } |
193 | ||
194 | _NFA_base(_NFA_base&&) = default; | |
195 | ||
196 | protected: | |
197 | ~_NFA_base() = default; | |
198 | ||
199 | public: | |
200 | _FlagT | |
df0dd04b | 201 | _M_options() const noexcept |
7d9d2185 JW |
202 | { return _M_flags; } |
203 | ||
204 | _StateIdT | |
df0dd04b | 205 | _M_start() const noexcept |
7d9d2185 JW |
206 | { return _M_start_state; } |
207 | ||
c44c5f3d | 208 | size_t |
df0dd04b | 209 | _M_sub_count() const noexcept |
7d9d2185 JW |
210 | { return _M_subexpr_count; } |
211 | ||
ec332f1b | 212 | _GLIBCXX_STD_C::vector<size_t> _M_paren_stack; |
7d9d2185 JW |
213 | _FlagT _M_flags; |
214 | _StateIdT _M_start_state; | |
c44c5f3d | 215 | size_t _M_subexpr_count; |
7d9d2185 JW |
216 | bool _M_has_backref; |
217 | }; | |
218 | ||
68e69ce2 | 219 | template<typename _TraitsT> |
7d9d2185 | 220 | struct _NFA |
ec332f1b | 221 | : _NFA_base, _GLIBCXX_STD_C::vector<_State<typename _TraitsT::char_type>> |
7d9d2185 | 222 | { |
81b7ff07 TS |
223 | typedef typename _TraitsT::char_type _Char_type; |
224 | typedef _State<_Char_type> _StateT; | |
225 | typedef _Matcher<_Char_type> _MatcherT; | |
7d9d2185 | 226 | |
2bde8cac TS |
227 | _NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags) |
228 | : _NFA_base(__flags) | |
229 | { _M_traits.imbue(__loc); } | |
6cb784b6 | 230 | |
7d9d2185 JW |
231 | // for performance reasons _NFA objects should only be moved not copied |
232 | _NFA(const _NFA&) = delete; | |
233 | _NFA(_NFA&&) = default; | |
6cb784b6 TS |
234 | |
235 | _StateIdT | |
236 | _M_insert_accept() | |
237 | { | |
7b86458e | 238 | auto __ret = _M_insert_state(_StateT(_S_opcode_accept)); |
7b86458e | 239 | return __ret; |
6cb784b6 TS |
240 | } |
241 | ||
242 | _StateIdT | |
6b6147dd JW |
243 | _M_insert_alt(_StateIdT __next, _StateIdT __alt, |
244 | bool __neg __attribute__((__unused__))) | |
6cb784b6 | 245 | { |
7b86458e TS |
246 | _StateT __tmp(_S_opcode_alternative); |
247 | // It labels every quantifier to make greedy comparison easier in BFS | |
248 | // approach. | |
a670a9bb TS |
249 | __tmp._M_next = __next; |
250 | __tmp._M_alt = __alt; | |
251 | return _M_insert_state(std::move(__tmp)); | |
252 | } | |
253 | ||
254 | _StateIdT | |
255 | _M_insert_repeat(_StateIdT __next, _StateIdT __alt, bool __neg) | |
256 | { | |
257 | _StateT __tmp(_S_opcode_repeat); | |
258 | // It labels every quantifier to make greedy comparison easier in BFS | |
259 | // approach. | |
7b86458e TS |
260 | __tmp._M_next = __next; |
261 | __tmp._M_alt = __alt; | |
262 | __tmp._M_neg = __neg; | |
7d9d2185 | 263 | return _M_insert_state(std::move(__tmp)); |
6cb784b6 TS |
264 | } |
265 | ||
266 | _StateIdT | |
267 | _M_insert_matcher(_MatcherT __m) | |
268 | { | |
7b86458e | 269 | _StateT __tmp(_S_opcode_match); |
81b7ff07 | 270 | __tmp._M_get_matcher() = std::move(__m); |
7d9d2185 | 271 | return _M_insert_state(std::move(__tmp)); |
6cb784b6 TS |
272 | } |
273 | ||
274 | _StateIdT | |
275 | _M_insert_subexpr_begin() | |
276 | { | |
7d9d2185 JW |
277 | auto __id = this->_M_subexpr_count++; |
278 | this->_M_paren_stack.push_back(__id); | |
7b86458e TS |
279 | _StateT __tmp(_S_opcode_subexpr_begin); |
280 | __tmp._M_subexpr = __id; | |
7d9d2185 | 281 | return _M_insert_state(std::move(__tmp)); |
6cb784b6 TS |
282 | } |
283 | ||
284 | _StateIdT | |
285 | _M_insert_subexpr_end() | |
286 | { | |
7b86458e | 287 | _StateT __tmp(_S_opcode_subexpr_end); |
7d9d2185 JW |
288 | __tmp._M_subexpr = this->_M_paren_stack.back(); |
289 | this->_M_paren_stack.pop_back(); | |
290 | return _M_insert_state(std::move(__tmp)); | |
6cb784b6 TS |
291 | } |
292 | ||
ce645eb0 | 293 | _StateIdT |
6cb43087 | 294 | _M_insert_backref(size_t __index); |
6cb784b6 | 295 | |
7c812a2a | 296 | _StateIdT |
7b86458e TS |
297 | _M_insert_line_begin() |
298 | { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); } | |
299 | ||
300 | _StateIdT | |
301 | _M_insert_line_end() | |
302 | { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); } | |
303 | ||
304 | _StateIdT | |
305 | _M_insert_word_bound(bool __neg) | |
7c812a2a | 306 | { |
f054ff5b | 307 | _StateT __tmp(_S_opcode_word_boundary); |
7b86458e | 308 | __tmp._M_neg = __neg; |
7d9d2185 | 309 | return _M_insert_state(std::move(__tmp)); |
7c812a2a TS |
310 | } |
311 | ||
7b86458e TS |
312 | _StateIdT |
313 | _M_insert_lookahead(_StateIdT __alt, bool __neg) | |
314 | { | |
315 | _StateT __tmp(_S_opcode_subexpr_lookahead); | |
316 | __tmp._M_alt = __alt; | |
317 | __tmp._M_neg = __neg; | |
7d9d2185 | 318 | return _M_insert_state(std::move(__tmp)); |
7b86458e TS |
319 | } |
320 | ||
321 | _StateIdT | |
322 | _M_insert_dummy() | |
323 | { return _M_insert_state(_StateT(_S_opcode_dummy)); } | |
324 | ||
7c812a2a TS |
325 | _StateIdT |
326 | _M_insert_state(_StateT __s) | |
327 | { | |
7d9d2185 | 328 | this->push_back(std::move(__s)); |
5bcb66df | 329 | if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT) |
236d76c4 TS |
330 | __throw_regex_error( |
331 | regex_constants::error_space, | |
332 | "Number of NFA states exceeds limit. Please use shorter regex " | |
333 | "string, or use smaller brace expression, or make " | |
334 | "_GLIBCXX_REGEX_STATE_LIMIT larger."); | |
2d76fab4 | 335 | return this->size() - 1; |
7c812a2a TS |
336 | } |
337 | ||
338 | // Eliminate dummy node in this NFA to make it compact. | |
339 | void | |
340 | _M_eliminate_dummy(); | |
341 | ||
6cb784b6 TS |
342 | #ifdef _GLIBCXX_DEBUG |
343 | std::ostream& | |
344 | _M_dot(std::ostream& __ostr) const; | |
345 | #endif | |
2bde8cac TS |
346 | public: |
347 | _TraitsT _M_traits; | |
6cb784b6 TS |
348 | }; |
349 | ||
350 | /// Describes a sequence of one or more %_State, its current start | |
351 | /// and end(s). This structure contains fragments of an NFA during | |
352 | /// construction. | |
68e69ce2 | 353 | template<typename _TraitsT> |
6cb784b6 TS |
354 | class _StateSeq |
355 | { | |
356 | public: | |
68e69ce2 | 357 | typedef _NFA<_TraitsT> _RegexT; |
6cb784b6 | 358 | |
7c812a2a TS |
359 | public: |
360 | _StateSeq(_RegexT& __nfa, _StateIdT __s) | |
7d9d2185 | 361 | : _M_nfa(__nfa), _M_start(__s), _M_end(__s) |
6cb784b6 TS |
362 | { } |
363 | ||
7c812a2a TS |
364 | _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end) |
365 | : _M_nfa(__nfa), _M_start(__s), _M_end(__end) | |
6cb784b6 TS |
366 | { } |
367 | ||
7c812a2a | 368 | // Append a state on *this and change *this to the new sequence. |
6cb784b6 | 369 | void |
7c812a2a TS |
370 | _M_append(_StateIdT __id) |
371 | { | |
372 | _M_nfa[_M_end]._M_next = __id; | |
373 | _M_end = __id; | |
374 | } | |
6cb784b6 | 375 | |
7c812a2a | 376 | // Append a sequence on *this and change *this to the new sequence. |
6cb784b6 | 377 | void |
7c812a2a TS |
378 | _M_append(const _StateSeq& __s) |
379 | { | |
380 | _M_nfa[_M_end]._M_next = __s._M_start; | |
381 | _M_end = __s._M_end; | |
382 | } | |
6cb784b6 TS |
383 | |
384 | // Clones an entire sequence. | |
7c812a2a | 385 | _StateSeq |
6cb784b6 TS |
386 | _M_clone(); |
387 | ||
7c812a2a | 388 | public: |
6cb784b6 TS |
389 | _RegexT& _M_nfa; |
390 | _StateIdT _M_start; | |
7c812a2a | 391 | _StateIdT _M_end; |
6cb784b6 TS |
392 | }; |
393 | ||
f0b88346 | 394 | ///@} regex-detail |
6cb784b6 | 395 | } // namespace __detail |
4a15d842 FD |
396 | |
397 | _GLIBCXX_END_NAMESPACE_VERSION | |
6cb784b6 TS |
398 | } // namespace std |
399 | ||
400 | #include <bits/regex_automaton.tcc> |