]>
Commit | Line | Data |
---|---|---|
6cb784b6 TS |
1 | // class template regex -*- C++ -*- |
2 | ||
8d9254fc | 3 | // Copyright (C) 2013-2020 Free Software Foundation, Inc. |
6cb784b6 TS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | /** | |
26 | * @file bits/regex_compiler.tcc | |
27 | * This is an internal header file, included by other library headers. | |
28 | * Do not attempt to use it directly. @headername{regex} | |
29 | */ | |
30 | ||
b21abcee | 31 | // FIXME make comments doxygen format. |
7c812a2a | 32 | |
472a7639 | 33 | /* |
7c812a2a | 34 | // This compiler refers to "Regular Expression Matching Can Be Simple And Fast" |
472a7639 | 35 | // (http://swtch.com/~rsc/regexp/regexp1.html), |
7c812a2a TS |
36 | // but doesn't strictly follow it. |
37 | // | |
38 | // When compiling, states are *chained* instead of tree- or graph-constructed. | |
39 | // It's more like structured programs: there's if statement and loop statement. | |
40 | // | |
097f0bcf JW |
41 | // For alternative structure (say "a|b"), aka "if statement", two branches |
42 | // should be constructed. However, these two shall merge to an "end_tag" at | |
43 | // the end of this operator: | |
7c812a2a TS |
44 | // |
45 | // branch1 | |
46 | // / \ | |
47 | // => begin_tag end_tag => | |
48 | // \ / | |
49 | // branch2 | |
50 | // | |
51 | // This is the difference between this implementation and that in Russ's | |
52 | // article. | |
53 | // | |
54 | // That's why we introduced dummy node here ------ "end_tag" is a dummy node. | |
472a7639 JW |
55 | // All dummy nodes will be eliminated at the end of compilation. |
56 | */ | |
7c812a2a | 57 | |
6cb784b6 TS |
58 | namespace std _GLIBCXX_VISIBILITY(default) |
59 | { | |
6cb784b6 TS |
60 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
61 | ||
4a15d842 FD |
62 | namespace __detail |
63 | { | |
ddf41e9d TS |
64 | template<typename _TraitsT> |
65 | _Compiler<_TraitsT>:: | |
66 | _Compiler(_IterT __b, _IterT __e, | |
2bde8cac | 67 | const typename _TraitsT::locale_type& __loc, _FlagT __flags) |
c2669da9 TS |
68 | : _M_flags((__flags |
69 | & (regex_constants::ECMAScript | |
70 | | regex_constants::basic | |
71 | | regex_constants::extended | |
72 | | regex_constants::grep | |
73 | | regex_constants::egrep | |
74 | | regex_constants::awk)) | |
75 | ? __flags | |
76 | : __flags | regex_constants::ECMAScript), | |
2bde8cac TS |
77 | _M_scanner(__b, __e, _M_flags, __loc), |
78 | _M_nfa(make_shared<_RegexT>(__loc, _M_flags)), | |
79 | _M_traits(_M_nfa->_M_traits), | |
80 | _M_ctype(std::use_facet<_CtypeT>(__loc)) | |
6cb784b6 | 81 | { |
2bde8cac TS |
82 | _StateSeqT __r(*_M_nfa, _M_nfa->_M_start()); |
83 | __r._M_append(_M_nfa->_M_insert_subexpr_begin()); | |
7c812a2a TS |
84 | this->_M_disjunction(); |
85 | if (!_M_match_token(_ScannerT::_S_token_eof)) | |
86 | __throw_regex_error(regex_constants::error_paren); | |
87 | __r._M_append(_M_pop()); | |
2f1e8e7c | 88 | __glibcxx_assert(_M_stack.empty()); |
2bde8cac TS |
89 | __r._M_append(_M_nfa->_M_insert_subexpr_end()); |
90 | __r._M_append(_M_nfa->_M_insert_accept()); | |
91 | _M_nfa->_M_eliminate_dummy(); | |
6cb784b6 TS |
92 | } |
93 | ||
ddf41e9d | 94 | template<typename _TraitsT> |
6cb784b6 | 95 | void |
ddf41e9d | 96 | _Compiler<_TraitsT>:: |
6cb784b6 TS |
97 | _M_disjunction() |
98 | { | |
99 | this->_M_alternative(); | |
7c812a2a | 100 | while (_M_match_token(_ScannerT::_S_token_or)) |
6cb784b6 | 101 | { |
7c812a2a TS |
102 | _StateSeqT __alt1 = _M_pop(); |
103 | this->_M_alternative(); | |
104 | _StateSeqT __alt2 = _M_pop(); | |
2bde8cac | 105 | auto __end = _M_nfa->_M_insert_dummy(); |
7c812a2a TS |
106 | __alt1._M_append(__end); |
107 | __alt2._M_append(__end); | |
ad9ec7b3 TS |
108 | // __alt2 is state._M_next, __alt1 is state._M_alt. The executor |
109 | // executes _M_alt before _M_next, as well as executing left | |
110 | // alternative before right one. | |
2bde8cac TS |
111 | _M_stack.push(_StateSeqT(*_M_nfa, |
112 | _M_nfa->_M_insert_alt( | |
113 | __alt2._M_start, __alt1._M_start, false), | |
7c812a2a | 114 | __end)); |
6cb784b6 TS |
115 | } |
116 | } | |
117 | ||
ddf41e9d | 118 | template<typename _TraitsT> |
6cb784b6 | 119 | void |
ddf41e9d | 120 | _Compiler<_TraitsT>:: |
6cb784b6 TS |
121 | _M_alternative() |
122 | { | |
123 | if (this->_M_term()) | |
124 | { | |
7c812a2a | 125 | _StateSeqT __re = _M_pop(); |
6cb784b6 | 126 | this->_M_alternative(); |
7c812a2a | 127 | __re._M_append(_M_pop()); |
6cb784b6 TS |
128 | _M_stack.push(__re); |
129 | } | |
7c812a2a | 130 | else |
2bde8cac | 131 | _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_dummy())); |
6cb784b6 TS |
132 | } |
133 | ||
ddf41e9d | 134 | template<typename _TraitsT> |
6cb784b6 | 135 | bool |
ddf41e9d | 136 | _Compiler<_TraitsT>:: |
6cb784b6 TS |
137 | _M_term() |
138 | { | |
139 | if (this->_M_assertion()) | |
140 | return true; | |
141 | if (this->_M_atom()) | |
142 | { | |
053eb1f3 | 143 | while (this->_M_quantifier()); |
6cb784b6 TS |
144 | return true; |
145 | } | |
146 | return false; | |
147 | } | |
148 | ||
ddf41e9d | 149 | template<typename _TraitsT> |
6cb784b6 | 150 | bool |
ddf41e9d | 151 | _Compiler<_TraitsT>:: |
6cb784b6 TS |
152 | _M_assertion() |
153 | { | |
7c812a2a | 154 | if (_M_match_token(_ScannerT::_S_token_line_begin)) |
2bde8cac | 155 | _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_line_begin())); |
7c812a2a | 156 | else if (_M_match_token(_ScannerT::_S_token_line_end)) |
2bde8cac | 157 | _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_line_end())); |
7c812a2a | 158 | else if (_M_match_token(_ScannerT::_S_token_word_bound)) |
097f0bcf | 159 | // _M_value[0] == 'n' means it's negative, say "not word boundary". |
2bde8cac | 160 | _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa-> |
7b86458e | 161 | _M_insert_word_bound(_M_value[0] == 'n'))); |
7c812a2a | 162 | else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin)) |
7b86458e TS |
163 | { |
164 | auto __neg = _M_value[0] == 'n'; | |
165 | this->_M_disjunction(); | |
166 | if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) | |
236d76c4 TS |
167 | __throw_regex_error(regex_constants::error_paren, |
168 | "Parenthesis is not closed."); | |
7b86458e | 169 | auto __tmp = _M_pop(); |
2bde8cac | 170 | __tmp._M_append(_M_nfa->_M_insert_accept()); |
7b86458e TS |
171 | _M_stack.push( |
172 | _StateSeqT( | |
2bde8cac TS |
173 | *_M_nfa, |
174 | _M_nfa->_M_insert_lookahead(__tmp._M_start, __neg))); | |
7b86458e | 175 | } |
7c812a2a TS |
176 | else |
177 | return false; | |
178 | return true; | |
6cb784b6 TS |
179 | } |
180 | ||
ddf41e9d | 181 | template<typename _TraitsT> |
053eb1f3 | 182 | bool |
ddf41e9d | 183 | _Compiler<_TraitsT>:: |
6cb784b6 TS |
184 | _M_quantifier() |
185 | { | |
c2669da9 | 186 | bool __neg = (_M_flags & regex_constants::ECMAScript); |
7b86458e | 187 | auto __init = [this, &__neg]() |
6cb784b6 TS |
188 | { |
189 | if (_M_stack.empty()) | |
236d76c4 TS |
190 | __throw_regex_error(regex_constants::error_badrepeat, |
191 | "Nothing to repeat before a quantifier."); | |
7b86458e TS |
192 | __neg = __neg && _M_match_token(_ScannerT::_S_token_opt); |
193 | }; | |
194 | if (_M_match_token(_ScannerT::_S_token_closure0)) | |
195 | { | |
196 | __init(); | |
7c812a2a | 197 | auto __e = _M_pop(); |
2bde8cac TS |
198 | _StateSeqT __r(*_M_nfa, |
199 | _M_nfa->_M_insert_repeat(_S_invalid_state_id, | |
200 | __e._M_start, __neg)); | |
7c812a2a | 201 | __e._M_append(__r); |
6cb784b6 | 202 | _M_stack.push(__r); |
6cb784b6 | 203 | } |
7c812a2a | 204 | else if (_M_match_token(_ScannerT::_S_token_closure1)) |
6cb784b6 | 205 | { |
7b86458e | 206 | __init(); |
7c812a2a | 207 | auto __e = _M_pop(); |
2bde8cac TS |
208 | __e._M_append(_M_nfa->_M_insert_repeat(_S_invalid_state_id, |
209 | __e._M_start, __neg)); | |
7c812a2a | 210 | _M_stack.push(__e); |
6cb784b6 | 211 | } |
7c812a2a | 212 | else if (_M_match_token(_ScannerT::_S_token_opt)) |
6cb784b6 | 213 | { |
7b86458e | 214 | __init(); |
7c812a2a | 215 | auto __e = _M_pop(); |
2bde8cac TS |
216 | auto __end = _M_nfa->_M_insert_dummy(); |
217 | _StateSeqT __r(*_M_nfa, | |
218 | _M_nfa->_M_insert_repeat(_S_invalid_state_id, | |
219 | __e._M_start, __neg)); | |
7c812a2a TS |
220 | __e._M_append(__end); |
221 | __r._M_append(__end); | |
6cb784b6 | 222 | _M_stack.push(__r); |
6cb784b6 | 223 | } |
7c812a2a | 224 | else if (_M_match_token(_ScannerT::_S_token_interval_begin)) |
6cb784b6 | 225 | { |
c2669da9 | 226 | if (_M_stack.empty()) |
236d76c4 TS |
227 | __throw_regex_error(regex_constants::error_badrepeat, |
228 | "Nothing to repeat before a quantifier."); | |
6cb784b6 | 229 | if (!_M_match_token(_ScannerT::_S_token_dup_count)) |
236d76c4 TS |
230 | __throw_regex_error(regex_constants::error_badbrace, |
231 | "Unexpected token in brace expression."); | |
7c812a2a | 232 | _StateSeqT __r(_M_pop()); |
2bde8cac | 233 | _StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy()); |
6cb43087 | 234 | long __min_rep = _M_cur_int_value(10); |
c2669da9 | 235 | bool __infi = false; |
6cb43087 | 236 | long __n; |
c2669da9 | 237 | |
7c812a2a | 238 | // {3 |
6cb784b6 | 239 | if (_M_match_token(_ScannerT::_S_token_comma)) |
7c812a2a | 240 | if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7} |
c2669da9 TS |
241 | __n = _M_cur_int_value(10) - __min_rep; |
242 | else | |
243 | __infi = true; | |
244 | else | |
245 | __n = 0; | |
6cb784b6 | 246 | if (!_M_match_token(_ScannerT::_S_token_interval_end)) |
236d76c4 TS |
247 | __throw_regex_error(regex_constants::error_brace, |
248 | "Unexpected end of brace expression."); | |
c2669da9 TS |
249 | |
250 | __neg = __neg && _M_match_token(_ScannerT::_S_token_opt); | |
251 | ||
6cb43087 | 252 | for (long __i = 0; __i < __min_rep; ++__i) |
c2669da9 TS |
253 | __e._M_append(__r._M_clone()); |
254 | ||
255 | if (__infi) | |
256 | { | |
257 | auto __tmp = __r._M_clone(); | |
2bde8cac TS |
258 | _StateSeqT __s(*_M_nfa, |
259 | _M_nfa->_M_insert_repeat(_S_invalid_state_id, | |
260 | __tmp._M_start, __neg)); | |
c2669da9 TS |
261 | __tmp._M_append(__s); |
262 | __e._M_append(__s); | |
263 | } | |
264 | else | |
265 | { | |
266 | if (__n < 0) | |
236d76c4 TS |
267 | __throw_regex_error(regex_constants::error_badbrace, |
268 | "Invalid range in brace expression."); | |
2bde8cac | 269 | auto __end = _M_nfa->_M_insert_dummy(); |
c2669da9 TS |
270 | // _M_alt is the "match more" branch, and _M_next is the |
271 | // "match less" one. Switch _M_alt and _M_next of all created | |
097f0bcf | 272 | // nodes. This is a hack but IMO works well. |
c2669da9 | 273 | std::stack<_StateIdT> __stack; |
6cb43087 | 274 | for (long __i = 0; __i < __n; ++__i) |
c2669da9 TS |
275 | { |
276 | auto __tmp = __r._M_clone(); | |
2bde8cac TS |
277 | auto __alt = _M_nfa->_M_insert_repeat(__tmp._M_start, |
278 | __end, __neg); | |
c2669da9 | 279 | __stack.push(__alt); |
2bde8cac | 280 | __e._M_append(_StateSeqT(*_M_nfa, __alt, __tmp._M_end)); |
c2669da9 TS |
281 | } |
282 | __e._M_append(__end); | |
283 | while (!__stack.empty()) | |
284 | { | |
2bde8cac | 285 | auto& __tmp = (*_M_nfa)[__stack.top()]; |
c2669da9 | 286 | __stack.pop(); |
a8e67466 | 287 | std::swap(__tmp._M_next, __tmp._M_alt); |
c2669da9 TS |
288 | } |
289 | } | |
7c812a2a | 290 | _M_stack.push(__e); |
6cb784b6 | 291 | } |
053eb1f3 TS |
292 | else |
293 | return false; | |
294 | return true; | |
6cb784b6 TS |
295 | } |
296 | ||
472a7639 | 297 | #define __INSERT_REGEX_MATCHER(__func, ...)\ |
2d76fab4 | 298 | do {\ |
ddf41e9d TS |
299 | if (!(_M_flags & regex_constants::icase))\ |
300 | if (!(_M_flags & regex_constants::collate))\ | |
472a7639 | 301 | __func<false, false>(__VA_ARGS__);\ |
ddf41e9d | 302 | else\ |
472a7639 | 303 | __func<false, true>(__VA_ARGS__);\ |
ddf41e9d TS |
304 | else\ |
305 | if (!(_M_flags & regex_constants::collate))\ | |
472a7639 | 306 | __func<true, false>(__VA_ARGS__);\ |
ddf41e9d | 307 | else\ |
472a7639 | 308 | __func<true, true>(__VA_ARGS__);\ |
2d76fab4 | 309 | } while (false) |
ddf41e9d TS |
310 | |
311 | template<typename _TraitsT> | |
6cb784b6 | 312 | bool |
ddf41e9d | 313 | _Compiler<_TraitsT>:: |
6cb784b6 TS |
314 | _M_atom() |
315 | { | |
316 | if (_M_match_token(_ScannerT::_S_token_anychar)) | |
f43cc2a6 | 317 | { |
ddf41e9d TS |
318 | if (!(_M_flags & regex_constants::ECMAScript)) |
319 | __INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix); | |
f43cc2a6 | 320 | else |
ddf41e9d | 321 | __INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma); |
f43cc2a6 | 322 | } |
7c812a2a | 323 | else if (_M_try_char()) |
ddf41e9d | 324 | __INSERT_REGEX_MATCHER(_M_insert_char_matcher); |
7c812a2a | 325 | else if (_M_match_token(_ScannerT::_S_token_backref)) |
2bde8cac | 326 | _M_stack.push(_StateSeqT(*_M_nfa, _M_nfa-> |
7c812a2a TS |
327 | _M_insert_backref(_M_cur_int_value(10)))); |
328 | else if (_M_match_token(_ScannerT::_S_token_quoted_class)) | |
ddf41e9d | 329 | __INSERT_REGEX_MATCHER(_M_insert_character_class_matcher); |
7c812a2a | 330 | else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin)) |
6cb784b6 | 331 | { |
2bde8cac | 332 | _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy()); |
7c812a2a TS |
333 | this->_M_disjunction(); |
334 | if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) | |
236d76c4 TS |
335 | __throw_regex_error(regex_constants::error_paren, |
336 | "Parenthesis is not closed."); | |
7c812a2a TS |
337 | __r._M_append(_M_pop()); |
338 | _M_stack.push(__r); | |
6cb784b6 | 339 | } |
7c812a2a | 340 | else if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) |
6cb784b6 | 341 | { |
2bde8cac | 342 | _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin()); |
6cb784b6 TS |
343 | this->_M_disjunction(); |
344 | if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) | |
236d76c4 TS |
345 | __throw_regex_error(regex_constants::error_paren, |
346 | "Parenthesis is not closed."); | |
7c812a2a | 347 | __r._M_append(_M_pop()); |
2bde8cac | 348 | __r._M_append(_M_nfa->_M_insert_subexpr_end()); |
6cb784b6 | 349 | _M_stack.push(__r); |
6cb784b6 | 350 | } |
7c812a2a TS |
351 | else if (!_M_bracket_expression()) |
352 | return false; | |
353 | return true; | |
6cb784b6 TS |
354 | } |
355 | ||
ddf41e9d | 356 | template<typename _TraitsT> |
6cb784b6 | 357 | bool |
ddf41e9d | 358 | _Compiler<_TraitsT>:: |
6cb784b6 TS |
359 | _M_bracket_expression() |
360 | { | |
33fbbb76 TS |
361 | bool __neg = |
362 | _M_match_token(_ScannerT::_S_token_bracket_neg_begin); | |
363 | if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin))) | |
e280b6ff | 364 | return false; |
ddf41e9d TS |
365 | __INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg); |
366 | return true; | |
367 | } | |
368 | #undef __INSERT_REGEX_MATCHER | |
369 | ||
370 | template<typename _TraitsT> | |
371 | template<bool __icase, bool __collate> | |
372 | void | |
373 | _Compiler<_TraitsT>:: | |
374 | _M_insert_any_matcher_ecma() | |
375 | { | |
2bde8cac TS |
376 | _M_stack.push(_StateSeqT(*_M_nfa, |
377 | _M_nfa->_M_insert_matcher | |
ddf41e9d TS |
378 | (_AnyMatcher<_TraitsT, true, __icase, __collate> |
379 | (_M_traits)))); | |
380 | } | |
381 | ||
382 | template<typename _TraitsT> | |
383 | template<bool __icase, bool __collate> | |
384 | void | |
385 | _Compiler<_TraitsT>:: | |
386 | _M_insert_any_matcher_posix() | |
387 | { | |
2bde8cac TS |
388 | _M_stack.push(_StateSeqT(*_M_nfa, |
389 | _M_nfa->_M_insert_matcher | |
ddf41e9d TS |
390 | (_AnyMatcher<_TraitsT, false, __icase, __collate> |
391 | (_M_traits)))); | |
392 | } | |
393 | ||
394 | template<typename _TraitsT> | |
395 | template<bool __icase, bool __collate> | |
396 | void | |
397 | _Compiler<_TraitsT>:: | |
398 | _M_insert_char_matcher() | |
399 | { | |
2bde8cac TS |
400 | _M_stack.push(_StateSeqT(*_M_nfa, |
401 | _M_nfa->_M_insert_matcher | |
ddf41e9d TS |
402 | (_CharMatcher<_TraitsT, __icase, __collate> |
403 | (_M_value[0], _M_traits)))); | |
404 | } | |
405 | ||
406 | template<typename _TraitsT> | |
407 | template<bool __icase, bool __collate> | |
408 | void | |
409 | _Compiler<_TraitsT>:: | |
410 | _M_insert_character_class_matcher() | |
411 | { | |
2f1e8e7c | 412 | __glibcxx_assert(_M_value.size() == 1); |
ddf41e9d TS |
413 | _BracketMatcher<_TraitsT, __icase, __collate> __matcher |
414 | (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits); | |
4dae67e0 | 415 | __matcher._M_add_character_class(_M_value, false); |
ddf41e9d | 416 | __matcher._M_ready(); |
2bde8cac TS |
417 | _M_stack.push(_StateSeqT(*_M_nfa, |
418 | _M_nfa->_M_insert_matcher(std::move(__matcher)))); | |
ddf41e9d TS |
419 | } |
420 | ||
421 | template<typename _TraitsT> | |
422 | template<bool __icase, bool __collate> | |
423 | void | |
424 | _Compiler<_TraitsT>:: | |
425 | _M_insert_bracket_matcher(bool __neg) | |
426 | { | |
427 | _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits); | |
79b576cc TS |
428 | pair<bool, _CharT> __last_char; // Optional<_CharT> |
429 | __last_char.first = false; | |
430 | if (!(_M_flags & regex_constants::ECMAScript)) | |
4aebb4e4 TS |
431 | { |
432 | if (_M_try_char()) | |
433 | { | |
434 | __last_char.first = true; | |
435 | __last_char.second = _M_value[0]; | |
436 | } | |
437 | else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) | |
438 | { | |
439 | __last_char.first = true; | |
440 | __last_char.second = '-'; | |
441 | } | |
442 | } | |
f9ce3c16 | 443 | while (_M_expression_term(__last_char, __matcher)); |
4aebb4e4 TS |
444 | if (__last_char.first) |
445 | __matcher._M_add_char(__last_char.second); | |
f43cc2a6 | 446 | __matcher._M_ready(); |
2bde8cac TS |
447 | _M_stack.push(_StateSeqT( |
448 | *_M_nfa, | |
449 | _M_nfa->_M_insert_matcher(std::move(__matcher)))); | |
6cb784b6 TS |
450 | } |
451 | ||
ddf41e9d TS |
452 | template<typename _TraitsT> |
453 | template<bool __icase, bool __collate> | |
f9ce3c16 | 454 | bool |
ddf41e9d | 455 | _Compiler<_TraitsT>:: |
79b576cc TS |
456 | _M_expression_term(pair<bool, _CharT>& __last_char, |
457 | _BracketMatcher<_TraitsT, __icase, __collate>& __matcher) | |
6cb784b6 | 458 | { |
f9ce3c16 TS |
459 | if (_M_match_token(_ScannerT::_S_token_bracket_end)) |
460 | return false; | |
461 | ||
4aebb4e4 TS |
462 | const auto __push_char = [&](_CharT __ch) |
463 | { | |
464 | if (__last_char.first) | |
465 | __matcher._M_add_char(__last_char.second); | |
466 | else | |
467 | __last_char.first = true; | |
468 | __last_char.second = __ch; | |
469 | }; | |
470 | const auto __flush = [&] | |
471 | { | |
472 | if (__last_char.first) | |
473 | { | |
474 | __matcher._M_add_char(__last_char.second); | |
475 | __last_char.first = false; | |
476 | } | |
477 | }; | |
478 | ||
6cb784b6 | 479 | if (_M_match_token(_ScannerT::_S_token_collsymbol)) |
f9ce3c16 TS |
480 | { |
481 | auto __symbol = __matcher._M_add_collate_element(_M_value); | |
482 | if (__symbol.size() == 1) | |
4aebb4e4 TS |
483 | __push_char(__symbol[0]); |
484 | else | |
485 | __flush(); | |
f9ce3c16 | 486 | } |
7c812a2a | 487 | else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) |
4aebb4e4 TS |
488 | { |
489 | __flush(); | |
490 | __matcher._M_add_equivalence_class(_M_value); | |
491 | } | |
7c812a2a | 492 | else if (_M_match_token(_ScannerT::_S_token_char_class_name)) |
4aebb4e4 TS |
493 | { |
494 | __flush(); | |
495 | __matcher._M_add_character_class(_M_value, false); | |
496 | } | |
497 | else if (_M_try_char()) | |
498 | __push_char(_M_value[0]); | |
f9ce3c16 TS |
499 | // POSIX doesn't allow '-' as a start-range char (say [a-z--0]), |
500 | // except when the '-' is the first or last character in the bracket | |
501 | // expression ([--0]). ECMAScript treats all '-' after a range as a | |
502 | // normal character. Also see above, where _M_expression_term gets called. | |
79b576cc TS |
503 | // |
504 | // As a result, POSIX rejects [-----], but ECMAScript doesn't. | |
505 | // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax. | |
506 | // Clang (3.5) always uses ECMAScript style even in its POSIX syntax. | |
507 | // | |
508 | // It turns out that no one reads BNFs ;) | |
4aebb4e4 | 509 | else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) |
e280b6ff | 510 | { |
79b576cc TS |
511 | if (!__last_char.first) |
512 | { | |
4aebb4e4 | 513 | if (!(_M_flags & regex_constants::ECMAScript)) |
f9ce3c16 TS |
514 | { |
515 | if (_M_match_token(_ScannerT::_S_token_bracket_end)) | |
4aebb4e4 TS |
516 | { |
517 | __push_char('-'); | |
518 | return false; | |
519 | } | |
236d76c4 TS |
520 | __throw_regex_error( |
521 | regex_constants::error_range, | |
522 | "Unexpected dash in bracket expression. For POSIX syntax, " | |
523 | "a dash is not treated literally only when it is at " | |
524 | "beginning or end."); | |
f9ce3c16 | 525 | } |
4aebb4e4 | 526 | __push_char('-'); |
79b576cc TS |
527 | } |
528 | else | |
e280b6ff | 529 | { |
4aebb4e4 | 530 | if (_M_try_char()) |
e280b6ff | 531 | { |
4aebb4e4 TS |
532 | __matcher._M_make_range(__last_char.second, _M_value[0]); |
533 | __last_char.first = false; | |
534 | } | |
535 | else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) | |
536 | { | |
537 | __matcher._M_make_range(__last_char.second, '-'); | |
538 | __last_char.first = false; | |
e280b6ff | 539 | } |
79b576cc TS |
540 | else |
541 | { | |
4aebb4e4 TS |
542 | if (_M_scanner._M_get_token() |
543 | != _ScannerT::_S_token_bracket_end) | |
544 | __throw_regex_error( | |
545 | regex_constants::error_range, | |
546 | "Character is expected after a dash."); | |
547 | __push_char('-'); | |
79b576cc | 548 | } |
e280b6ff | 549 | } |
e280b6ff | 550 | } |
4dae67e0 | 551 | else if (_M_match_token(_ScannerT::_S_token_quoted_class)) |
4aebb4e4 TS |
552 | { |
553 | __flush(); | |
554 | __matcher._M_add_character_class(_M_value, | |
555 | _M_ctype.is(_CtypeT::upper, | |
556 | _M_value[0])); | |
557 | } | |
7c812a2a | 558 | else |
236d76c4 TS |
559 | __throw_regex_error(regex_constants::error_brack, |
560 | "Unexpected character in bracket expression."); | |
f9ce3c16 TS |
561 | |
562 | return true; | |
6cb784b6 TS |
563 | } |
564 | ||
ddf41e9d | 565 | template<typename _TraitsT> |
33fbbb76 | 566 | bool |
ddf41e9d | 567 | _Compiler<_TraitsT>:: |
33fbbb76 TS |
568 | _M_try_char() |
569 | { | |
570 | bool __is_char = false; | |
571 | if (_M_match_token(_ScannerT::_S_token_oct_num)) | |
572 | { | |
573 | __is_char = true; | |
574 | _M_value.assign(1, _M_cur_int_value(8)); | |
575 | } | |
576 | else if (_M_match_token(_ScannerT::_S_token_hex_num)) | |
577 | { | |
578 | __is_char = true; | |
579 | _M_value.assign(1, _M_cur_int_value(16)); | |
580 | } | |
581 | else if (_M_match_token(_ScannerT::_S_token_ord_char)) | |
582 | __is_char = true; | |
583 | return __is_char; | |
584 | } | |
585 | ||
ddf41e9d | 586 | template<typename _TraitsT> |
7c812a2a | 587 | bool |
ddf41e9d | 588 | _Compiler<_TraitsT>:: |
7c812a2a TS |
589 | _M_match_token(_TokenT token) |
590 | { | |
591 | if (token == _M_scanner._M_get_token()) | |
592 | { | |
593 | _M_value = _M_scanner._M_get_value(); | |
594 | _M_scanner._M_advance(); | |
595 | return true; | |
596 | } | |
597 | return false; | |
598 | } | |
599 | ||
ddf41e9d | 600 | template<typename _TraitsT> |
6cb784b6 | 601 | int |
ddf41e9d | 602 | _Compiler<_TraitsT>:: |
6cb784b6 TS |
603 | _M_cur_int_value(int __radix) |
604 | { | |
6cb43087 | 605 | long __v = 0; |
6cb784b6 | 606 | for (typename _StringT::size_type __i = 0; |
33fbbb76 TS |
607 | __i < _M_value.length(); ++__i) |
608 | __v =__v * __radix + _M_traits.value(_M_value[__i], __radix); | |
6cb784b6 TS |
609 | return __v; |
610 | } | |
611 | ||
ddf41e9d | 612 | template<typename _TraitsT, bool __icase, bool __collate> |
7d9d2185 | 613 | bool |
ddf41e9d TS |
614 | _BracketMatcher<_TraitsT, __icase, __collate>:: |
615 | _M_apply(_CharT __ch, false_type) const | |
6cb784b6 | 616 | { |
974afa58 TS |
617 | return [this, __ch] |
618 | { | |
619 | if (std::binary_search(_M_char_set.begin(), _M_char_set.end(), | |
620 | _M_translator._M_translate(__ch))) | |
621 | return true; | |
622 | auto __s = _M_translator._M_transform(__ch); | |
623 | for (auto& __it : _M_range_set) | |
624 | if (_M_translator._M_match_range(__it.first, __it.second, __s)) | |
625 | return true; | |
626 | if (_M_traits.isctype(__ch, _M_class_set)) | |
627 | return true; | |
628 | if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(), | |
629 | _M_traits.transform_primary(&__ch, &__ch+1)) | |
630 | != _M_equiv_set.end()) | |
631 | return true; | |
632 | for (auto& __it : _M_neg_class_set) | |
633 | if (!_M_traits.isctype(__ch, __it)) | |
634 | return true; | |
635 | return false; | |
636 | }() ^ _M_is_non_matching; | |
6cb784b6 | 637 | } |
4a15d842 | 638 | } // namespace __detail |
6cb784b6 TS |
639 | |
640 | _GLIBCXX_END_NAMESPACE_VERSION | |
6cb784b6 | 641 | } // namespace |