]>
Commit | Line | Data |
---|---|---|
6adffd3f | 1 | // class template regex -*- C++ -*- |
2 | ||
f1717362 | 3 | // Copyright (C) 2010-2016 Free Software Foundation, Inc. |
6adffd3f | 4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | /** | |
5846aeac | 26 | * @file bits/regex_compiler.h |
27 | * This is an internal header file, included by other library headers. | |
28 | * Do not attempt to use it directly. @headername{regex} | |
6adffd3f | 29 | */ |
30 | ||
2948dd21 | 31 | namespace std _GLIBCXX_VISIBILITY(default) |
32 | { | |
49787d3e | 33 | namespace __detail |
6adffd3f | 34 | { |
c3007d7f | 35 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
36 | ||
49787d3e | 37 | /** |
38 | * @addtogroup regex-detail | |
39 | * @{ | |
40 | */ | |
41 | ||
0cb70e14 | 42 | template<typename, bool, bool> |
24ef3585 | 43 | struct _BracketMatcher; |
6adffd3f | 44 | |
7421bffd | 45 | /** |
ec42ae9c | 46 | * @brief Builds an NFA from an input iterator range. |
7421bffd | 47 | * |
48 | * The %_TraitsT type should fulfill requirements [28.3]. | |
49 | */ | |
0cb70e14 | 50 | template<typename _TraitsT> |
6adffd3f | 51 | class _Compiler |
52 | { | |
53 | public: | |
0cb70e14 | 54 | typedef typename _TraitsT::char_type _CharT; |
55 | typedef const _CharT* _IterT; | |
ddefd36e | 56 | typedef _NFA<_TraitsT> _RegexT; |
988f5995 | 57 | typedef regex_constants::syntax_option_type _FlagT; |
6adffd3f | 58 | |
0cb70e14 | 59 | _Compiler(_IterT __b, _IterT __e, |
611c73ea | 60 | const typename _TraitsT::locale_type& __traits, _FlagT __flags); |
6adffd3f | 61 | |
ef4ea6b7 | 62 | shared_ptr<const _RegexT> |
44f5b9b2 | 63 | _M_get_nfa() |
611c73ea | 64 | { return std::move(_M_nfa); } |
6adffd3f | 65 | |
66 | private: | |
0cb70e14 | 67 | typedef _Scanner<_CharT> _ScannerT; |
68 | typedef typename _TraitsT::string_type _StringT; | |
69 | typedef typename _ScannerT::_TokenT _TokenT; | |
70 | typedef _StateSeq<_TraitsT> _StateSeqT; | |
71 | typedef std::stack<_StateSeqT> _StackT; | |
72 | typedef std::ctype<_CharT> _CtypeT; | |
6adffd3f | 73 | |
74 | // accepts a specific token or returns false. | |
75 | bool | |
7eb33f7e | 76 | _M_match_token(_TokenT __token); |
6adffd3f | 77 | |
78 | void | |
79 | _M_disjunction(); | |
80 | ||
680e52bb | 81 | void |
6adffd3f | 82 | _M_alternative(); |
83 | ||
84 | bool | |
85 | _M_term(); | |
86 | ||
87 | bool | |
88 | _M_assertion(); | |
89 | ||
3b875a9d | 90 | bool |
6adffd3f | 91 | _M_quantifier(); |
92 | ||
93 | bool | |
94 | _M_atom(); | |
95 | ||
96 | bool | |
97 | _M_bracket_expression(); | |
98 | ||
0cb70e14 | 99 | template<bool __icase, bool __collate> |
100 | void | |
101 | _M_insert_any_matcher_ecma(); | |
6adffd3f | 102 | |
0cb70e14 | 103 | template<bool __icase, bool __collate> |
104 | void | |
105 | _M_insert_any_matcher_posix(); | |
6adffd3f | 106 | |
0cb70e14 | 107 | template<bool __icase, bool __collate> |
108 | void | |
109 | _M_insert_char_matcher(); | |
6adffd3f | 110 | |
0cb70e14 | 111 | template<bool __icase, bool __collate> |
112 | void | |
113 | _M_insert_character_class_matcher(); | |
6adffd3f | 114 | |
0cb70e14 | 115 | template<bool __icase, bool __collate> |
116 | void | |
117 | _M_insert_bracket_matcher(bool __neg); | |
118 | ||
940cb7d3 | 119 | // Returns true if successfully matched one term and should continue. |
120 | // Returns false if the compiler should move on. | |
0cb70e14 | 121 | template<bool __icase, bool __collate> |
940cb7d3 | 122 | bool |
3167598c | 123 | _M_expression_term(pair<bool, _CharT>& __last_char, |
124 | _BracketMatcher<_TraitsT, __icase, __collate>& | |
0cb70e14 | 125 | __matcher); |
6adffd3f | 126 | |
127 | int | |
128 | _M_cur_int_value(int __radix); | |
129 | ||
24ef3585 | 130 | bool |
131 | _M_try_char(); | |
132 | ||
d5ac524f | 133 | _StateSeqT |
134 | _M_pop() | |
135 | { | |
136 | auto ret = _M_stack.top(); | |
137 | _M_stack.pop(); | |
138 | return ret; | |
139 | } | |
24ef3585 | 140 | |
611c73ea | 141 | _FlagT _M_flags; |
142 | _ScannerT _M_scanner; | |
143 | shared_ptr<_RegexT> _M_nfa; | |
144 | _StringT _M_value; | |
145 | _StackT _M_stack; | |
146 | const _TraitsT& _M_traits; | |
147 | const _CtypeT& _M_ctype; | |
6adffd3f | 148 | }; |
149 | ||
ef4ea6b7 | 150 | template<typename _Tp> |
151 | struct __has_contiguous_iter : std::false_type { }; | |
152 | ||
153 | template<typename _Ch, typename _Tr, typename _Alloc> | |
154 | struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>> | |
155 | : std::true_type | |
156 | { }; | |
157 | ||
158 | template<typename _Tp, typename _Alloc> | |
159 | struct __has_contiguous_iter<std::vector<_Tp, _Alloc>> | |
160 | : std::true_type | |
161 | { }; | |
162 | ||
163 | template<typename _Tp> | |
164 | struct __is_contiguous_normal_iter : std::false_type { }; | |
165 | ||
166 | template<typename _CharT> | |
167 | struct __is_contiguous_normal_iter<_CharT*> : std::true_type { }; | |
168 | ||
169 | template<typename _Tp, typename _Cont> | |
170 | struct | |
171 | __is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>> | |
172 | : __has_contiguous_iter<_Cont>::type | |
173 | { }; | |
174 | ||
175 | template<typename _Iter, typename _TraitsT> | |
176 | using __enable_if_contiguous_normal_iter | |
177 | = typename enable_if< __is_contiguous_normal_iter<_Iter>::value, | |
178 | std::shared_ptr<const _NFA<_TraitsT>> >::type; | |
179 | ||
180 | template<typename _Iter, typename _TraitsT> | |
181 | using __disable_if_contiguous_normal_iter | |
182 | = typename enable_if< !__is_contiguous_normal_iter<_Iter>::value, | |
183 | std::shared_ptr<const _NFA<_TraitsT>> >::type; | |
184 | ||
185 | template<typename _FwdIter, typename _TraitsT> | |
186 | inline __enable_if_contiguous_normal_iter<_FwdIter, _TraitsT> | |
187 | __compile_nfa(_FwdIter __first, _FwdIter __last, | |
611c73ea | 188 | const typename _TraitsT::locale_type& __loc, |
ddefd36e | 189 | regex_constants::syntax_option_type __flags) |
190 | { | |
ef4ea6b7 | 191 | size_t __len = __last - __first; |
192 | const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr; | |
0cb70e14 | 193 | using _Cmplr = _Compiler<_TraitsT>; |
ef4ea6b7 | 194 | return _Cmplr(__cfirst, __cfirst + __len, __loc, __flags)._M_get_nfa(); |
195 | } | |
196 | ||
197 | template<typename _FwdIter, typename _TraitsT> | |
198 | inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT> | |
199 | __compile_nfa(_FwdIter __first, _FwdIter __last, | |
200 | const typename _TraitsT::locale_type& __loc, | |
201 | regex_constants::syntax_option_type __flags) | |
202 | { | |
203 | basic_string<typename _TraitsT::char_type> __str(__first, __last); | |
204 | return __compile_nfa(__str.data(), __str.data() + __str.size(), __loc, | |
205 | __flags); | |
ddefd36e | 206 | } |
207 | ||
0cb70e14 | 208 | // [28.13.14] |
209 | template<typename _TraitsT, bool __icase, bool __collate> | |
210 | class _RegexTranslator | |
0722e55f | 211 | { |
0cb70e14 | 212 | public: |
213 | typedef typename _TraitsT::char_type _CharT; | |
214 | typedef typename _TraitsT::string_type _StringT; | |
215 | typedef typename std::conditional<__collate, | |
216 | _StringT, | |
217 | _CharT>::type _StrTransT; | |
218 | ||
219 | explicit | |
220 | _RegexTranslator(const _TraitsT& __traits) | |
221 | : _M_traits(__traits) | |
222 | { } | |
223 | ||
224 | _CharT | |
225 | _M_translate(_CharT __ch) const | |
226 | { | |
227 | if (__icase) | |
228 | return _M_traits.translate_nocase(__ch); | |
229 | else if (__collate) | |
230 | return _M_traits.translate(__ch); | |
231 | else | |
232 | return __ch; | |
233 | } | |
234 | ||
235 | _StrTransT | |
236 | _M_transform(_CharT __ch) const | |
237 | { | |
238 | return _M_transform_impl(__ch, typename integral_constant<bool, | |
239 | __collate>::type()); | |
240 | } | |
241 | ||
242 | private: | |
243 | _StrTransT | |
244 | _M_transform_impl(_CharT __ch, false_type) const | |
245 | { return __ch; } | |
246 | ||
247 | _StrTransT | |
248 | _M_transform_impl(_CharT __ch, true_type) const | |
249 | { | |
250 | _StrTransT __str = _StrTransT(1, _M_translate(__ch)); | |
251 | return _M_traits.transform(__str.begin(), __str.end()); | |
252 | } | |
0722e55f | 253 | |
0cb70e14 | 254 | const _TraitsT& _M_traits; |
255 | }; | |
256 | ||
257 | template<typename _TraitsT> | |
258 | class _RegexTranslator<_TraitsT, false, false> | |
094a5031 | 259 | { |
0cb70e14 | 260 | public: |
261 | typedef typename _TraitsT::char_type _CharT; | |
262 | typedef _CharT _StrTransT; | |
263 | ||
264 | explicit | |
d3564844 | 265 | _RegexTranslator(const _TraitsT&) |
0cb70e14 | 266 | { } |
267 | ||
268 | _CharT | |
269 | _M_translate(_CharT __ch) const | |
270 | { return __ch; } | |
271 | ||
272 | _StrTransT | |
273 | _M_transform(_CharT __ch) const | |
274 | { return __ch; } | |
275 | }; | |
276 | ||
277 | template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate> | |
278 | struct _AnyMatcher; | |
279 | ||
280 | template<typename _TraitsT, bool __icase, bool __collate> | |
281 | struct _AnyMatcher<_TraitsT, false, __icase, __collate> | |
282 | { | |
283 | typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; | |
284 | typedef typename _TransT::_CharT _CharT; | |
535d22b6 | 285 | |
094a5031 | 286 | explicit |
287 | _AnyMatcher(const _TraitsT& __traits) | |
0cb70e14 | 288 | : _M_translator(__traits) |
289 | { } | |
290 | ||
291 | bool | |
292 | operator()(_CharT __ch) const | |
293 | { | |
294 | static auto __nul = _M_translator._M_translate('\0'); | |
295 | return _M_translator._M_translate(__ch) != __nul; | |
296 | } | |
297 | ||
298 | _TransT _M_translator; | |
299 | }; | |
300 | ||
301 | template<typename _TraitsT, bool __icase, bool __collate> | |
302 | struct _AnyMatcher<_TraitsT, true, __icase, __collate> | |
303 | { | |
304 | typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; | |
305 | typedef typename _TransT::_CharT _CharT; | |
306 | ||
307 | explicit | |
308 | _AnyMatcher(const _TraitsT& __traits) | |
309 | : _M_translator(__traits) | |
094a5031 | 310 | { } |
311 | ||
312 | bool | |
313 | operator()(_CharT __ch) const | |
9332e082 | 314 | { return _M_apply(__ch, typename is_same<_CharT, char>::type()); } |
315 | ||
316 | bool | |
317 | _M_apply(_CharT __ch, true_type) const | |
094a5031 | 318 | { |
0cb70e14 | 319 | auto __c = _M_translator._M_translate(__ch); |
320 | auto __n = _M_translator._M_translate('\n'); | |
321 | auto __r = _M_translator._M_translate('\r'); | |
322 | return __c != __n && __c != __r; | |
9332e082 | 323 | } |
324 | ||
325 | bool | |
326 | _M_apply(_CharT __ch, false_type) const | |
327 | { | |
0cb70e14 | 328 | auto __c = _M_translator._M_translate(__ch); |
329 | auto __n = _M_translator._M_translate('\n'); | |
330 | auto __r = _M_translator._M_translate('\r'); | |
331 | auto __u2028 = _M_translator._M_translate(u'\u2028'); | |
332 | auto __u2029 = _M_translator._M_translate(u'\u2029'); | |
333 | return __c != __n && __c != __r && __c != __u2028 && __c != __u2029; | |
094a5031 | 334 | } |
335 | ||
0cb70e14 | 336 | _TransT _M_translator; |
094a5031 | 337 | }; |
338 | ||
0cb70e14 | 339 | template<typename _TraitsT, bool __icase, bool __collate> |
094a5031 | 340 | struct _CharMatcher |
341 | { | |
0cb70e14 | 342 | typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; |
343 | typedef typename _TransT::_CharT _CharT; | |
094a5031 | 344 | |
9332e082 | 345 | _CharMatcher(_CharT __ch, const _TraitsT& __traits) |
0cb70e14 | 346 | : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch)) |
094a5031 | 347 | { } |
348 | ||
349 | bool | |
350 | operator()(_CharT __ch) const | |
0cb70e14 | 351 | { return _M_ch == _M_translator._M_translate(__ch); } |
094a5031 | 352 | |
0cb70e14 | 353 | _TransT _M_translator; |
354 | _CharT _M_ch; | |
094a5031 | 355 | }; |
356 | ||
24ef3585 | 357 | /// Matches a character range (bracket expression) |
0cb70e14 | 358 | template<typename _TraitsT, bool __icase, bool __collate> |
24ef3585 | 359 | struct _BracketMatcher |
360 | { | |
9332e082 | 361 | public: |
0cb70e14 | 362 | typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; |
363 | typedef typename _TransT::_CharT _CharT; | |
364 | typedef typename _TransT::_StrTransT _StrTransT; | |
365 | typedef typename _TraitsT::string_type _StringT; | |
366 | typedef typename _TraitsT::char_class_type _CharClassT; | |
24ef3585 | 367 | |
9332e082 | 368 | public: |
24ef3585 | 369 | _BracketMatcher(bool __is_non_matching, |
0cb70e14 | 370 | const _TraitsT& __traits) |
371 | : _M_class_set(0), _M_translator(__traits), _M_traits(__traits), | |
372 | _M_is_non_matching(__is_non_matching) | |
24ef3585 | 373 | { } |
374 | ||
375 | bool | |
9332e082 | 376 | operator()(_CharT __ch) const |
377 | { | |
378 | _GLIBCXX_DEBUG_ASSERT(_M_is_ready); | |
ec42ae9c | 379 | return _M_apply(__ch, _UseCache()); |
9332e082 | 380 | } |
24ef3585 | 381 | |
382 | void | |
383 | _M_add_char(_CharT __c) | |
9332e082 | 384 | { |
0cb70e14 | 385 | _M_char_set.push_back(_M_translator._M_translate(__c)); |
52278ff6 | 386 | _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); |
9332e082 | 387 | } |
24ef3585 | 388 | |
940cb7d3 | 389 | _StringT |
390 | _M_add_collate_element(const _StringT& __s) | |
24ef3585 | 391 | { |
392 | auto __st = _M_traits.lookup_collatename(__s.data(), | |
393 | __s.data() + __s.size()); | |
394 | if (__st.empty()) | |
ca083a8f | 395 | __throw_regex_error(regex_constants::error_collate, |
396 | "Invalid collate element."); | |
0cb70e14 | 397 | _M_char_set.push_back(_M_translator._M_translate(__st[0])); |
52278ff6 | 398 | _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); |
940cb7d3 | 399 | return __st; |
24ef3585 | 400 | } |
401 | ||
402 | void | |
403 | _M_add_equivalence_class(const _StringT& __s) | |
404 | { | |
45ae37cf | 405 | auto __st = _M_traits.lookup_collatename(__s.data(), |
406 | __s.data() + __s.size()); | |
407 | if (__st.empty()) | |
ca083a8f | 408 | __throw_regex_error(regex_constants::error_collate, |
409 | "Invalid equivalence class."); | |
45ae37cf | 410 | __st = _M_traits.transform_primary(__st.data(), |
411 | __st.data() + __st.size()); | |
0cb70e14 | 412 | _M_equiv_set.push_back(__st); |
52278ff6 | 413 | _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); |
24ef3585 | 414 | } |
415 | ||
9de6d0d9 | 416 | // __neg should be true for \D, \S and \W only. |
24ef3585 | 417 | void |
9de6d0d9 | 418 | _M_add_character_class(const _StringT& __s, bool __neg) |
24ef3585 | 419 | { |
45ae37cf | 420 | auto __mask = _M_traits.lookup_classname(__s.data(), |
421 | __s.data() + __s.size(), | |
0cb70e14 | 422 | __icase); |
45ae37cf | 423 | if (__mask == 0) |
ca083a8f | 424 | __throw_regex_error(regex_constants::error_collate, |
425 | "Invalid character class."); | |
9de6d0d9 | 426 | if (!__neg) |
427 | _M_class_set |= __mask; | |
428 | else | |
429 | _M_neg_class_set.push_back(__mask); | |
52278ff6 | 430 | _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); |
24ef3585 | 431 | } |
432 | ||
433 | void | |
434 | _M_make_range(_CharT __l, _CharT __r) | |
435 | { | |
3167598c | 436 | if (__l > __r) |
ca083a8f | 437 | __throw_regex_error(regex_constants::error_range, |
438 | "Invalid range in bracket expression."); | |
0cb70e14 | 439 | _M_range_set.push_back(make_pair(_M_translator._M_transform(__l), |
9de6d0d9 | 440 | _M_translator._M_transform(__r))); |
52278ff6 | 441 | _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); |
24ef3585 | 442 | } |
443 | ||
9332e082 | 444 | void |
445 | _M_ready() | |
446 | { | |
ec42ae9c | 447 | std::sort(_M_char_set.begin(), _M_char_set.end()); |
448 | auto __end = std::unique(_M_char_set.begin(), _M_char_set.end()); | |
449 | _M_char_set.erase(__end, _M_char_set.end()); | |
450 | _M_make_cache(_UseCache()); | |
52278ff6 | 451 | _GLIBCXX_DEBUG_ONLY(_M_is_ready = true); |
9332e082 | 452 | } |
453 | ||
454 | private: | |
ec42ae9c | 455 | // Currently we only use the cache for char |
456 | typedef typename std::is_same<_CharT, char>::type _UseCache; | |
457 | ||
458 | static constexpr size_t | |
4f6fa302 | 459 | _S_cache_size() |
460 | { | |
461 | return 1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value)); | |
462 | } | |
ec42ae9c | 463 | |
9332e082 | 464 | struct _Dummy { }; |
ec42ae9c | 465 | typedef typename std::conditional<_UseCache::value, |
466 | std::bitset<_S_cache_size()>, | |
467 | _Dummy>::type _CacheT; | |
468 | typedef typename std::make_unsigned<_CharT>::type _UnsignedCharT; | |
9332e082 | 469 | |
9332e082 | 470 | bool |
471 | _M_apply(_CharT __ch, false_type) const; | |
472 | ||
473 | bool | |
474 | _M_apply(_CharT __ch, true_type) const | |
475 | { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; } | |
476 | ||
9332e082 | 477 | void |
478 | _M_make_cache(true_type) | |
479 | { | |
ec42ae9c | 480 | for (unsigned __i = 0; __i < _M_cache.size(); __i++) |
481 | _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type()); | |
9332e082 | 482 | } |
483 | ||
484 | void | |
485 | _M_make_cache(false_type) | |
486 | { } | |
487 | ||
488 | private: | |
0cb70e14 | 489 | std::vector<_CharT> _M_char_set; |
490 | std::vector<_StringT> _M_equiv_set; | |
491 | std::vector<pair<_StrTransT, _StrTransT>> _M_range_set; | |
9de6d0d9 | 492 | std::vector<_CharClassT> _M_neg_class_set; |
0cb70e14 | 493 | _CharClassT _M_class_set; |
494 | _TransT _M_translator; | |
495 | const _TraitsT& _M_traits; | |
496 | bool _M_is_non_matching; | |
ec42ae9c | 497 | _CacheT _M_cache; |
9332e082 | 498 | #ifdef _GLIBCXX_DEBUG |
52278ff6 | 499 | bool _M_is_ready = false; |
9332e082 | 500 | #endif |
24ef3585 | 501 | }; |
502 | ||
49787d3e | 503 | //@} regex-detail |
2948dd21 | 504 | _GLIBCXX_END_NAMESPACE_VERSION |
49787d3e | 505 | } // namespace __detail |
c3007d7f | 506 | } // namespace std |
988f5995 | 507 | |
508 | #include <bits/regex_compiler.tcc> |