]> git.ipfire.org Git - thirdparty/gcc.git/blob - libstdc++-v3/include/bits/regex.tcc
regex_compiler.h (__detail::_BracketMatcher): Reorder members to avoid wasted space...
[thirdparty/gcc.git] / libstdc++-v3 / include / bits / regex.tcc
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
// A non-standard switch to let the user pick the matching algorithm.
// If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the Thompson NFA
// algorithm will be used. This algorithm is not enabled by default
// and cannot be used if the regex contains back-references, but it has
// better (polynomial instead of exponential) worst-case performance.
// See __regex_algo_impl below.
37
38 namespace std _GLIBCXX_VISIBILITY(default)
39 {
40 namespace __detail
41 {
42 _GLIBCXX_BEGIN_NAMESPACE_VERSION
43
44 // Result of merging regex_match and regex_search.
45 //
46 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
47 // the other one if possible, for test purpose).
48 //
49 // That __match_mode is true means regex_match, else regex_search.
50 template<typename _BiIter, typename _Alloc,
51 typename _CharT, typename _TraitsT,
52 _RegexExecutorPolicy __policy,
53 bool __match_mode>
54 bool
55 __regex_algo_impl(_BiIter __s,
56 _BiIter __e,
57 match_results<_BiIter, _Alloc>& __m,
58 const basic_regex<_CharT, _TraitsT>& __re,
59 regex_constants::match_flag_type __flags)
60 {
61 if (__re._M_automaton == nullptr)
62 return false;
63
64 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
65 __res.resize(__re._M_automaton->_M_sub_count() + 2);
66 for (auto& __it : __res)
67 __it.matched = false;
68
69 // __policy is used by testsuites so that they can use Thompson NFA
70 // without defining a macro. Users should define
71 // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
72 bool __ret;
73 if (!__re._M_automaton->_M_has_backref
74 #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
75 && __policy == _RegexExecutorPolicy::_S_alternate
76 #endif
77 )
78 {
79 _Executor<_BiIter, _Alloc, _TraitsT, false>
80 __executor(__s, __e, __m, __re, __flags);
81 if (__match_mode)
82 __ret = __executor._M_match();
83 else
84 __ret = __executor._M_search();
85 }
86 else
87 {
88 _Executor<_BiIter, _Alloc, _TraitsT, true>
89 __executor(__s, __e, __m, __re, __flags);
90 if (__match_mode)
91 __ret = __executor._M_match();
92 else
93 __ret = __executor._M_search();
94 }
95 if (__ret)
96 {
97 for (auto __it : __res)
98 if (!__it.matched)
99 __it.first = __it.second = __e;
100 auto& __pre = __res[__res.size()-2];
101 auto& __suf = __res[__res.size()-1];
102 if (__match_mode)
103 {
104 __pre.matched = false;
105 __pre.first = __s;
106 __pre.second = __s;
107 __suf.matched = false;
108 __suf.first = __e;
109 __suf.second = __e;
110 }
111 else
112 {
113 __pre.first = __s;
114 __pre.second = __res[0].first;
115 __pre.matched = (__pre.first != __pre.second);
116 __suf.first = __res[0].second;
117 __suf.second = __e;
118 __suf.matched = (__suf.first != __suf.second);
119 }
120 }
121 return __ret;
122 }
123
124 _GLIBCXX_END_NAMESPACE_VERSION
125 }
126
127 _GLIBCXX_BEGIN_NAMESPACE_VERSION
128
129 template<typename _Ch_type>
130 template<typename _Fwd_iter>
131 typename regex_traits<_Ch_type>::string_type
132 regex_traits<_Ch_type>::
133 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
134 {
135 typedef std::ctype<char_type> __ctype_type;
136 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
137
138 static const char* __collatenames[] =
139 {
140 "NUL",
141 "SOH",
142 "STX",
143 "ETX",
144 "EOT",
145 "ENQ",
146 "ACK",
147 "alert",
148 "backspace",
149 "tab",
150 "newline",
151 "vertical-tab",
152 "form-feed",
153 "carriage-return",
154 "SO",
155 "SI",
156 "DLE",
157 "DC1",
158 "DC2",
159 "DC3",
160 "DC4",
161 "NAK",
162 "SYN",
163 "ETB",
164 "CAN",
165 "EM",
166 "SUB",
167 "ESC",
168 "IS4",
169 "IS3",
170 "IS2",
171 "IS1",
172 "space",
173 "exclamation-mark",
174 "quotation-mark",
175 "number-sign",
176 "dollar-sign",
177 "percent-sign",
178 "ampersand",
179 "apostrophe",
180 "left-parenthesis",
181 "right-parenthesis",
182 "asterisk",
183 "plus-sign",
184 "comma",
185 "hyphen",
186 "period",
187 "slash",
188 "zero",
189 "one",
190 "two",
191 "three",
192 "four",
193 "five",
194 "six",
195 "seven",
196 "eight",
197 "nine",
198 "colon",
199 "semicolon",
200 "less-than-sign",
201 "equals-sign",
202 "greater-than-sign",
203 "question-mark",
204 "commercial-at",
205 "A",
206 "B",
207 "C",
208 "D",
209 "E",
210 "F",
211 "G",
212 "H",
213 "I",
214 "J",
215 "K",
216 "L",
217 "M",
218 "N",
219 "O",
220 "P",
221 "Q",
222 "R",
223 "S",
224 "T",
225 "U",
226 "V",
227 "W",
228 "X",
229 "Y",
230 "Z",
231 "left-square-bracket",
232 "backslash",
233 "right-square-bracket",
234 "circumflex",
235 "underscore",
236 "grave-accent",
237 "a",
238 "b",
239 "c",
240 "d",
241 "e",
242 "f",
243 "g",
244 "h",
245 "i",
246 "j",
247 "k",
248 "l",
249 "m",
250 "n",
251 "o",
252 "p",
253 "q",
254 "r",
255 "s",
256 "t",
257 "u",
258 "v",
259 "w",
260 "x",
261 "y",
262 "z",
263 "left-curly-bracket",
264 "vertical-line",
265 "right-curly-bracket",
266 "tilde",
267 "DEL",
268 ""
269 };
270
271 // same as boost
272 //static const char* __digraphs[] =
273 // {
274 // "ae",
275 // "Ae",
276 // "AE",
277 // "ch",
278 // "Ch",
279 // "CH",
280 // "ll",
281 // "Ll",
282 // "LL",
283 // "ss",
284 // "Ss",
285 // "SS",
286 // "nj",
287 // "Nj",
288 // "NJ",
289 // "dz",
290 // "Dz",
291 // "DZ",
292 // "lj",
293 // "Lj",
294 // "LJ",
295 // ""
296 // };
297
298 std::string __s(__last - __first, '?');
299 __fctyp.narrow(__first, __last, '?', &*__s.begin());
300
301 for (unsigned int __i = 0; *__collatenames[__i]; __i++)
302 if (__s == __collatenames[__i])
303 return string_type(1, __fctyp.widen(static_cast<char>(__i)));
304
305 //for (unsigned int __i = 0; *__digraphs[__i]; __i++)
306 // {
307 // const char* __now = __digraphs[__i];
308 // if (__s == __now)
309 // {
310 // string_type ret(__s.size(), __fctyp.widen('?'));
311 // __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin());
312 // return ret;
313 // }
314 // }
315 return string_type();
316 }
317
318 template<typename _Ch_type>
319 template<typename _Fwd_iter>
320 typename regex_traits<_Ch_type>::char_class_type
321 regex_traits<_Ch_type>::
322 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
323 {
324 typedef std::ctype<char_type> __ctype_type;
325 typedef std::ctype<char> __cctype_type;
326 typedef const pair<const char*, char_class_type> _ClassnameEntry;
327 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
328 const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale));
329
330 static _ClassnameEntry __classnames[] =
331 {
332 {"d", ctype_base::digit},
333 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
334 {"s", ctype_base::space},
335 {"alnum", ctype_base::alnum},
336 {"alpha", ctype_base::alpha},
337 {"blank", {0, _RegexMask::_S_blank}},
338 {"cntrl", ctype_base::cntrl},
339 {"digit", ctype_base::digit},
340 {"graph", ctype_base::graph},
341 {"lower", ctype_base::lower},
342 {"print", ctype_base::print},
343 {"punct", ctype_base::punct},
344 {"space", ctype_base::space},
345 {"upper", ctype_base::upper},
346 {"xdigit", ctype_base::xdigit},
347 };
348
349 std::string __s(__last - __first, '?');
350 __fctyp.narrow(__first, __last, '?', &__s[0]);
351 __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size());
352 for (_ClassnameEntry* __it = __classnames;
353 __it < *(&__classnames + 1);
354 ++__it)
355 {
356 if (__s == __it->first)
357 {
358 if (__icase
359 && ((__it->second
360 & (ctype_base::lower | ctype_base::upper)) != 0))
361 return ctype_base::alpha;
362 return __it->second;
363 }
364 }
365 return 0;
366 }
367
368 template<typename _Ch_type>
369 bool
370 regex_traits<_Ch_type>::
371 isctype(_Ch_type __c, char_class_type __f) const
372 {
373 typedef std::ctype<char_type> __ctype_type;
374 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
375
376 return __fctyp.is(__f._M_base, __c)
377 // [[:w:]]
378 || ((__f._M_extended & _RegexMask::_S_under)
379 && __c == __fctyp.widen('_'))
380 // [[:blank:]]
381 || ((__f._M_extended & _RegexMask::_S_blank)
382 && (__c == __fctyp.widen(' ')
383 || __c == __fctyp.widen('\t')));
384 }
385
386 template<typename _Ch_type>
387 int
388 regex_traits<_Ch_type>::
389 value(_Ch_type __ch, int __radix) const
390 {
391 std::basic_istringstream<char_type> __is(string_type(1, __ch));
392 long __v;
393 if (__radix == 8)
394 __is >> std::oct;
395 else if (__radix == 16)
396 __is >> std::hex;
397 __is >> __v;
398 return __is.fail() ? -1 : __v;
399 }
400
401 template<typename _Bi_iter, typename _Alloc>
402 template<typename _Out_iter>
403 _Out_iter match_results<_Bi_iter, _Alloc>::
404 format(_Out_iter __out,
405 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
406 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
407 match_flag_type __flags) const
408 {
409 _GLIBCXX_DEBUG_ASSERT( ready() );
410 regex_traits<char_type> __traits;
411 typedef std::ctype<char_type> __ctype_type;
412 const __ctype_type&
413 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
414
415 auto __output = [&](size_t __idx)
416 {
417 auto& __sub = _Base_type::operator[](__idx);
418 if (__sub.matched)
419 __out = std::copy(__sub.first, __sub.second, __out);
420 };
421
422 if (__flags & regex_constants::format_sed)
423 {
424 for (; __fmt_first != __fmt_last;)
425 if (*__fmt_first == '&')
426 {
427 __output(0);
428 ++__fmt_first;
429 }
430 else if (*__fmt_first == '\\')
431 {
432 if (++__fmt_first != __fmt_last
433 && __fctyp.is(__ctype_type::digit, *__fmt_first))
434 __output(__traits.value(*__fmt_first++, 10));
435 else
436 *__out++ = '\\';
437 }
438 else
439 *__out++ = *__fmt_first++;
440 }
441 else
442 {
443 while (1)
444 {
445 auto __next = std::find(__fmt_first, __fmt_last, '$');
446 if (__next == __fmt_last)
447 break;
448
449 __out = std::copy(__fmt_first, __next, __out);
450
451 auto __eat = [&](char __ch) -> bool
452 {
453 if (*__next == __ch)
454 {
455 ++__next;
456 return true;
457 }
458 return false;
459 };
460
461 if (++__next == __fmt_last)
462 *__out++ = '$';
463 else if (__eat('$'))
464 *__out++ = '$';
465 else if (__eat('&'))
466 __output(0);
467 else if (__eat('`'))
468 __output(_Base_type::size()-2);
469 else if (__eat('\''))
470 __output(_Base_type::size()-1);
471 else if (__fctyp.is(__ctype_type::digit, *__next))
472 {
473 long __num = __traits.value(*__next, 10);
474 if (++__next != __fmt_last
475 && __fctyp.is(__ctype_type::digit, *__next))
476 {
477 __num *= 10;
478 __num += __traits.value(*__next++, 10);
479 }
480 if (0 <= __num && __num < this->size())
481 __output(__num);
482 }
483 else
484 *__out++ = '$';
485 __fmt_first = __next;
486 }
487 __out = std::copy(__fmt_first, __fmt_last, __out);
488 }
489 return __out;
490 }
491
492 template<typename _Out_iter, typename _Bi_iter,
493 typename _Rx_traits, typename _Ch_type>
494 _Out_iter
495 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
496 const basic_regex<_Ch_type, _Rx_traits>& __e,
497 const _Ch_type* __fmt,
498 regex_constants::match_flag_type __flags)
499 {
500 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
501 _IterT __i(__first, __last, __e, __flags);
502 _IterT __end;
503 if (__i == __end)
504 {
505 if (!(__flags & regex_constants::format_no_copy))
506 __out = std::copy(__first, __last, __out);
507 }
508 else
509 {
510 sub_match<_Bi_iter> __last;
511 auto __len = char_traits<_Ch_type>::length(__fmt);
512 for (; __i != __end; ++__i)
513 {
514 if (!(__flags & regex_constants::format_no_copy))
515 __out = std::copy(__i->prefix().first, __i->prefix().second,
516 __out);
517 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
518 __last = __i->suffix();
519 if (__flags & regex_constants::format_first_only)
520 break;
521 }
522 if (!(__flags & regex_constants::format_no_copy))
523 __out = std::copy(__last.first, __last.second, __out);
524 }
525 return __out;
526 }
527
528 template<typename _Bi_iter,
529 typename _Ch_type,
530 typename _Rx_traits>
531 bool
532 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
533 operator==(const regex_iterator& __rhs) const
534 {
535 return (_M_match.empty() && __rhs._M_match.empty())
536 || (_M_begin == __rhs._M_begin
537 && _M_end == __rhs._M_end
538 && _M_pregex == __rhs._M_pregex
539 && _M_flags == __rhs._M_flags
540 && _M_match[0] == __rhs._M_match[0]);
541 }
542
543 template<typename _Bi_iter,
544 typename _Ch_type,
545 typename _Rx_traits>
546 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
547 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
548 operator++()
549 {
550 // In all cases in which the call to regex_search returns true,
551 // match.prefix().first shall be equal to the previous value of
552 // match[0].second, and for each index i in the half-open range
553 // [0, match.size()) for which match[i].matched is true,
554 // match[i].position() shall return distance(begin, match[i].first).
555 // [28.12.1.4.5]
556 if (_M_match[0].matched)
557 {
558 auto __start = _M_match[0].second;
559 auto __prefix_first = _M_match[0].second;
560 if (_M_match[0].first == _M_match[0].second)
561 {
562 if (__start == _M_end)
563 {
564 _M_match = value_type();
565 return *this;
566 }
567 else
568 {
569 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
570 _M_flags
571 | regex_constants::match_not_null
572 | regex_constants::match_continuous))
573 {
574 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
575 _M_match.at(_M_match.size()).first = __prefix_first;
576 _M_match._M_in_iterator = true;
577 _M_match._M_begin = _M_begin;
578 return *this;
579 }
580 else
581 ++__start;
582 }
583 }
584 _M_flags |= regex_constants::match_prev_avail;
585 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
586 {
587 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
588 _M_match.at(_M_match.size()).first = __prefix_first;
589 _M_match._M_in_iterator = true;
590 _M_match._M_begin = _M_begin;
591 }
592 else
593 _M_match = value_type();
594 }
595 return *this;
596 }
597
598 template<typename _Bi_iter,
599 typename _Ch_type,
600 typename _Rx_traits>
601 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
602 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
603 operator=(const regex_token_iterator& __rhs)
604 {
605 _M_position = __rhs._M_position;
606 _M_subs = __rhs._M_subs;
607 _M_n = __rhs._M_n;
608 _M_result = __rhs._M_result;
609 _M_suffix = __rhs._M_suffix;
610 _M_has_m1 = __rhs._M_has_m1;
611 if (__rhs._M_result == &__rhs._M_suffix)
612 _M_result = &_M_suffix;
613 return *this;
614 }
615
616 template<typename _Bi_iter,
617 typename _Ch_type,
618 typename _Rx_traits>
619 bool
620 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
621 operator==(const regex_token_iterator& __rhs) const
622 {
623 if (_M_end_of_seq() && __rhs._M_end_of_seq())
624 return true;
625 if (_M_suffix.matched && __rhs._M_suffix.matched
626 && _M_suffix == __rhs._M_suffix)
627 return true;
628 if (_M_end_of_seq() || _M_suffix.matched
629 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
630 return false;
631 return _M_position == __rhs._M_position
632 && _M_n == __rhs._M_n
633 && _M_subs == __rhs._M_subs;
634 }
635
636 template<typename _Bi_iter,
637 typename _Ch_type,
638 typename _Rx_traits>
639 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
640 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
641 operator++()
642 {
643 _Position __prev = _M_position;
644 if (_M_suffix.matched)
645 *this = regex_token_iterator();
646 else if (_M_n + 1 < _M_subs.size())
647 {
648 _M_n++;
649 _M_result = &_M_current_match();
650 }
651 else
652 {
653 _M_n = 0;
654 ++_M_position;
655 if (_M_position != _Position())
656 _M_result = &_M_current_match();
657 else if (_M_has_m1 && __prev->suffix().length() != 0)
658 {
659 _M_suffix.matched = true;
660 _M_suffix.first = __prev->suffix().first;
661 _M_suffix.second = __prev->suffix().second;
662 _M_result = &_M_suffix;
663 }
664 else
665 *this = regex_token_iterator();
666 }
667 return *this;
668 }
669
670 template<typename _Bi_iter,
671 typename _Ch_type,
672 typename _Rx_traits>
673 void
674 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
675 _M_init(_Bi_iter __a, _Bi_iter __b)
676 {
677 _M_has_m1 = false;
678 for (auto __it : _M_subs)
679 if (__it == -1)
680 {
681 _M_has_m1 = true;
682 break;
683 }
684 if (_M_position != _Position())
685 _M_result = &_M_current_match();
686 else if (_M_has_m1)
687 {
688 _M_suffix.matched = true;
689 _M_suffix.first = __a;
690 _M_suffix.second = __b;
691 _M_result = &_M_suffix;
692 }
693 else
694 _M_result = nullptr;
695 }
696
697 _GLIBCXX_END_NAMESPACE_VERSION
698 } // namespace
699