]> git.ipfire.org Git - thirdparty/gcc.git/blame - libstdc++-v3/include/bits/regex.tcc
Update copyright years in libstdc++-v3/
[thirdparty/gcc.git] / libstdc++-v3 / include / bits / regex.tcc
CommitLineData
c2669da9
TS
1// class template regex -*- C++ -*-
2
aa118a03 3// Copyright (C) 2013-2014 Free Software Foundation, Inc.
c2669da9
TS
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
9f0d9611
TS
31// See below __regex_algo_impl to get what this is talking about. The default
32// value 1 indicated a conservative optimization without giving up worst case
33// performance.
34#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
35#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
36#endif
37
c2669da9
TS
38namespace std _GLIBCXX_VISIBILITY(default)
39{
6cb43087
TS
40namespace __detail
41{
42_GLIBCXX_BEGIN_NAMESPACE_VERSION
43
44 // Result of merging regex_match and regex_search.
45 //
603b781b
TS
46 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
47 // the other one if possible, for test purpose).
6cb43087
TS
48 //
49 // That __match_mode is true means regex_match, else regex_search.
50 template<typename _BiIter, typename _Alloc,
51 typename _CharT, typename _TraitsT,
52 _RegexExecutorPolicy __policy,
53 bool __match_mode>
54 bool
55 __regex_algo_impl(_BiIter __s,
56 _BiIter __e,
57 match_results<_BiIter, _Alloc>& __m,
58 const basic_regex<_CharT, _TraitsT>& __re,
59 regex_constants::match_flag_type __flags)
60 {
61 if (__re._M_automaton == nullptr)
62 return false;
63
64 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
65 __res.resize(__re._M_automaton->_M_sub_count() + 2);
66 for (auto& __it : __res)
67 __it.matched = false;
68
9f0d9611
TS
69 // This function decide which executor to use under given circumstances.
70 // The _S_auto policy now is the following: if a NFA has no
71 // back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
72 // quantifiers (*, +, ?), the BFS executor will be used, other wise
73 // DFS executor. This is because DFS executor has a exponential upper
74 // bound, but better best-case performace. Meanwhile, BFS executor can
75 // effectively prevent from exponential-long time matching (which must
76 // contains many quantifiers), but it's slower in average.
77 //
78 // For simple regex, BFS executor could be 2 or more times slower than
79 // DFS executor.
80 //
81 // Of course, BFS executor cannot handle back-references.
6cb43087 82 bool __ret;
9f0d9611
TS
83 if (!__re._M_automaton->_M_has_backref
84 && (__policy == _RegexExecutorPolicy::_S_alternate
85 || __re._M_automaton->_M_quant_count
86 > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
87 {
88 _Executor<_BiIter, _Alloc, _TraitsT, false>
89 __executor(__s, __e, __m, __re, __flags);
90 if (__match_mode)
91 __ret = __executor._M_match();
92 else
93 __ret = __executor._M_search();
94 }
6cb43087 95 else
9f0d9611
TS
96 {
97 _Executor<_BiIter, _Alloc, _TraitsT, true>
98 __executor(__s, __e, __m, __re, __flags);
99 if (__match_mode)
100 __ret = __executor._M_match();
101 else
102 __ret = __executor._M_search();
103 }
6cb43087
TS
104 if (__ret)
105 {
106 for (auto __it : __res)
107 if (!__it.matched)
108 __it.first = __it.second = __e;
109 auto& __pre = __res[__res.size()-2];
110 auto& __suf = __res[__res.size()-1];
111 if (__match_mode)
112 {
113 __pre.matched = false;
114 __pre.first = __s;
115 __pre.second = __s;
116 __suf.matched = false;
117 __suf.first = __e;
118 __suf.second = __e;
119 }
120 else
121 {
122 __pre.first = __s;
123 __pre.second = __res[0].first;
124 __pre.matched = (__pre.first != __pre.second);
125 __suf.first = __res[0].second;
126 __suf.second = __e;
127 __suf.matched = (__suf.first != __suf.second);
128 }
129 if (__re.flags() & regex_constants::nosubs)
130 __res.resize(3);
131 }
132 return __ret;
133 }
134
135_GLIBCXX_END_NAMESPACE_VERSION
136}
137
237c8b9d
JW
138_GLIBCXX_BEGIN_NAMESPACE_VERSION
139
c2669da9
TS
140 template<typename _Ch_type>
141 template<typename _Fwd_iter>
142 typename regex_traits<_Ch_type>::string_type
143 regex_traits<_Ch_type>::
144 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
145 {
146 typedef std::ctype<char_type> __ctype_type;
147 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
148
149 static const char* __collatenames[] =
150 {
151 "NUL",
152 "SOH",
153 "STX",
154 "ETX",
155 "EOT",
156 "ENQ",
157 "ACK",
158 "alert",
159 "backspace",
160 "tab",
161 "newline",
162 "vertical-tab",
163 "form-feed",
164 "carriage-return",
165 "SO",
166 "SI",
167 "DLE",
168 "DC1",
169 "DC2",
170 "DC3",
171 "DC4",
172 "NAK",
173 "SYN",
174 "ETB",
175 "CAN",
176 "EM",
177 "SUB",
178 "ESC",
179 "IS4",
180 "IS3",
181 "IS2",
182 "IS1",
183 "space",
184 "exclamation-mark",
185 "quotation-mark",
186 "number-sign",
187 "dollar-sign",
188 "percent-sign",
189 "ampersand",
190 "apostrophe",
191 "left-parenthesis",
192 "right-parenthesis",
193 "asterisk",
194 "plus-sign",
195 "comma",
196 "hyphen",
197 "period",
198 "slash",
199 "zero",
200 "one",
201 "two",
202 "three",
203 "four",
204 "five",
205 "six",
206 "seven",
207 "eight",
208 "nine",
209 "colon",
210 "semicolon",
211 "less-than-sign",
212 "equals-sign",
213 "greater-than-sign",
214 "question-mark",
215 "commercial-at",
216 "A",
217 "B",
218 "C",
219 "D",
220 "E",
221 "F",
222 "G",
223 "H",
224 "I",
225 "J",
226 "K",
227 "L",
228 "M",
229 "N",
230 "O",
231 "P",
232 "Q",
233 "R",
234 "S",
235 "T",
236 "U",
237 "V",
238 "W",
239 "X",
240 "Y",
241 "Z",
242 "left-square-bracket",
243 "backslash",
244 "right-square-bracket",
245 "circumflex",
246 "underscore",
247 "grave-accent",
248 "a",
249 "b",
250 "c",
251 "d",
252 "e",
253 "f",
254 "g",
255 "h",
256 "i",
257 "j",
258 "k",
259 "l",
260 "m",
261 "n",
262 "o",
263 "p",
264 "q",
265 "r",
266 "s",
267 "t",
268 "u",
269 "v",
270 "w",
271 "x",
272 "y",
273 "z",
274 "left-curly-bracket",
275 "vertical-line",
276 "right-curly-bracket",
277 "tilde",
278 "DEL",
279 ""
280 };
281
282 // same as boost
283 //static const char* __digraphs[] =
284 // {
285 // "ae",
286 // "Ae",
287 // "AE",
288 // "ch",
289 // "Ch",
290 // "CH",
291 // "ll",
292 // "Ll",
293 // "LL",
294 // "ss",
295 // "Ss",
296 // "SS",
297 // "nj",
298 // "Nj",
299 // "NJ",
300 // "dz",
301 // "Dz",
302 // "DZ",
303 // "lj",
304 // "Lj",
305 // "LJ",
306 // ""
307 // };
308
309 std::string __s(__last - __first, '?');
310 __fctyp.narrow(__first, __last, '?', &*__s.begin());
311
312 for (unsigned int __i = 0; *__collatenames[__i]; __i++)
313 if (__s == __collatenames[__i])
314 return string_type(1, __fctyp.widen(static_cast<char>(__i)));
315
316 //for (unsigned int __i = 0; *__digraphs[__i]; __i++)
317 // {
318 // const char* __now = __digraphs[__i];
319 // if (__s == __now)
320 // {
321 // string_type ret(__s.size(), __fctyp.widen('?'));
322 // __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin());
323 // return ret;
324 // }
325 // }
326 return string_type();
327 }
328
329 template<typename _Ch_type>
330 template<typename _Fwd_iter>
331 typename regex_traits<_Ch_type>::char_class_type
332 regex_traits<_Ch_type>::
333 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
334 {
335 typedef std::ctype<char_type> __ctype_type;
336 typedef std::ctype<char> __cctype_type;
337 typedef const pair<const char*, char_class_type> _ClassnameEntry;
338 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
339 const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale));
340
341 static _ClassnameEntry __classnames[] =
342 {
343 {"d", ctype_base::digit},
344 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
345 {"s", ctype_base::space},
346 {"alnum", ctype_base::alnum},
347 {"alpha", ctype_base::alpha},
348 {"blank", {0, _RegexMask::_S_blank}},
349 {"cntrl", ctype_base::cntrl},
350 {"digit", ctype_base::digit},
351 {"graph", ctype_base::graph},
352 {"lower", ctype_base::lower},
353 {"print", ctype_base::print},
354 {"punct", ctype_base::punct},
355 {"space", ctype_base::space},
356 {"upper", ctype_base::upper},
357 {"xdigit", ctype_base::xdigit},
358 };
359
360 std::string __s(__last - __first, '?');
361 __fctyp.narrow(__first, __last, '?', &__s[0]);
362 __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size());
363 for (_ClassnameEntry* __it = __classnames;
364 __it < *(&__classnames + 1);
365 ++__it)
366 {
367 if (__s == __it->first)
368 {
369 if (__icase
370 && ((__it->second
371 & (ctype_base::lower | ctype_base::upper)) != 0))
372 return ctype_base::alpha;
373 return __it->second;
374 }
375 }
376 return 0;
377 }
378
379 template<typename _Ch_type>
380 bool
381 regex_traits<_Ch_type>::
382 isctype(_Ch_type __c, char_class_type __f) const
383 {
384 typedef std::ctype<char_type> __ctype_type;
385 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
386
387 return __fctyp.is(__f._M_base, __c)
388 // [[:w:]]
389 || ((__f._M_extended & _RegexMask::_S_under)
390 && __c == __fctyp.widen('_'))
391 // [[:blank:]]
392 || ((__f._M_extended & _RegexMask::_S_blank)
393 && (__c == __fctyp.widen(' ')
394 || __c == __fctyp.widen('\t')));
395 }
396
397 template<typename _Ch_type>
398 int
399 regex_traits<_Ch_type>::
400 value(_Ch_type __ch, int __radix) const
401 {
402 std::basic_istringstream<char_type> __is(string_type(1, __ch));
6cb43087 403 long __v;
c2669da9
TS
404 if (__radix == 8)
405 __is >> std::oct;
406 else if (__radix == 16)
407 __is >> std::hex;
408 __is >> __v;
409 return __is.fail() ? -1 : __v;
410 }
411
412 template<typename _Bi_iter, typename _Alloc>
413 template<typename _Out_iter>
414 _Out_iter match_results<_Bi_iter, _Alloc>::
415 format(_Out_iter __out,
416 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
417 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
418 match_flag_type __flags) const
419 {
420 _GLIBCXX_DEBUG_ASSERT( ready() );
421 regex_traits<char_type> __traits;
422 typedef std::ctype<char_type> __ctype_type;
423 const __ctype_type&
424 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
425
6cb43087 426 auto __output = [&](size_t __idx)
c2669da9
TS
427 {
428 auto& __sub = _Base_type::operator[](__idx);
429 if (__sub.matched)
430 std::copy(__sub.first, __sub.second, __out);
431 };
432
433 if (__flags & regex_constants::format_sed)
434 {
435 for (; __fmt_first != __fmt_last;)
436 if (*__fmt_first == '&')
437 {
438 __output(0);
439 ++__fmt_first;
440 }
441 else if (*__fmt_first == '\\')
442 {
443 if (++__fmt_first != __fmt_last
444 && __fctyp.is(__ctype_type::digit, *__fmt_first))
445 __output(__traits.value(*__fmt_first++, 10));
446 else
447 *__out++ = '\\';
448 }
449 else
450 *__out++ = *__fmt_first++;
451 }
452 else
453 {
454 while (1)
455 {
456 auto __next = std::find(__fmt_first, __fmt_last, '$');
457 if (__next == __fmt_last)
458 break;
459
460 std::copy(__fmt_first, __next, __out);
461
462 auto __eat = [&](char __ch) -> bool
463 {
464 if (*__next == __ch)
465 {
466 ++__next;
467 return true;
468 }
469 return false;
470 };
471
472 if (++__next == __fmt_last)
473 *__out++ = '$';
474 else if (__eat('$'))
475 *__out++ = '$';
476 else if (__eat('&'))
477 __output(0);
478 else if (__eat('`'))
479 __output(_Base_type::size()-2);
480 else if (__eat('\''))
481 __output(_Base_type::size()-1);
482 else if (__fctyp.is(__ctype_type::digit, *__next))
483 {
6cb43087 484 long __num = __traits.value(*__next, 10);
c2669da9
TS
485 if (++__next != __fmt_last
486 && __fctyp.is(__ctype_type::digit, *__next))
487 {
488 __num *= 10;
489 __num += __traits.value(*__next++, 10);
490 }
491 if (0 <= __num && __num < this->size())
492 __output(__num);
493 }
494 else
495 *__out++ = '$';
496 __fmt_first = __next;
497 }
498 std::copy(__fmt_first, __fmt_last, __out);
499 }
500 return __out;
501 }
502
c2669da9
TS
503 template<typename _Out_iter, typename _Bi_iter,
504 typename _Rx_traits, typename _Ch_type>
505 _Out_iter
506 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
507 const basic_regex<_Ch_type, _Rx_traits>& __e,
508 const _Ch_type* __fmt,
6cb43087 509 regex_constants::match_flag_type __flags)
c2669da9
TS
510 {
511 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
512 _IterT __i(__first, __last, __e, __flags);
513 _IterT __end;
514 if (__i == __end)
515 {
516 if (!(__flags & regex_constants::format_no_copy))
517 std::copy(__first, __last, __out);
518 }
519 else
520 {
521 sub_match<_Bi_iter> __last;
522 auto __len = char_traits<_Ch_type>::length(__fmt);
523 for (; __i != __end; ++__i)
524 {
525 if (!(__flags & regex_constants::format_no_copy))
526 std::copy(__i->prefix().first, __i->prefix().second, __out);
527 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
528 __last = __i->suffix();
529 if (__flags & regex_constants::format_first_only)
530 break;
531 }
532 if (!(__flags & regex_constants::format_no_copy))
533 std::copy(__last.first, __last.second, __out);
534 }
535 return __out;
536 }
537
538 template<typename _Bi_iter,
539 typename _Ch_type,
540 typename _Rx_traits>
541 bool
542 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
543 operator==(const regex_iterator& __rhs) const
544 {
545 return (_M_match.empty() && __rhs._M_match.empty())
546 || (_M_begin == __rhs._M_begin
547 && _M_end == __rhs._M_end
548 && _M_pregex == __rhs._M_pregex
549 && _M_flags == __rhs._M_flags
550 && _M_match[0] == __rhs._M_match[0]);
551 }
552
553 template<typename _Bi_iter,
554 typename _Ch_type,
555 typename _Rx_traits>
556 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
557 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
558 operator++()
559 {
560 // In all cases in which the call to regex_search returns true,
561 // match.prefix().first shall be equal to the previous value of
562 // match[0].second, and for each index i in the half-open range
563 // [0, match.size()) for which match[i].matched is true,
564 // match[i].position() shall return distance(begin, match[i].first).
565 // [28.12.1.4.5]
566 if (_M_match[0].matched)
567 {
568 auto __start = _M_match[0].second;
569 auto __prefix_first = _M_match[0].second;
570 if (_M_match[0].first == _M_match[0].second)
ab1c993b
TS
571 {
572 if (__start == _M_end)
573 {
574 _M_match = value_type();
575 return *this;
576 }
577 else
578 {
579 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
580 _M_flags
581 | regex_constants::match_not_null
582 | regex_constants::match_continuous))
583 {
584 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
585 _M_match.at(_M_match.size()).first = __prefix_first;
586 _M_match._M_in_iterator = true;
587 _M_match._M_begin = _M_begin;
588 return *this;
589 }
590 else
591 ++__start;
592 }
593 }
c2669da9
TS
594 _M_flags |= regex_constants::match_prev_avail;
595 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
596 {
597 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
598 _M_match.at(_M_match.size()).first = __prefix_first;
599 _M_match._M_in_iterator = true;
600 _M_match._M_begin = _M_begin;
601 }
602 else
603 _M_match = value_type();
604 }
605 return *this;
606 }
607
608 template<typename _Bi_iter,
609 typename _Ch_type,
610 typename _Rx_traits>
611 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
612 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
613 operator=(const regex_token_iterator& __rhs)
614 {
615 _M_position = __rhs._M_position;
616 _M_subs = __rhs._M_subs;
617 _M_n = __rhs._M_n;
618 _M_result = __rhs._M_result;
619 _M_suffix = __rhs._M_suffix;
620 _M_has_m1 = __rhs._M_has_m1;
621 if (__rhs._M_result == &__rhs._M_suffix)
622 _M_result = &_M_suffix;
703344ca 623 return *this;
c2669da9
TS
624 }
625
626 template<typename _Bi_iter,
627 typename _Ch_type,
628 typename _Rx_traits>
629 bool
630 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
631 operator==(const regex_token_iterator& __rhs) const
632 {
633 if (_M_end_of_seq() && __rhs._M_end_of_seq())
634 return true;
635 if (_M_suffix.matched && __rhs._M_suffix.matched
636 && _M_suffix == __rhs._M_suffix)
637 return true;
638 if (_M_end_of_seq() || _M_suffix.matched
639 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
640 return false;
641 return _M_position == __rhs._M_position
642 && _M_n == __rhs._M_n
643 && _M_subs == __rhs._M_subs;
644 }
645
646 template<typename _Bi_iter,
647 typename _Ch_type,
648 typename _Rx_traits>
649 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
650 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
651 operator++()
652 {
653 _Position __prev = _M_position;
654 if (_M_suffix.matched)
655 *this = regex_token_iterator();
656 else if (_M_n + 1 < _M_subs.size())
657 {
658 _M_n++;
659 _M_result = &_M_current_match();
660 }
661 else
662 {
663 _M_n = 0;
664 ++_M_position;
665 if (_M_position != _Position())
666 _M_result = &_M_current_match();
667 else if (_M_has_m1 && __prev->suffix().length() != 0)
668 {
669 _M_suffix.matched = true;
670 _M_suffix.first = __prev->suffix().first;
671 _M_suffix.second = __prev->suffix().second;
672 _M_result = &_M_suffix;
673 }
674 else
675 *this = regex_token_iterator();
676 }
677 return *this;
678 }
679
680 template<typename _Bi_iter,
681 typename _Ch_type,
682 typename _Rx_traits>
683 void
684 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
685 _M_init(_Bi_iter __a, _Bi_iter __b)
686 {
687 _M_has_m1 = false;
688 for (auto __it : _M_subs)
689 if (__it == -1)
690 {
691 _M_has_m1 = true;
692 break;
693 }
694 if (_M_position != _Position())
695 _M_result = &_M_current_match();
696 else if (_M_has_m1)
697 {
698 _M_suffix.matched = true;
699 _M_suffix.first = __a;
700 _M_suffix.second = __b;
701 _M_result = &_M_suffix;
702 }
703 else
704 _M_result = nullptr;
705 }
706
707_GLIBCXX_END_NAMESPACE_VERSION
708} // namespace
709