git.ipfire.org Git - thirdparty/gcc.git/blob - libstdc++-v3/include/bits/regex.tcc
re PR libstdc++/64649 (regex_traits::lookup_classname() only works with random access...
[thirdparty/gcc.git] / libstdc++-v3 / include / bits / regex.tcc
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
// A non-standard switch to let the user pick the matching algorithm.
// If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the Thompson NFA
// algorithm will be used. This algorithm is not enabled by default,
// and cannot be used if the regex contains back-references, but has better
// (polynomial instead of exponential) worst-case performance.
// See __regex_algo_impl below.
37
38 namespace std _GLIBCXX_VISIBILITY(default)
39 {
40 namespace __detail
41 {
42 _GLIBCXX_BEGIN_NAMESPACE_VERSION
43
44 // Result of merging regex_match and regex_search.
45 //
46 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
47 // the other one if possible, for test purpose).
48 //
49 // That __match_mode is true means regex_match, else regex_search.
50 template<typename _BiIter, typename _Alloc,
51 typename _CharT, typename _TraitsT,
52 _RegexExecutorPolicy __policy,
53 bool __match_mode>
54 bool
55 __regex_algo_impl(_BiIter __s,
56 _BiIter __e,
57 match_results<_BiIter, _Alloc>& __m,
58 const basic_regex<_CharT, _TraitsT>& __re,
59 regex_constants::match_flag_type __flags)
60 {
61 if (__re._M_automaton == nullptr)
62 return false;
63
64 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
65 __m._M_begin = __s;
66 __res.resize(__re._M_automaton->_M_sub_count() + 2);
67 for (auto& __it : __res)
68 __it.matched = false;
69
70 // __policy is used by testsuites so that they can use Thompson NFA
71 // without defining a macro. Users should define
72 // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
73 bool __ret;
74 if (!__re._M_automaton->_M_has_backref
75 && !(__re._M_flags & regex_constants::ECMAScript)
76 #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
77 && __policy == _RegexExecutorPolicy::_S_alternate
78 #endif
79 )
80 {
81 _Executor<_BiIter, _Alloc, _TraitsT, false>
82 __executor(__s, __e, __m, __re, __flags);
83 if (__match_mode)
84 __ret = __executor._M_match();
85 else
86 __ret = __executor._M_search();
87 }
88 else
89 {
90 _Executor<_BiIter, _Alloc, _TraitsT, true>
91 __executor(__s, __e, __m, __re, __flags);
92 if (__match_mode)
93 __ret = __executor._M_match();
94 else
95 __ret = __executor._M_search();
96 }
97 if (__ret)
98 {
99 for (auto& __it : __res)
100 if (!__it.matched)
101 __it.first = __it.second = __e;
102 auto& __pre = __res[__res.size()-2];
103 auto& __suf = __res[__res.size()-1];
104 if (__match_mode)
105 {
106 __pre.matched = false;
107 __pre.first = __s;
108 __pre.second = __s;
109 __suf.matched = false;
110 __suf.first = __e;
111 __suf.second = __e;
112 }
113 else
114 {
115 __pre.first = __s;
116 __pre.second = __res[0].first;
117 __pre.matched = (__pre.first != __pre.second);
118 __suf.first = __res[0].second;
119 __suf.second = __e;
120 __suf.matched = (__suf.first != __suf.second);
121 }
122 }
123 return __ret;
124 }
125
126 _GLIBCXX_END_NAMESPACE_VERSION
127 }
128
129 _GLIBCXX_BEGIN_NAMESPACE_VERSION
130
131 template<typename _Ch_type>
132 template<typename _Fwd_iter>
133 typename regex_traits<_Ch_type>::string_type
134 regex_traits<_Ch_type>::
135 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
136 {
137 typedef std::ctype<char_type> __ctype_type;
138 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
139
140 static const char* __collatenames[] =
141 {
142 "NUL",
143 "SOH",
144 "STX",
145 "ETX",
146 "EOT",
147 "ENQ",
148 "ACK",
149 "alert",
150 "backspace",
151 "tab",
152 "newline",
153 "vertical-tab",
154 "form-feed",
155 "carriage-return",
156 "SO",
157 "SI",
158 "DLE",
159 "DC1",
160 "DC2",
161 "DC3",
162 "DC4",
163 "NAK",
164 "SYN",
165 "ETB",
166 "CAN",
167 "EM",
168 "SUB",
169 "ESC",
170 "IS4",
171 "IS3",
172 "IS2",
173 "IS1",
174 "space",
175 "exclamation-mark",
176 "quotation-mark",
177 "number-sign",
178 "dollar-sign",
179 "percent-sign",
180 "ampersand",
181 "apostrophe",
182 "left-parenthesis",
183 "right-parenthesis",
184 "asterisk",
185 "plus-sign",
186 "comma",
187 "hyphen",
188 "period",
189 "slash",
190 "zero",
191 "one",
192 "two",
193 "three",
194 "four",
195 "five",
196 "six",
197 "seven",
198 "eight",
199 "nine",
200 "colon",
201 "semicolon",
202 "less-than-sign",
203 "equals-sign",
204 "greater-than-sign",
205 "question-mark",
206 "commercial-at",
207 "A",
208 "B",
209 "C",
210 "D",
211 "E",
212 "F",
213 "G",
214 "H",
215 "I",
216 "J",
217 "K",
218 "L",
219 "M",
220 "N",
221 "O",
222 "P",
223 "Q",
224 "R",
225 "S",
226 "T",
227 "U",
228 "V",
229 "W",
230 "X",
231 "Y",
232 "Z",
233 "left-square-bracket",
234 "backslash",
235 "right-square-bracket",
236 "circumflex",
237 "underscore",
238 "grave-accent",
239 "a",
240 "b",
241 "c",
242 "d",
243 "e",
244 "f",
245 "g",
246 "h",
247 "i",
248 "j",
249 "k",
250 "l",
251 "m",
252 "n",
253 "o",
254 "p",
255 "q",
256 "r",
257 "s",
258 "t",
259 "u",
260 "v",
261 "w",
262 "x",
263 "y",
264 "z",
265 "left-curly-bracket",
266 "vertical-line",
267 "right-curly-bracket",
268 "tilde",
269 "DEL",
270 };
271
272 string __s;
273 for (; __first != __last; ++__first)
274 __s += __fctyp.narrow(*__first, 0);
275
276 for (const auto& __it : __collatenames)
277 if (__s == __it)
278 return string_type(1, __fctyp.widen(
279 static_cast<char>(&__it - __collatenames)));
280
281 // TODO Add digraph support:
282 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
283
284 return string_type();
285 }
286
287 template<typename _Ch_type>
288 template<typename _Fwd_iter>
289 typename regex_traits<_Ch_type>::char_class_type
290 regex_traits<_Ch_type>::
291 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
292 {
293 typedef std::ctype<char_type> __ctype_type;
294 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
295
296 // Mappings from class name to class mask.
297 static const pair<const char*, char_class_type> __classnames[] =
298 {
299 {"d", ctype_base::digit},
300 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
301 {"s", ctype_base::space},
302 {"alnum", ctype_base::alnum},
303 {"alpha", ctype_base::alpha},
304 {"blank", ctype_base::blank},
305 {"cntrl", ctype_base::cntrl},
306 {"digit", ctype_base::digit},
307 {"graph", ctype_base::graph},
308 {"lower", ctype_base::lower},
309 {"print", ctype_base::print},
310 {"punct", ctype_base::punct},
311 {"space", ctype_base::space},
312 {"upper", ctype_base::upper},
313 {"xdigit", ctype_base::xdigit},
314 };
315
316 string __s;
317 for (; __first != __last; ++__first)
318 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
319
320 for (const auto& __it : __classnames)
321 if (__s == __it.first)
322 {
323 if (__icase
324 && ((__it.second
325 & (ctype_base::lower | ctype_base::upper)) != 0))
326 return ctype_base::alpha;
327 return __it.second;
328 }
329 return 0;
330 }
331
332 template<typename _Ch_type>
333 bool
334 regex_traits<_Ch_type>::
335 isctype(_Ch_type __c, char_class_type __f) const
336 {
337 typedef std::ctype<char_type> __ctype_type;
338 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
339
340 return __fctyp.is(__f._M_base, __c)
341 // [[:w:]]
342 || ((__f._M_extended & _RegexMask::_S_under)
343 && __c == __fctyp.widen('_'));
344 }
345
346 template<typename _Ch_type>
347 int
348 regex_traits<_Ch_type>::
349 value(_Ch_type __ch, int __radix) const
350 {
351 std::basic_istringstream<char_type> __is(string_type(1, __ch));
352 long __v;
353 if (__radix == 8)
354 __is >> std::oct;
355 else if (__radix == 16)
356 __is >> std::hex;
357 __is >> __v;
358 return __is.fail() ? -1 : __v;
359 }
360
361 template<typename _Bi_iter, typename _Alloc>
362 template<typename _Out_iter>
363 _Out_iter match_results<_Bi_iter, _Alloc>::
364 format(_Out_iter __out,
365 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
366 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
367 match_flag_type __flags) const
368 {
369 _GLIBCXX_DEBUG_ASSERT( ready() );
370 regex_traits<char_type> __traits;
371 typedef std::ctype<char_type> __ctype_type;
372 const __ctype_type&
373 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
374
375 auto __output = [&](size_t __idx)
376 {
377 auto& __sub = _Base_type::operator[](__idx);
378 if (__sub.matched)
379 __out = std::copy(__sub.first, __sub.second, __out);
380 };
381
382 if (__flags & regex_constants::format_sed)
383 {
384 for (; __fmt_first != __fmt_last;)
385 if (*__fmt_first == '&')
386 {
387 __output(0);
388 ++__fmt_first;
389 }
390 else if (*__fmt_first == '\\')
391 {
392 if (++__fmt_first != __fmt_last
393 && __fctyp.is(__ctype_type::digit, *__fmt_first))
394 __output(__traits.value(*__fmt_first++, 10));
395 else
396 *__out++ = '\\';
397 }
398 else
399 *__out++ = *__fmt_first++;
400 }
401 else
402 {
403 while (1)
404 {
405 auto __next = std::find(__fmt_first, __fmt_last, '$');
406 if (__next == __fmt_last)
407 break;
408
409 __out = std::copy(__fmt_first, __next, __out);
410
411 auto __eat = [&](char __ch) -> bool
412 {
413 if (*__next == __ch)
414 {
415 ++__next;
416 return true;
417 }
418 return false;
419 };
420
421 if (++__next == __fmt_last)
422 *__out++ = '$';
423 else if (__eat('$'))
424 *__out++ = '$';
425 else if (__eat('&'))
426 __output(0);
427 else if (__eat('`'))
428 __output(_Base_type::size()-2);
429 else if (__eat('\''))
430 __output(_Base_type::size()-1);
431 else if (__fctyp.is(__ctype_type::digit, *__next))
432 {
433 long __num = __traits.value(*__next, 10);
434 if (++__next != __fmt_last
435 && __fctyp.is(__ctype_type::digit, *__next))
436 {
437 __num *= 10;
438 __num += __traits.value(*__next++, 10);
439 }
440 if (0 <= __num && __num < this->size())
441 __output(__num);
442 }
443 else
444 *__out++ = '$';
445 __fmt_first = __next;
446 }
447 __out = std::copy(__fmt_first, __fmt_last, __out);
448 }
449 return __out;
450 }
451
452 template<typename _Out_iter, typename _Bi_iter,
453 typename _Rx_traits, typename _Ch_type>
454 _Out_iter
455 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
456 const basic_regex<_Ch_type, _Rx_traits>& __e,
457 const _Ch_type* __fmt,
458 regex_constants::match_flag_type __flags)
459 {
460 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
461 _IterT __i(__first, __last, __e, __flags);
462 _IterT __end;
463 if (__i == __end)
464 {
465 if (!(__flags & regex_constants::format_no_copy))
466 __out = std::copy(__first, __last, __out);
467 }
468 else
469 {
470 sub_match<_Bi_iter> __last;
471 auto __len = char_traits<_Ch_type>::length(__fmt);
472 for (; __i != __end; ++__i)
473 {
474 if (!(__flags & regex_constants::format_no_copy))
475 __out = std::copy(__i->prefix().first, __i->prefix().second,
476 __out);
477 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
478 __last = __i->suffix();
479 if (__flags & regex_constants::format_first_only)
480 break;
481 }
482 if (!(__flags & regex_constants::format_no_copy))
483 __out = std::copy(__last.first, __last.second, __out);
484 }
485 return __out;
486 }
487
488 template<typename _Bi_iter,
489 typename _Ch_type,
490 typename _Rx_traits>
491 bool
492 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
493 operator==(const regex_iterator& __rhs) const
494 {
495 return (_M_match.empty() && __rhs._M_match.empty())
496 || (_M_begin == __rhs._M_begin
497 && _M_end == __rhs._M_end
498 && _M_pregex == __rhs._M_pregex
499 && _M_flags == __rhs._M_flags
500 && _M_match[0] == __rhs._M_match[0]);
501 }
502
503 template<typename _Bi_iter,
504 typename _Ch_type,
505 typename _Rx_traits>
506 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
507 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
508 operator++()
509 {
510 // In all cases in which the call to regex_search returns true,
511 // match.prefix().first shall be equal to the previous value of
512 // match[0].second, and for each index i in the half-open range
513 // [0, match.size()) for which match[i].matched is true,
514 // match[i].position() shall return distance(begin, match[i].first).
515 // [28.12.1.4.5]
516 if (_M_match[0].matched)
517 {
518 auto __start = _M_match[0].second;
519 auto __prefix_first = _M_match[0].second;
520 if (_M_match[0].first == _M_match[0].second)
521 {
522 if (__start == _M_end)
523 {
524 _M_match = value_type();
525 return *this;
526 }
527 else
528 {
529 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
530 _M_flags
531 | regex_constants::match_not_null
532 | regex_constants::match_continuous))
533 {
534 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
535 auto& __prefix = _M_match.at(_M_match.size());
536 __prefix.first = __prefix_first;
537 __prefix.matched = __prefix.first != __prefix.second;
538 // [28.12.1.4.5]
539 _M_match._M_begin = _M_begin;
540 return *this;
541 }
542 else
543 ++__start;
544 }
545 }
546 _M_flags |= regex_constants::match_prev_avail;
547 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
548 {
549 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
550 auto& __prefix = _M_match.at(_M_match.size());
551 __prefix.first = __prefix_first;
552 __prefix.matched = __prefix.first != __prefix.second;
553 // [28.12.1.4.5]
554 _M_match._M_begin = _M_begin;
555 }
556 else
557 _M_match = value_type();
558 }
559 return *this;
560 }
561
562 template<typename _Bi_iter,
563 typename _Ch_type,
564 typename _Rx_traits>
565 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
566 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
567 operator=(const regex_token_iterator& __rhs)
568 {
569 _M_position = __rhs._M_position;
570 _M_subs = __rhs._M_subs;
571 _M_n = __rhs._M_n;
572 _M_suffix = __rhs._M_suffix;
573 _M_has_m1 = __rhs._M_has_m1;
574 _M_normalize_result();
575 return *this;
576 }
577
578 template<typename _Bi_iter,
579 typename _Ch_type,
580 typename _Rx_traits>
581 bool
582 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
583 operator==(const regex_token_iterator& __rhs) const
584 {
585 if (_M_end_of_seq() && __rhs._M_end_of_seq())
586 return true;
587 if (_M_suffix.matched && __rhs._M_suffix.matched
588 && _M_suffix == __rhs._M_suffix)
589 return true;
590 if (_M_end_of_seq() || _M_suffix.matched
591 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
592 return false;
593 return _M_position == __rhs._M_position
594 && _M_n == __rhs._M_n
595 && _M_subs == __rhs._M_subs;
596 }
597
598 template<typename _Bi_iter,
599 typename _Ch_type,
600 typename _Rx_traits>
601 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
602 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
603 operator++()
604 {
605 _Position __prev = _M_position;
606 if (_M_suffix.matched)
607 *this = regex_token_iterator();
608 else if (_M_n + 1 < _M_subs.size())
609 {
610 _M_n++;
611 _M_result = &_M_current_match();
612 }
613 else
614 {
615 _M_n = 0;
616 ++_M_position;
617 if (_M_position != _Position())
618 _M_result = &_M_current_match();
619 else if (_M_has_m1 && __prev->suffix().length() != 0)
620 {
621 _M_suffix.matched = true;
622 _M_suffix.first = __prev->suffix().first;
623 _M_suffix.second = __prev->suffix().second;
624 _M_result = &_M_suffix;
625 }
626 else
627 *this = regex_token_iterator();
628 }
629 return *this;
630 }
631
632 template<typename _Bi_iter,
633 typename _Ch_type,
634 typename _Rx_traits>
635 void
636 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
637 _M_init(_Bi_iter __a, _Bi_iter __b)
638 {
639 _M_has_m1 = false;
640 for (auto __it : _M_subs)
641 if (__it == -1)
642 {
643 _M_has_m1 = true;
644 break;
645 }
646 if (_M_position != _Position())
647 _M_result = &_M_current_match();
648 else if (_M_has_m1)
649 {
650 _M_suffix.matched = true;
651 _M_suffix.first = __a;
652 _M_suffix.second = __b;
653 _M_result = &_M_suffix;
654 }
655 else
656 _M_result = nullptr;
657 }
658
659 _GLIBCXX_END_NAMESPACE_VERSION
660 } // namespace
661