1 // class template regex -*- C++ -*-
3 // Copyright (C) 2013-2024 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 namespace std _GLIBCXX_VISIBILITY(default)
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
37 /// @cond undocumented
39 // Result of merging regex_match and regex_search.
41 // __policy can be either _S_auto (automatic dispatch) or _S_alternate (use
42 // the other one if possible, for testing purposes).
44 // If __match_mode is true this is regex_match; otherwise regex_search.
// Common implementation behind regex_match and regex_search; the comment
// block just above describes __policy and __match_mode.
// NOTE(review): several original lines (braces, a few parameters and
// branch conditions) are not visible in this view, so the notes below
// only describe the statements that are shown -- confirm against the
// complete header before relying on the control flow.
45 template<typename _BiIter, typename _Alloc,
46 typename _CharT, typename _TraitsT>
48 __regex_algo_impl(_BiIter __s,
50 match_results<_BiIter, _Alloc>& __m,
51 const basic_regex<_CharT, _TraitsT>& __re,
52 regex_constants::match_flag_type __flags,
53 _RegexExecutorPolicy __policy,
// Guard for a regex that holds no compiled automaton.
56 if (__re._M_automaton == nullptr)
// __res refers to __m through its _Unchecked interface.
59 typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
// Size the result container from the automaton's sub-expression count.
61 __m._M_resize(__re._M_automaton->_M_sub_count());
// Use the _Executor<..., false> specialisation when the regex carries the
// __polynomial flag, or when the _S_alternate policy is requested and the
// automaton has no back-references.
64 if ((__re.flags() & regex_constants::__polynomial)
65 || (__policy == _RegexExecutorPolicy::_S_alternate
66 && !__re._M_automaton->_M_has_backref))
68 _Executor<_BiIter, _Alloc, _TraitsT, false>
69 __executor(__s, __e, __res, __re, __flags);
71 __ret = __executor._M_match();
73 __ret = __executor._M_search();
// Same match/search calls on the _Executor<..., true> specialisation
// (presumably the else branch of the test above).
77 _Executor<_BiIter, _Alloc, _TraitsT, true>
78 __executor(__s, __e, __res, __re, __flags);
80 __ret = __executor._M_match();
82 __ret = __executor._M_search();
// Point sub-match entries at __e (any filter on which entries are reset
// is not visible here).
86 for (auto& __it : __res)
88 __it.first = __it.second = __e;
89 auto& __pre = __m._M_prefix();
90 auto& __suf = __m._M_suffix();
93 __pre.matched = false;
96 __suf.matched = false;
// The prefix ends where the overall match __res[0] begins and the suffix
// starts where it ends; each counts as matched only if it is non-empty.
103 __pre.second = __res[0].first;
104 __pre.matched = (__pre.first != __pre.second);
105 __suf.first = __res[0].second;
107 __suf.matched = (__suf.first != __suf.second);
// Record a failed match in __m using __e (going by the helper's name; its
// body and the condition selecting this path are not shown here).
112 __m._M_establish_failed_match(__e);
117 } // namespace __detail
// Resolves the collating-element name spelled by [__first, __last).
// The name is narrowed with the ctype facet of _M_locale and searched for
// in the static __collatenames table; the loop returns a one-character
// string whose character is the widened table index of an entry.  If the
// loop returns nothing, an empty string_type is returned.
// NOTE(review): most of the table and the comparison inside the search
// loop are outside this view.
119 template<typename _Ch_type>
120 template<typename _Fwd_iter>
121 typename regex_traits<_Ch_type>::string_type
122 regex_traits<_Ch_type>::
123 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
125 typedef std::ctype<char_type> __ctype_type;
126 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
128 static const char* __collatenames[] =
221 "left-square-bracket",
223 "right-square-bracket",
253 "left-curly-bracket",
255 "right-curly-bracket",
// Narrow every input character, passing 0 as the fallback for characters
// without a narrow form.
261 for (; __first != __last; ++__first)
262 __s += __fctyp.narrow(*__first, 0);
// &__it - __collatenames is the position of the entry within the table.
264 for (const auto& __it : __collatenames)
266 return string_type(1, __fctyp.widen(
267 static_cast<char>(&__it - __collatenames)));
269 // TODO Add digraph support:
270 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
272 return string_type();
// Translates the character-class name in [__first, __last) into a
// char_class_type mask by searching the __classnames table.  The input is
// lowercased and narrowed through the ctype facet of _M_locale before it
// is compared with the table keys.
// NOTE(review): the return statements of the search loop and the start of
// the condition that uses __icase are not visible in this view.
275 template<typename _Ch_type>
276 template<typename _Fwd_iter>
277 typename regex_traits<_Ch_type>::char_class_type
278 regex_traits<_Ch_type>::
279 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
281 typedef std::ctype<char_type> __ctype_type;
282 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
284 // Mappings from class name to class mask.
285 static const pair<const char*, char_class_type> __classnames[] =
// "d" and "s" are short spellings of digit and space; "w" is alnum plus
// the extended _S_under bit.
287 {"d", ctype_base::digit},
288 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
289 {"s", ctype_base::space},
290 {"alnum", ctype_base::alnum},
291 {"alpha", ctype_base::alpha},
292 {"blank", ctype_base::blank},
293 {"cntrl", ctype_base::cntrl},
294 {"digit", ctype_base::digit},
295 {"graph", ctype_base::graph},
296 {"lower", ctype_base::lower},
297 {"print", ctype_base::print},
298 {"punct", ctype_base::punct},
299 {"space", ctype_base::space},
300 {"upper", ctype_base::upper},
301 {"xdigit", ctype_base::xdigit},
// Build the lowercase, narrow spelling of the requested name.
305 for (; __first != __last; ++__first)
306 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
308 for (const auto& __it : __classnames)
309 if (__s == __it.first)
// A mask with lower or upper bits is reported as alpha here (presumably
// only when __icase is set -- the start of this condition is not visible).
313 & (ctype_base::lower | ctype_base::upper)) != 0))
314 return ctype_base::alpha;
// Tests whether __c belongs to the character class __f.  The result is
// true when the ctype facet of _M_locale classifies __c under
// __f._M_base, or when __f has the extended _S_under bit set and __c
// equals the widened '_' character.
320 template<typename _Ch_type>
322 regex_traits<_Ch_type>::
323 isctype(_Ch_type __c, char_class_type __f) const
325 typedef std::ctype<char_type> __ctype_type;
326 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
328 return __fctyp.is(__f._M_base, __c)
330 || ((__f._M_extended & _RegexMask::_S_under)
331 && __c == __fctyp.widen('_'));
// Returns the numeric value of the digit __ch interpreted in base
// __radix, or -1 when the stream used for parsing ends up in a failed
// state.  Parsing goes through a one-character basic_istringstream.
// NOTE(review): the lines that configure the stream for the radix (only
// the __radix == 16 test is visible) and perform the extraction into __v
// are not shown in this view.
334 template<typename _Ch_type>
336 regex_traits<_Ch_type>::
337 value(_Ch_type __ch, int __radix) const
339 std::basic_istringstream<char_type> __is(string_type(1, __ch));
343 else if (__radix == 16)
346 return __is.fail() ? -1 : __v;
// Writes the expansion of the format string [__fmt_first, __fmt_last) to
// __out, substituting sub-matches of *this.  When format_sed is set the
// string is scanned for backslash escapes and '&'; otherwise '$'
// introduces a substitution.  ready() is asserted on entry.
// NOTE(review): parts of both branches (braces, else lines, the body of
// __eat, the return) are not visible in this view.
349 template<typename _Bi_iter, typename _Alloc>
350 template<typename _Out_iter>
352 match_results<_Bi_iter, _Alloc>::
353 format(_Out_iter __out,
354 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
355 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
356 match_flag_type __flags) const
358 __glibcxx_assert( ready() );
359 regex_traits<char_type> __traits;
360 typedef std::ctype<char_type> __ctype_type;
362 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
// Helper: copies sub-match number __idx to the output.
364 auto __output = [&](size_t __idx)
366 auto& __sub = (*this)[__idx];
368 __out = std::copy(__sub.first, __sub.second, __out);
// sed-style format string.
371 if (__flags & regex_constants::format_sed)
373 bool __escaping = false;
374 for (; __fmt_first != __fmt_last; __fmt_first++)
// A digit selects the sub-match with that index (presumably only while
// __escaping is set, i.e. right after a backslash).
379 if (__fctyp.is(__ctype_type::digit, *__fmt_first))
380 __output(__traits.value(*__fmt_first, 10));
382 *__out++ = *__fmt_first;
385 if (*__fmt_first == '\\')
390 if (*__fmt_first == '&')
395 *__out++ = *__fmt_first;
// Default syntax: locate the next '$' and copy the literal text before it.
404 auto __next = std::find(__fmt_first, __fmt_last, '$');
405 if (__next == __fmt_last)
408 __out = std::copy(__fmt_first, __next, __out);
// Helper working on __next; presumably consumes __ch when it is the next
// format character (body not visible here).
410 auto __eat = [&](char __ch) -> bool
420 if (++__next == __fmt_last)
// Substitute the text preceding the match.
428 auto& __sub = _M_prefix();
430 __out = std::copy(__sub.first, __sub.second, __out);
// $' substitutes the text following the match.
432 else if (__eat('\''))
434 auto& __sub = _M_suffix();
436 __out = std::copy(__sub.first, __sub.second, __out);
// A first decimal digit, optionally followed by a second one, selects a
// numbered sub-match.
438 else if (__fctyp.is(__ctype_type::digit, *__next))
440 long __num = __traits.value(*__next, 10);
441 if (++__next != __fmt_last
442 && __fctyp.is(__ctype_type::digit, *__next))
445 __num += __traits.value(*__next++, 10);
// Only indices inside [0, size()) are acted upon.
447 if (0 <= __num && __num < this->size())
452 __fmt_first = __next;
// Copy whatever remains of the format string.
454 __out = std::copy(__fmt_first, __fmt_last, __out);
// Core of regex_replace: iterates over the matches of __e in
// [__first, __last) with a regex_iterator and writes to __out, expanding
// the format string [__fmt, __fmt + __len) for each match.
// format_no_copy suppresses every copy of unmatched input below;
// format_first_only is tested after each replacement (its effect,
// presumably leaving the loop, is not visible in this view).
459 template<typename _Out_iter, typename _Bi_iter,
460 typename _Rx_traits, typename _Ch_type>
462 __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
463 const basic_regex<_Ch_type, _Rx_traits>& __e,
464 const _Ch_type* __fmt, size_t __len,
465 regex_constants::match_flag_type __flags)
467 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
468 _IterT __i(__first, __last, __e, __flags);
// Copies the whole input through unchanged (presumably the no-match case;
// the enclosing test is not visible).
472 if (!(__flags & regex_constants::format_no_copy))
473 __out = std::copy(__first, __last, __out);
// Holds the suffix of the most recent match.  NOTE(review): this local
// has the same name as the __last parameter; presumably it is declared in
// a nested scope in the complete header.
477 sub_match<_Bi_iter> __last;
478 for (; __i != __end; ++__i)
// Unmatched text in front of the current match.
480 if (!(__flags & regex_constants::format_no_copy))
481 __out = std::copy(__i->prefix().first, __i->prefix().second,
483 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
484 __last = __i->suffix();
485 if (__flags & regex_constants::format_first_only)
// Unmatched text after the last processed match.
488 if (!(__flags & regex_constants::format_no_copy))
489 __out = std::copy(__last.first, __last.second, __out);
// Equality of two regex_iterators.  The case where both hold a null
// _M_pregex is tested first (its result line is not visible in this
// view); otherwise the iterators are equal when the regex pointer, the
// [_M_begin, _M_end) range, the flags and the whole-match sub_match
// _M_match[0] all compare equal.
494 template<typename _Bi_iter,
498 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
499 operator==(const regex_iterator& __rhs) const noexcept
501 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
503 return _M_pregex == __rhs._M_pregex
504 && _M_begin == __rhs._M_begin
505 && _M_end == __rhs._M_end
506 && _M_flags == __rhs._M_flags
507 && _M_match[0] == __rhs._M_match[0];
// Advances a regex_iterator to the next match.
// NOTE(review): the declarator line (presumably operator++()) and several
// braces/else/return lines are not visible in this view.
510 template<typename _Bi_iter,
513 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
514 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
517 // In all cases in which the call to regex_search returns true,
518 // match.prefix().first shall be equal to the previous value of
519 // match[0].second, and for each index i in the half-open range
520 // [0, match.size()) for which match[i].matched is true,
521 // match[i].position() shall return distance(begin, match[i].first).
523 if (_M_match[0].matched)
// Resume where the previous match ended; that position is also where the
// next prefix has to begin.
525 auto __start = _M_match[0].second;
526 auto __prefix_first = _M_match[0].second;
// The previous match was empty.
527 if (_M_match[0].first == _M_match[0].second)
529 if (__start == _M_end)
// Try for a non-empty match anchored exactly at __start.
536 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
538 | regex_constants::match_not_null
539 | regex_constants::match_continuous))
541 __glibcxx_assert(_M_match[0].matched);
// Rebase the prefix on the end of the previous match and recompute
// whether it is non-empty.
542 auto& __prefix = _M_match._M_prefix();
543 __prefix.first = __prefix_first;
544 __prefix.matched = __prefix.first != __prefix.second;
// Store the iterator's own _M_begin into the results.
546 _M_match._M_begin = _M_begin;
// General search from __start; match_prev_avail is added to the stored
// flags before searching.
553 _M_flags |= regex_constants::match_prev_avail;
554 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
556 __glibcxx_assert(_M_match[0].matched);
// Same prefix and begin fix-up as in the anchored case.
557 auto& __prefix = _M_match._M_prefix();
558 __prefix.first = __prefix_first;
559 __prefix.matched = __prefix.first != __prefix.second;
561 _M_match._M_begin = _M_begin;
// Copy assignment: copies the members shown below from __rhs and then
// calls _M_normalize_result() (presumably to make _M_result refer to this
// object's own state rather than to __rhs -- the helper is not shown).
// NOTE(review): at least one original line between the copies, and the
// return statement, are not visible in this view.
569 template<typename _Bi_iter,
572 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
573 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
574 operator=(const regex_token_iterator& __rhs)
576 _M_position = __rhs._M_position;
577 _M_subs = __rhs._M_subs;
579 _M_suffix = __rhs._M_suffix;
580 _M_has_m1 = __rhs._M_has_m1;
581 _M_normalize_result();
// Equality of two regex_token_iterators.  Three special cases are tested
// in order: both at end of sequence; both positioned on a suffix token
// with equal suffixes; exactly the leftover case where either side is at
// end of sequence or on a suffix token.  The results returned for these
// tests are not visible in this view.  Otherwise the iterators are equal
// when _M_position, _M_n and _M_subs all compare equal.
585 template<typename _Bi_iter,
589 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
590 operator==(const regex_token_iterator& __rhs) const
592 if (_M_end_of_seq() && __rhs._M_end_of_seq())
594 if (_M_suffix.matched && __rhs._M_suffix.matched
595 && _M_suffix == __rhs._M_suffix)
597 if (_M_end_of_seq() || _M_suffix.matched
598 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
600 return _M_position == __rhs._M_position
601 && _M_n == __rhs._M_n
602 && _M_subs == __rhs._M_subs;
// Advances a regex_token_iterator to its next token.
// NOTE(review): the declarator line (presumably operator++()) and the
// statements that bump _M_n / _M_position are not visible in this view.
605 template<typename _Bi_iter,
608 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
609 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
// Keep a copy of the current position; its suffix is read further down.
612 _Position __prev = _M_position;
// Already on the trailing suffix token: reset to a default-constructed
// iterator.
613 if (_M_suffix.matched)
614 *this = regex_token_iterator();
// More entries of _M_subs remain for the current match.
615 else if (_M_n + 1 < _M_subs.size())
618 _M_result = &_M_current_match();
// _M_position still differs from a default-constructed _Position, so
// there is a match to report.
624 if (_M_position != _Position())
625 _M_result = &_M_current_match();
// No further match: if _M_has_m1 is set and the previous match left a
// non-empty suffix, expose that suffix as one more token.
626 else if (_M_has_m1 && __prev->suffix().length() != 0)
628 _M_suffix.matched = true;
629 _M_suffix.first = __prev->suffix().first;
630 _M_suffix.second = __prev->suffix().second;
631 _M_result = &_M_suffix;
// Nothing left: reset to a default-constructed iterator.
634 *this = regex_token_iterator();
// Initialises the iterator state for the input range [__a, __b).
// NOTE(review): the body of the loop over _M_subs and the condition
// guarding the suffix branch are not visible in this view.
639 template<typename _Bi_iter,
643 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
644 _M_init(_Bi_iter __a, _Bi_iter __b)
647 for (auto __it : _M_subs)
// _M_position differs from a default-constructed _Position: report the
// current match.
653 if (_M_position != _Position())
654 _M_result = &_M_current_match();
// Otherwise the whole input [__a, __b) is stored as a matched suffix and
// becomes the current result.
657 _M_suffix.matched = true;
658 _M_suffix.first = __a;
659 _M_suffix.second = __b;
660 _M_result = &_M_suffix;
666 _GLIBCXX_END_NAMESPACE_VERSION