// class template regex -*- C++ -*-
-// Copyright (C) 2013 Free Software Foundation, Inc.
+// Copyright (C) 2013-2024 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
* Do not attempt to use it directly. @headername{regex}
*/
-// See below __regex_algo_impl to get what this is talking about. The default
-// value 1 indicated a conservative optimization without giving up worst case
-// performance.
-#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
-#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
-#endif
-
namespace std _GLIBCXX_VISIBILITY(default)
{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
namespace __detail
{
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
+ /// @cond undocumented
// Common implementation shared by regex_match and regex_search.
//
//
// If __match_mode is true this behaves as regex_match; otherwise it
// behaves as regex_search.
template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT,
- _RegexExecutorPolicy __policy,
- bool __match_mode>
+ typename _CharT, typename _TraitsT>
bool
__regex_algo_impl(_BiIter __s,
_BiIter __e,
match_results<_BiIter, _Alloc>& __m,
const basic_regex<_CharT, _TraitsT>& __re,
- regex_constants::match_flag_type __flags)
+ regex_constants::match_flag_type __flags,
+ _RegexExecutorPolicy __policy,
+ bool __match_mode)
{
if (__re._M_automaton == nullptr)
return false;
- typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
- __res.resize(__re._M_automaton->_M_sub_count() + 2);
- for (auto& __it : __res)
- __it.matched = false;
-
- // This function decide which executor to use under given circumstances.
- // The _S_auto policy now is the following: if a NFA has no
- // back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
- // quantifiers (*, +, ?), the BFS executor will be used, other wise
- // DFS executor. This is because DFS executor has a exponential upper
- // bound, but better best-case performace. Meanwhile, BFS executor can
- // effectively prevent from exponential-long time matching (which must
- // contains many quantifiers), but it's slower in average.
- //
- // For simple regex, BFS executor could be 2 or more times slower than
- // DFS executor.
- //
- // Of course, BFS executor cannot handle back-references.
+ typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
+ __m._M_begin = __s;
+ __m._M_resize(__re._M_automaton->_M_sub_count());
+
bool __ret;
- if (!__re._M_automaton->_M_has_backref
- && (__policy == _RegexExecutorPolicy::_S_alternate
- || __re._M_automaton->_M_quant_count
- > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
+ if ((__re.flags() & regex_constants::__polynomial)
+ || (__policy == _RegexExecutorPolicy::_S_alternate
+ && !__re._M_automaton->_M_has_backref))
{
_Executor<_BiIter, _Alloc, _TraitsT, false>
- __executor(__s, __e, __m, __re, __flags);
+ __executor(__s, __e, __res, __re, __flags);
if (__match_mode)
__ret = __executor._M_match();
else
else
{
_Executor<_BiIter, _Alloc, _TraitsT, true>
- __executor(__s, __e, __m, __re, __flags);
+ __executor(__s, __e, __res, __re, __flags);
if (__match_mode)
__ret = __executor._M_match();
else
}
if (__ret)
{
- for (auto __it : __res)
+ for (auto& __it : __res)
if (!__it.matched)
__it.first = __it.second = __e;
- auto& __pre = __res[__res.size()-2];
- auto& __suf = __res[__res.size()-1];
+ auto& __pre = __m._M_prefix();
+ auto& __suf = __m._M_suffix();
if (__match_mode)
{
__pre.matched = false;
__suf.second = __e;
__suf.matched = (__suf.first != __suf.second);
}
- if (__re.flags() & regex_constants::nosubs)
- __res.resize(3);
+ }
+ else
+ {
+ __m._M_establish_failed_match(__e);
}
return __ret;
}
-
-_GLIBCXX_END_NAMESPACE_VERSION
-}
-
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
+ /// @endcond
+} // namespace __detail
template<typename _Ch_type>
template<typename _Fwd_iter>
"right-curly-bracket",
"tilde",
"DEL",
- ""
};
- // same as boost
- //static const char* __digraphs[] =
- // {
- // "ae",
- // "Ae",
- // "AE",
- // "ch",
- // "Ch",
- // "CH",
- // "ll",
- // "Ll",
- // "LL",
- // "ss",
- // "Ss",
- // "SS",
- // "nj",
- // "Nj",
- // "NJ",
- // "dz",
- // "Dz",
- // "DZ",
- // "lj",
- // "Lj",
- // "LJ",
- // ""
- // };
-
- std::string __s(__last - __first, '?');
- __fctyp.narrow(__first, __last, '?', &*__s.begin());
-
- for (unsigned int __i = 0; *__collatenames[__i]; __i++)
- if (__s == __collatenames[__i])
- return string_type(1, __fctyp.widen(static_cast<char>(__i)));
-
- //for (unsigned int __i = 0; *__digraphs[__i]; __i++)
- // {
- // const char* __now = __digraphs[__i];
- // if (__s == __now)
- // {
- // string_type ret(__s.size(), __fctyp.widen('?'));
- // __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin());
- // return ret;
- // }
- // }
+ string __s;
+ for (; __first != __last; ++__first)
+ __s += __fctyp.narrow(*__first, 0);
+
+ for (const auto& __it : __collatenames)
+ if (__s == __it)
+ return string_type(1, __fctyp.widen(
+ static_cast<char>(&__it - __collatenames)));
+
+ // TODO Add digraph support:
+ // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
+
return string_type();
}
lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
{
typedef std::ctype<char_type> __ctype_type;
- typedef std::ctype<char> __cctype_type;
- typedef const pair<const char*, char_class_type> _ClassnameEntry;
const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
- const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale));
- static _ClassnameEntry __classnames[] =
+ // Mappings from class name to class mask.
+ static const pair<const char*, char_class_type> __classnames[] =
{
{"d", ctype_base::digit},
{"w", {ctype_base::alnum, _RegexMask::_S_under}},
{"s", ctype_base::space},
{"alnum", ctype_base::alnum},
{"alpha", ctype_base::alpha},
- {"blank", {0, _RegexMask::_S_blank}},
+ {"blank", ctype_base::blank},
{"cntrl", ctype_base::cntrl},
{"digit", ctype_base::digit},
{"graph", ctype_base::graph},
{"xdigit", ctype_base::xdigit},
};
- std::string __s(__last - __first, '?');
- __fctyp.narrow(__first, __last, '?', &__s[0]);
- __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size());
- for (_ClassnameEntry* __it = __classnames;
- __it < *(&__classnames + 1);
- ++__it)
- {
- if (__s == __it->first)
- {
- if (__icase
- && ((__it->second
- & (ctype_base::lower | ctype_base::upper)) != 0))
- return ctype_base::alpha;
- return __it->second;
- }
- }
+ string __s;
+ for (; __first != __last; ++__first)
+ __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
+
+ for (const auto& __it : __classnames)
+ if (__s == __it.first)
+ {
+ if (__icase
+ && ((__it.second
+ & (ctype_base::lower | ctype_base::upper)) != 0))
+ return ctype_base::alpha;
+ return __it.second;
+ }
return 0;
}
return __fctyp.is(__f._M_base, __c)
// [[:w:]]
|| ((__f._M_extended & _RegexMask::_S_under)
- && __c == __fctyp.widen('_'))
- // [[:blank:]]
- || ((__f._M_extended & _RegexMask::_S_blank)
- && (__c == __fctyp.widen(' ')
- || __c == __fctyp.widen('\t')));
+ && __c == __fctyp.widen('_'));
}
template<typename _Ch_type>
template<typename _Bi_iter, typename _Alloc>
template<typename _Out_iter>
- _Out_iter match_results<_Bi_iter, _Alloc>::
+ _Out_iter
+ match_results<_Bi_iter, _Alloc>::
format(_Out_iter __out,
const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
match_flag_type __flags) const
{
- _GLIBCXX_DEBUG_ASSERT( ready() );
+ __glibcxx_assert( ready() );
regex_traits<char_type> __traits;
typedef std::ctype<char_type> __ctype_type;
const __ctype_type&
auto __output = [&](size_t __idx)
{
- auto& __sub = _Base_type::operator[](__idx);
+ auto& __sub = (*this)[__idx];
if (__sub.matched)
- std::copy(__sub.first, __sub.second, __out);
+ __out = std::copy(__sub.first, __sub.second, __out);
};
if (__flags & regex_constants::format_sed)
{
- for (; __fmt_first != __fmt_last;)
- if (*__fmt_first == '&')
- {
- __output(0);
- ++__fmt_first;
- }
- else if (*__fmt_first == '\\')
- {
- if (++__fmt_first != __fmt_last
- && __fctyp.is(__ctype_type::digit, *__fmt_first))
- __output(__traits.value(*__fmt_first++, 10));
- else
- *__out++ = '\\';
- }
- else
- *__out++ = *__fmt_first++;
+ bool __escaping = false;
+ for (; __fmt_first != __fmt_last; __fmt_first++)
+ {
+ if (__escaping)
+ {
+ __escaping = false;
+ if (__fctyp.is(__ctype_type::digit, *__fmt_first))
+ __output(__traits.value(*__fmt_first, 10));
+ else
+ *__out++ = *__fmt_first;
+ continue;
+ }
+ if (*__fmt_first == '\\')
+ {
+ __escaping = true;
+ continue;
+ }
+ if (*__fmt_first == '&')
+ {
+ __output(0);
+ continue;
+ }
+ *__out++ = *__fmt_first;
+ }
+ if (__escaping)
+ *__out++ = '\\';
}
else
{
if (__next == __fmt_last)
break;
- std::copy(__fmt_first, __next, __out);
+ __out = std::copy(__fmt_first, __next, __out);
auto __eat = [&](char __ch) -> bool
{
else if (__eat('&'))
__output(0);
else if (__eat('`'))
- __output(_Base_type::size()-2);
+ {
+ auto& __sub = _M_prefix();
+ if (__sub.matched)
+ __out = std::copy(__sub.first, __sub.second, __out);
+ }
else if (__eat('\''))
- __output(_Base_type::size()-1);
+ {
+ auto& __sub = _M_suffix();
+ if (__sub.matched)
+ __out = std::copy(__sub.first, __sub.second, __out);
+ }
else if (__fctyp.is(__ctype_type::digit, *__next))
{
long __num = __traits.value(*__next, 10);
*__out++ = '$';
__fmt_first = __next;
}
- std::copy(__fmt_first, __fmt_last, __out);
+ __out = std::copy(__fmt_first, __fmt_last, __out);
}
return __out;
}
template<typename _Out_iter, typename _Bi_iter,
typename _Rx_traits, typename _Ch_type>
_Out_iter
- regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
- const basic_regex<_Ch_type, _Rx_traits>& __e,
- const _Ch_type* __fmt,
- regex_constants::match_flag_type __flags)
+ __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
+ const basic_regex<_Ch_type, _Rx_traits>& __e,
+ const _Ch_type* __fmt, size_t __len,
+ regex_constants::match_flag_type __flags)
{
typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
_IterT __i(__first, __last, __e, __flags);
if (__i == __end)
{
if (!(__flags & regex_constants::format_no_copy))
- std::copy(__first, __last, __out);
+ __out = std::copy(__first, __last, __out);
}
else
{
sub_match<_Bi_iter> __last;
- auto __len = char_traits<_Ch_type>::length(__fmt);
for (; __i != __end; ++__i)
{
if (!(__flags & regex_constants::format_no_copy))
- std::copy(__i->prefix().first, __i->prefix().second, __out);
+ __out = std::copy(__i->prefix().first, __i->prefix().second,
+ __out);
__out = __i->format(__out, __fmt, __fmt + __len, __flags);
__last = __i->suffix();
if (__flags & regex_constants::format_first_only)
break;
}
if (!(__flags & regex_constants::format_no_copy))
- std::copy(__last.first, __last.second, __out);
+ __out = std::copy(__last.first, __last.second, __out);
}
return __out;
}
typename _Rx_traits>
bool
regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
- operator==(const regex_iterator& __rhs) const
+ operator==(const regex_iterator& __rhs) const noexcept
{
- return (_M_match.empty() && __rhs._M_match.empty())
- || (_M_begin == __rhs._M_begin
- && _M_end == __rhs._M_end
- && _M_pregex == __rhs._M_pregex
- && _M_flags == __rhs._M_flags
- && _M_match[0] == __rhs._M_match[0]);
+ if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
+ return true;
+ return _M_pregex == __rhs._M_pregex
+ && _M_begin == __rhs._M_begin
+ && _M_end == __rhs._M_end
+ && _M_flags == __rhs._M_flags
+ && _M_match[0] == __rhs._M_match[0];
}
template<typename _Bi_iter,
{
if (__start == _M_end)
{
- _M_match = value_type();
+ _M_pregex = nullptr;
return *this;
}
else
| regex_constants::match_not_null
| regex_constants::match_continuous))
{
- _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
- _M_match.at(_M_match.size()).first = __prefix_first;
- _M_match._M_in_iterator = true;
+ __glibcxx_assert(_M_match[0].matched);
+ auto& __prefix = _M_match._M_prefix();
+ __prefix.first = __prefix_first;
+ __prefix.matched = __prefix.first != __prefix.second;
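+ // C++11 [re.regiter.incr] p5 (28.12.1.4/5): match[i].position() is
+ // measured from the iterator's original begin, not from __start.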
_M_match._M_begin = _M_begin;
return *this;
}
_M_flags |= regex_constants::match_prev_avail;
if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
{
- _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
- _M_match.at(_M_match.size()).first = __prefix_first;
- _M_match._M_in_iterator = true;
+ __glibcxx_assert(_M_match[0].matched);
+ auto& __prefix = _M_match._M_prefix();
+ __prefix.first = __prefix_first;
+ __prefix.matched = __prefix.first != __prefix.second;
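+ // C++11 [re.regiter.incr] p5 (28.12.1.4/5): match[i].position() is
+ // measured from the iterator's original begin, not from __start.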
_M_match._M_begin = _M_begin;
}
else
- _M_match = value_type();
+ _M_pregex = nullptr;
}
return *this;
}
_M_position = __rhs._M_position;
_M_subs = __rhs._M_subs;
_M_n = __rhs._M_n;
- _M_result = __rhs._M_result;
_M_suffix = __rhs._M_suffix;
_M_has_m1 = __rhs._M_has_m1;
- if (__rhs._M_result == &__rhs._M_suffix)
- _M_result = &_M_suffix;
+ _M_normalize_result();
return *this;
}
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
-