From c2669da93de6bb84df96b14167429f2046acf4b8 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Tue, 24 Sep 2013 02:05:00 +0000 Subject: [PATCH] Makefile.am: Add regex.tcc. 2013-09-24 Tim Shen * include/Makefile.am: Add regex.tcc. * include/Makefile.in: Regenerate. * include/bits/regex.h: Move definitions to regex.tcc. * include/bits/regex.tcc: New. (match_results::format, regex_replace): Implement. * include/bits/regex_compiler.h: Move _M_flags to the top of class member list, because other members' initialization depends on it. * include/bits/regex_compiler.tcc (_Compiler<>::_Compiler): Adjust member initializations. (_Compiler<>::_M_quantifier): Fix ungreedy interval quantifier. * include/bits/regex_executor.h: Remove _RegexT from _*Executor classes. In the future, all regex classes may be refactored to *Impl style. * include/bits/regex_executor.tcc (_Executor::_M_set_results): Merge identical code from _*Executor classes. * testsuite/28_regex/algorithms/regex_match/extended/ string_dispatch_01.cc (fake_match<>): Adjust the hacking-style testcase caller for new __get_executor interface. * testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc: New. * testsuite/28_regex/match_results/format.cc: New. * testsuite/28_regex/traits/char/lookup_collatename.cc: Remove digraph testcase. * testsuite/28_regex/traits/wchar_t/lookup_collatename.cc: Likewise. 
From-SVN: r202858 --- libstdc++-v3/ChangeLog | 26 + libstdc++-v3/include/Makefile.am | 1 + libstdc++-v3/include/Makefile.in | 1 + libstdc++-v3/include/bits/regex.h | 650 ++++------------- libstdc++-v3/include/bits/regex.tcc | 677 ++++++++++++++++++ libstdc++-v3/include/bits/regex_compiler.h | 3 +- libstdc++-v3/include/bits/regex_compiler.tcc | 106 +-- libstdc++-v3/include/bits/regex_executor.h | 26 +- libstdc++-v3/include/bits/regex_executor.tcc | 43 +- .../extended/string_dispatch_01.cc | 3 +- .../regex_replace/char/basic_replace.cc | 51 ++ .../28_regex/match_results/format.cc | 51 ++ .../traits/char/lookup_collatename.cc | 9 +- .../traits/wchar_t/lookup_collatename.cc | 10 +- 14 files changed, 1048 insertions(+), 609 deletions(-) create mode 100644 libstdc++-v3/include/bits/regex.tcc create mode 100644 libstdc++-v3/testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc create mode 100644 libstdc++-v3/testsuite/28_regex/match_results/format.cc diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 8c201f1c3f08..432c36a151b5 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,29 @@ +2013-09-24 Tim Shen + + * include/Makefile.am: Add regex.tcc. + * include/Makefile.in: Regenerate. + * include/bits/regex.h: Remove definitions to regex.tcc. + * include/bits/regex.tcc: New. + (match_results::format, regex_replace): Implement; + * include/bits/regex_compiler.h: Move _M_flags to the top of class + member list, because other members' initialization depend on it. + * include/bits/regex_compiler.tcc + (_Compiler<>::_Compiler): Adjust member initializations. + (_Compiler<>::_M_quantifier): Fix ungreedy interval quantifier. + * include/bits/regex_executor.h: Remove _RegexT from _*Executor classes. + In the future, all regex classes may refactor to *Impl style. + * include/bits/regex_executor.tcc (_Executor::_M_set_results): + Merge identical code from _*Executor classes. 
+ * testsuite/28_regex/algorithms/regex_match/extended/ + string_dispatch_01.cc (fake_match<>): Adjust the hacking-style testcase + caller for new __get_executors interface. + * testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc: + New. + * testsuite/28_regex/match_results/format.cc: New. + * testsuite/28_regex/traits/char/lookup_collatename.cc: Remove digraph + testcase. + * testsuite/28_regex/traits/wchar_t/lookup_collatename.cc: Likewise. + 2013-09-23 Paul Pluzhnikov * src/c++11/snprintf_lite.cc (__concat_size_t): Use diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am index 0bceb5776a5e..3be6e5750d60 100644 --- a/libstdc++-v3/include/Makefile.am +++ b/libstdc++-v3/include/Makefile.am @@ -126,6 +126,7 @@ bits_headers = \ ${bits_srcdir}/random.tcc \ ${bits_srcdir}/range_access.h \ ${bits_srcdir}/regex.h \ + ${bits_srcdir}/regex.tcc \ ${bits_srcdir}/regex_constants.h \ ${bits_srcdir}/regex_error.h \ ${bits_srcdir}/regex_scanner.h \ diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in index b16063970138..cd0b467315f2 100644 --- a/libstdc++-v3/include/Makefile.in +++ b/libstdc++-v3/include/Makefile.in @@ -393,6 +393,7 @@ bits_headers = \ ${bits_srcdir}/random.tcc \ ${bits_srcdir}/range_access.h \ ${bits_srcdir}/regex.h \ + ${bits_srcdir}/regex.tcc \ ${bits_srcdir}/regex_constants.h \ ${bits_srcdir}/regex_error.h \ ${bits_srcdir}/regex_scanner.h \ diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index 9d1438aab239..cbe903625b9d 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -214,7 +214,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * is known and can be converted into a primary sort key * then returns that key, otherwise returns an empty string. * - * @todo Implement this function. + * @todo Implement this function correctly. 
*/ template string_type @@ -343,278 +343,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION locale_type _M_locale; }; - template - template - typename regex_traits<_Ch_type>::string_type - regex_traits<_Ch_type>:: - lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const - { - typedef std::ctype __ctype_type; - const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); - - static const char* __collatenames[] = - { - "NUL", - "SOH", - "STX", - "ETX", - "EOT", - "ENQ", - "ACK", - "alert", - "backspace", - "tab", - "newline", - "vertical-tab", - "form-feed", - "carriage-return", - "SO", - "SI", - "DLE", - "DC1", - "DC2", - "DC3", - "DC4", - "NAK", - "SYN", - "ETB", - "CAN", - "EM", - "SUB", - "ESC", - "IS4", - "IS3", - "IS2", - "IS1", - "space", - "exclamation-mark", - "quotation-mark", - "number-sign", - "dollar-sign", - "percent-sign", - "ampersand", - "apostrophe", - "left-parenthesis", - "right-parenthesis", - "asterisk", - "plus-sign", - "comma", - "hyphen", - "period", - "slash", - "zero", - "one", - "two", - "three", - "four", - "five", - "six", - "seven", - "eight", - "nine", - "colon", - "semicolon", - "less-than-sign", - "equals-sign", - "greater-than-sign", - "question-mark", - "commercial-at", - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q", - "R", - "S", - "T", - "U", - "V", - "W", - "X", - "Y", - "Z", - "left-square-bracket", - "backslash", - "right-square-bracket", - "circumflex", - "underscore", - "grave-accent", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "left-curly-bracket", - "vertical-line", - "right-curly-bracket", - "tilde", - "DEL", - "" - }; - - // same as boost - static const char* __digraphs[] = - { - "ae", - "Ae", - "AE", - "ch", - "Ch", - "CH", - "ll", - "Ll", - "LL", - "ss", - "Ss", - "SS", - "nj", - "Nj", - "NJ", - "dz", - "Dz", - 
"DZ", - "lj", - "Lj", - "LJ", - "" - }; - - std::string __s(__last - __first, '?'); - __fctyp.narrow(__first, __last, '?', &*__s.begin()); - - for (unsigned int __i = 0; *__collatenames[__i]; __i++) - if (__s == __collatenames[__i]) - return string_type(1, __fctyp.widen((char)__i)); - - for (unsigned int __i = 0; *__digraphs[__i]; __i++) - { - const char* __now = __digraphs[__i]; - if (__s == __now) - { - string_type ret(__s.size(), __fctyp.widen('?')); - __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin()); - return ret; - } - } - return string_type(); - } - - template - template - typename regex_traits<_Ch_type>::char_class_type - regex_traits<_Ch_type>:: - lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const - { - typedef std::ctype __ctype_type; - typedef std::ctype __cctype_type; - typedef const pair _ClassnameEntry; - const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); - const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale)); - - static _ClassnameEntry __classnames[] = - { - {"d", ctype_base::digit}, - {"w", {ctype_base::alnum, _RegexMask::_S_under}}, - {"s", ctype_base::space}, - {"alnum", ctype_base::alnum}, - {"alpha", ctype_base::alpha}, - {"blank", {0, _RegexMask::_S_blank}}, - {"cntrl", ctype_base::cntrl}, - {"digit", ctype_base::digit}, - {"graph", ctype_base::graph}, - {"lower", ctype_base::lower}, - {"print", ctype_base::print}, - {"punct", ctype_base::punct}, - {"space", ctype_base::space}, - {"upper", ctype_base::upper}, - {"xdigit", ctype_base::xdigit}, - }; - - std::string __s(__last - __first, '?'); - __fctyp.narrow(__first, __last, '?', &__s[0]); - __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size()); - for (_ClassnameEntry* __it = __classnames; - __it < *(&__classnames + 1); - ++__it) - { - if (__s == __it->first) - { - if (__icase - && ((__it->second - & (ctype_base::lower | ctype_base::upper)) != 0)) - return ctype_base::alpha; - return __it->second; - } - } - return 0; - } - - template 
- bool - regex_traits<_Ch_type>:: - isctype(_Ch_type __c, char_class_type __f) const - { - typedef std::ctype __ctype_type; - const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); - - return __fctyp.is(__f._M_base, __c) - // [[:w:]] - || ((__f._M_extended & _RegexMask::_S_under) - && __c == __fctyp.widen('_')) - // [[:blank:]] - || ((__f._M_extended & _RegexMask::_S_blank) - && (__c == __fctyp.widen(' ') - || __c == __fctyp.widen('\t'))); - } - - template - int - regex_traits<_Ch_type>:: - value(_Ch_type __ch, int __radix) const - { - std::basic_istringstream __is(string_type(1, __ch)); - int __v; - if (__radix == 8) - __is >> std::oct; - else if (__radix == 16) - __is >> std::hex; - __is >> __v; - return __is.fail() ? -1 : __v; - } - // [7.8] Class basic_regex /** * Objects of specializations of this class represent regular expressions @@ -986,7 +714,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __detail::_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> __detail::__get_executor(_BiIter, _BiIter, - match_results<_BiIter, _Alloc>&, + std::vector, _Alloc>&, const basic_regex<_CharT, _TraitsT>&, regex_constants::match_flag_type); @@ -2032,14 +1760,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * @pre ready() == true - * @todo Implement this function. 
*/ template _Out_iter format(_Out_iter __out, const char_type* __fmt_first, const char_type* __fmt_last, - match_flag_type __flags = regex_constants::format_default) const - { return __out; } + match_flag_type __flags = regex_constants::format_default) const; /** * @pre ready() == true @@ -2229,32 +1955,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION match_results<_Bi_iter, _Alloc>& __m, const basic_regex<_Ch_type, _Rx_traits>& __re, regex_constants::match_flag_type __flags - = regex_constants::match_default) - { - if (__re._M_automaton == nullptr) - return false; - - auto __size = __re._M_automaton->_M_sub_count(); - __size += 2; - __m.resize(__size); - for (decltype(__size) __i = 0; __i < __size; ++__i) - __m.at(__i).matched = false; - - if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match()) - { - for (auto __it : __m) - if (!__it.matched) - __it.first = __it.second = __e; - __m.at(__m.size()).matched = false; - __m.at(__m.size()).first = __s; - __m.at(__m.size()).second = __s; - __m.at(__m.size()+1).matched = false; - __m.at(__m.size()+1).first = __e; - __m.at(__m.size()+1).second = __e; - return true; - } - return false; - } + = regex_constants::match_default); /** * @brief Indicates if there is a match between the regular expression @p e @@ -2271,7 +1972,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @throws an exception of type regex_error. 
*/ template - bool + inline bool regex_match(_Bi_iter __first, _Bi_iter __last, const basic_regex<_Ch_type, _Rx_traits>& __re, regex_constants::match_flag_type __flags @@ -2388,40 +2089,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ template - inline bool + bool regex_search(_Bi_iter __first, _Bi_iter __last, match_results<_Bi_iter, _Alloc>& __m, const basic_regex<_Ch_type, _Rx_traits>& __re, regex_constants::match_flag_type __flags - = regex_constants::match_default) - { - if (__re._M_automaton == nullptr) - return false; - - auto __size = __re._M_automaton->_M_sub_count(); - __size += 2; - __m.resize(__size); - for (decltype(__size) __i = 0; __i < __size; ++__i) - __m.at(__i).matched = false; - - if (__detail::__get_executor(__first, __last, __m, __re, __flags) - ->_M_search()) - { - for (auto __it : __m) - if (!__it.matched) - __it.first = __it.second = __last; - __m.at(__m.size()).first = __first; - __m.at(__m.size()).second = __m[0].first; - __m.at(__m.size()+1).first = __m[0].second; - __m.at(__m.size()+1).second = __last; - __m.at(__m.size()).matched = - (__m.prefix().first != __m.prefix().second); - __m.at(__m.size()+1).matched = - (__m.suffix().first != __m.suffix().second); - return true; - } - return false; - } + = regex_constants::match_default); /** * Searches for a regular expression within a range. @@ -2530,45 +2203,96 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // std [28.11.4] Function template regex_replace /** - * @doctodo - * @param __out - * @param __first - * @param __last - * @param __e - * @param __fmt - * @param __flags + * @brief Search for a regular expression within a range for multiple times, + and replace the matched parts through filling a format string. + * @param __out [OUT] The output iterator. + * @param __first [IN] The start of the string to search. + * @param __last [IN] One-past-the-end of the string to search. + * @param __e [IN] The regular expression to search for. + * @param __fmt [IN] The format string. 
+ * @param __flags [IN] Search and replace policy flags. * - * @returns out + * @returns __out * @throws an exception of type regex_error. - * - * @todo Implement this function. */ template + typename _Rx_traits, typename _Ch_type, + typename _St, typename _Sa> inline _Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex<_Ch_type, _Rx_traits>& __e, - const basic_string<_Ch_type>& __fmt, + const basic_string<_Ch_type, _St, _Sa>& __fmt, regex_constants::match_flag_type __flags = regex_constants::match_default) - { return __out; } + { + return regex_replace(__out, __first, __last, __e, __fmt.c_str(), __flags); + } /** - * @doctodo - * @param __s - * @param __e - * @param __fmt - * @param __flags + * @brief Search for a regular expression within a range for multiple times, + and replace the matched parts through filling a format C-string. + * @param __out [OUT] The output iterator. + * @param __first [IN] The start of the string to search. + * @param __last [IN] One-past-the-end of the string to search. + * @param __e [IN] The regular expression to search for. + * @param __fmt [IN] The format C-string. + * @param __flags [IN] Search and replace policy flags. * - * @returns a copy of string @p s with replacements. + * @returns __out + * @throws an exception of type regex_error. + */ + template + _Out_iter + regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, + const basic_regex<_Ch_type, _Rx_traits>& __e, + const _Ch_type* __fmt, + regex_constants::match_flag_type __flags + = regex_constants::match_default); + + /** + * @brief Search for a regular expression within a string for multiple times, + and replace the matched parts through filling a format string. + * @param __s [IN] The string to search and replace. + * @param __e [IN] The regular expression to search for. + * @param __fmt [IN] The format string. + * @param __flags [IN] Search and replace policy flags. * + * @returns The string after replacing. 
* @throws an exception of type regex_error. */ - template + template + inline basic_string<_Ch_type> + regex_replace(const basic_string<_Ch_type, _St, _Sa>& __s, + const basic_regex<_Ch_type, _Rx_traits>& __e, + const basic_string<_Ch_type, _Fst, _Fsa>& __fmt, + regex_constants::match_flag_type __flags + = regex_constants::match_default) + { + basic_string<_Ch_type> __result; + regex_replace(std::back_inserter(__result), + __s.begin(), __s.end(), __e, __fmt, __flags); + return __result; + } + + /** + * @brief Search for a regular expression within a string for multiple times, + and replace the matched parts through filling a format C-string. + * @param __s [IN] The string to search and replace. + * @param __e [IN] The regular expression to search for. + * @param __fmt [IN] The format C-string. + * @param __flags [IN] Search and replace policy flags. + * + * @returns The string after replacing. + * @throws an exception of type regex_error. + */ + template inline basic_string<_Ch_type> - regex_replace(const basic_string<_Ch_type>& __s, + regex_replace(const basic_string<_Ch_type, _St, _Sa>& __s, const basic_regex<_Ch_type, _Rx_traits>& __e, - const basic_string<_Ch_type>& __fmt, + const _Ch_type* __fmt, regex_constants::match_flag_type __flags = regex_constants::match_default) { @@ -2578,6 +2302,59 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __result; } + /** + * @brief Search for a regular expression within a C-string for multiple + times, and replace the matched parts through filling a format string. + * @param __s [IN] The C-string to search and replace. + * @param __e [IN] The regular expression to search for. + * @param __fmt [IN] The format string. + * @param __flags [IN] Search and replace policy flags. + * + * @returns The string after replacing. + * @throws an exception of type regex_error. 
+ */ + template + inline basic_string<_Ch_type> + regex_replace(const _Ch_type* __s, + const basic_regex<_Ch_type, _Rx_traits>& __e, + const basic_string<_Ch_type, _St, _Sa>& __fmt, + regex_constants::match_flag_type __flags + = regex_constants::match_default) + { + basic_string<_Ch_type> __result; + regex_replace(std::back_inserter(__result), __s, + __s + char_traits<_Ch_type>::length(__s), + __e, __fmt, __flags); + return __result; + } + + /** + * @brief Search for a regular expression within a C-string for multiple + times, and replace the matched parts through filling a format C-string. + * @param __s [IN] The C-string to search and replace. + * @param __e [IN] The regular expression to search for. + * @param __fmt [IN] The format C-string. + * @param __flags [IN] Search and replace policy flags. + * + * @returns The string after replacing. + * @throws an exception of type regex_error. + */ + template + inline basic_string<_Ch_type> + regex_replace(const _Ch_type* __s, + const basic_regex<_Ch_type, _Rx_traits>& __e, + const _Ch_type* __fmt, + regex_constants::match_flag_type __flags + = regex_constants::match_default) + { + basic_string<_Ch_type> __result; + regex_replace(std::back_inserter(__result), __s, + __s + char_traits<_Ch_type>::length(__s), + __e, __fmt, __flags); + return __result; + } + //@} // std [28.12] Class template regex_iterator @@ -2685,68 +2462,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION match_results<_Bi_iter> _M_match; }; - template - bool - regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: - operator==(const regex_iterator& __rhs) const - { - return (_M_match.empty() && __rhs._M_match.empty()) - || (_M_begin == __rhs._M_begin - && _M_end == __rhs._M_end - && _M_pregex == __rhs._M_pregex - && _M_flags == __rhs._M_flags - && _M_match[0] == __rhs._M_match[0]); - } - - template - regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& - regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: - operator++() - { - // In all cases in which the call to regex_search 
returns true, - // match.prefix().first shall be equal to the previous value of - // match[0].second, and for each index i in the half-open range - // [0, match.size()) for which match[i].matched is true, - // match[i].position() shall return distance(begin, match[i].first). - // [28.12.1.4.5] - if (_M_match[0].matched) - { - auto __start = _M_match[0].second; - if (_M_match[0].first == _M_match[0].second) - if (__start == _M_end) - { - _M_match = value_type(); - return *this; - } - else - { - if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags - | regex_constants::match_not_null - | regex_constants::match_continuous)) - { - _M_match._M_in_iterator = true; - _M_match._M_begin = _M_begin; - return *this; - } - else - ++__start; - } - _M_flags |= regex_constants::match_prev_avail; - if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) - { - _M_match._M_in_iterator = true; - _M_match._M_begin = _M_begin; - } - else - _M_match = value_type(); - } - return *this; - } - typedef regex_iterator cregex_iterator; typedef regex_iterator sregex_iterator; #ifdef _GLIBCXX_USE_WCHAR_T @@ -2957,104 +2672,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _M_has_m1; }; - template - regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& - regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: - operator=(const regex_token_iterator& __rhs) - { - _M_position = __rhs._M_position; - _M_subs = __rhs._M_subs; - _M_n = __rhs._M_n; - _M_result = __rhs._M_result; - _M_suffix = __rhs._M_suffix; - _M_has_m1 = __rhs._M_has_m1; - if (__rhs._M_result == &__rhs._M_suffix) - _M_result = &_M_suffix; - } - - template - bool - regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: - operator==(const regex_token_iterator& __rhs) const - { - if (_M_end_of_seq() && __rhs._M_end_of_seq()) - return true; - if (_M_suffix.matched && __rhs._M_suffix.matched - && _M_suffix == __rhs._M_suffix) - return true; - if (_M_end_of_seq() || _M_suffix.matched - || __rhs._M_end_of_seq() || 
__rhs._M_suffix.matched) - return false; - return _M_position == __rhs._M_position - && _M_n == __rhs._M_n - && _M_subs == __rhs._M_subs; - } - - template - regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& - regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: - operator++() - { - _Position __prev = _M_position; - if (_M_suffix.matched) - *this = regex_token_iterator(); - else if (_M_n + 1 < _M_subs.size()) - { - _M_n++; - _M_result = &_M_current_match(); - } - else - { - _M_n = 0; - ++_M_position; - if (_M_position != _Position()) - _M_result = &_M_current_match(); - else if (_M_has_m1 && __prev->suffix().length() != 0) - { - _M_suffix.matched = true; - _M_suffix.first = __prev->suffix().first; - _M_suffix.second = __prev->suffix().second; - _M_result = &_M_suffix; - } - else - *this = regex_token_iterator(); - } - return *this; - } - - template - void - regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: - _M_init(_Bi_iter __a, _Bi_iter __b) - { - _M_has_m1 = false; - for (auto __it : _M_subs) - if (__it == -1) - { - _M_has_m1 = true; - break; - } - if (_M_position != _Position()) - _M_result = &_M_current_match(); - else if (_M_has_m1) - { - _M_suffix.matched = true; - _M_suffix.first = __a; - _M_suffix.second = __b; - _M_result = &_M_suffix; - } - else - _M_result = nullptr; - } - /** @brief Token iterator for C-style NULL-terminated strings. */ typedef regex_token_iterator cregex_token_iterator; @@ -3073,3 +2690,4 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION } // namespace +#include diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc new file mode 100644 index 000000000000..24316d26616f --- /dev/null +++ b/libstdc++-v3/include/bits/regex.tcc @@ -0,0 +1,677 @@ +// class template regex -*- C++ -*- + +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// . + +/** + * @file bits/regex.tcc + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. 
@headername{regex} + */ + +namespace std _GLIBCXX_VISIBILITY(default) +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + + template + template + typename regex_traits<_Ch_type>::string_type + regex_traits<_Ch_type>:: + lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const + { + typedef std::ctype __ctype_type; + const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); + + static const char* __collatenames[] = + { + "NUL", + "SOH", + "STX", + "ETX", + "EOT", + "ENQ", + "ACK", + "alert", + "backspace", + "tab", + "newline", + "vertical-tab", + "form-feed", + "carriage-return", + "SO", + "SI", + "DLE", + "DC1", + "DC2", + "DC3", + "DC4", + "NAK", + "SYN", + "ETB", + "CAN", + "EM", + "SUB", + "ESC", + "IS4", + "IS3", + "IS2", + "IS1", + "space", + "exclamation-mark", + "quotation-mark", + "number-sign", + "dollar-sign", + "percent-sign", + "ampersand", + "apostrophe", + "left-parenthesis", + "right-parenthesis", + "asterisk", + "plus-sign", + "comma", + "hyphen", + "period", + "slash", + "zero", + "one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + "colon", + "semicolon", + "less-than-sign", + "equals-sign", + "greater-than-sign", + "question-mark", + "commercial-at", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "left-square-bracket", + "backslash", + "right-square-bracket", + "circumflex", + "underscore", + "grave-accent", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "left-curly-bracket", + "vertical-line", + "right-curly-bracket", + "tilde", + "DEL", + "" + }; + + // same as boost + //static const char* __digraphs[] = + // { + // "ae", + // "Ae", + // "AE", + // "ch", + // "Ch", + // "CH", + // "ll", + // "Ll", + // "LL", + // "ss", + // "Ss", + // 
"SS", + // "nj", + // "Nj", + // "NJ", + // "dz", + // "Dz", + // "DZ", + // "lj", + // "Lj", + // "LJ", + // "" + // }; + + std::string __s(__last - __first, '?'); + __fctyp.narrow(__first, __last, '?', &*__s.begin()); + + for (unsigned int __i = 0; *__collatenames[__i]; __i++) + if (__s == __collatenames[__i]) + return string_type(1, __fctyp.widen(static_cast(__i))); + + //for (unsigned int __i = 0; *__digraphs[__i]; __i++) + // { + // const char* __now = __digraphs[__i]; + // if (__s == __now) + // { + // string_type ret(__s.size(), __fctyp.widen('?')); + // __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin()); + // return ret; + // } + // } + return string_type(); + } + + template + template + typename regex_traits<_Ch_type>::char_class_type + regex_traits<_Ch_type>:: + lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const + { + typedef std::ctype __ctype_type; + typedef std::ctype __cctype_type; + typedef const pair _ClassnameEntry; + const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); + const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale)); + + static _ClassnameEntry __classnames[] = + { + {"d", ctype_base::digit}, + {"w", {ctype_base::alnum, _RegexMask::_S_under}}, + {"s", ctype_base::space}, + {"alnum", ctype_base::alnum}, + {"alpha", ctype_base::alpha}, + {"blank", {0, _RegexMask::_S_blank}}, + {"cntrl", ctype_base::cntrl}, + {"digit", ctype_base::digit}, + {"graph", ctype_base::graph}, + {"lower", ctype_base::lower}, + {"print", ctype_base::print}, + {"punct", ctype_base::punct}, + {"space", ctype_base::space}, + {"upper", ctype_base::upper}, + {"xdigit", ctype_base::xdigit}, + }; + + std::string __s(__last - __first, '?'); + __fctyp.narrow(__first, __last, '?', &__s[0]); + __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size()); + for (_ClassnameEntry* __it = __classnames; + __it < *(&__classnames + 1); + ++__it) + { + if (__s == __it->first) + { + if (__icase + && ((__it->second + & (ctype_base::lower 
| ctype_base::upper)) != 0)) + return ctype_base::alpha; + return __it->second; + } + } + return 0; + } + + template + bool + regex_traits<_Ch_type>:: + isctype(_Ch_type __c, char_class_type __f) const + { + typedef std::ctype __ctype_type; + const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); + + return __fctyp.is(__f._M_base, __c) + // [[:w:]] + || ((__f._M_extended & _RegexMask::_S_under) + && __c == __fctyp.widen('_')) + // [[:blank:]] + || ((__f._M_extended & _RegexMask::_S_blank) + && (__c == __fctyp.widen(' ') + || __c == __fctyp.widen('\t'))); + } + + template + int + regex_traits<_Ch_type>:: + value(_Ch_type __ch, int __radix) const + { + std::basic_istringstream __is(string_type(1, __ch)); + int __v; + if (__radix == 8) + __is >> std::oct; + else if (__radix == 16) + __is >> std::hex; + __is >> __v; + return __is.fail() ? -1 : __v; + } + + template + template + _Out_iter match_results<_Bi_iter, _Alloc>:: + format(_Out_iter __out, + const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, + const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, + match_flag_type __flags) const + { + _GLIBCXX_DEBUG_ASSERT( ready() ); + regex_traits __traits; + typedef std::ctype __ctype_type; + const __ctype_type& + __fctyp(use_facet<__ctype_type>(__traits.getloc())); + + auto __output = [&](int __idx) + { + auto& __sub = _Base_type::operator[](__idx); + if (__sub.matched) + std::copy(__sub.first, __sub.second, __out); + }; + + if (__flags & regex_constants::format_sed) + { + for (; __fmt_first != __fmt_last;) + if (*__fmt_first == '&') + { + __output(0); + ++__fmt_first; + } + else if (*__fmt_first == '\\') + { + if (++__fmt_first != __fmt_last + && __fctyp.is(__ctype_type::digit, *__fmt_first)) + __output(__traits.value(*__fmt_first++, 10)); + else + *__out++ = '\\'; + } + else + *__out++ = *__fmt_first++; + } + else + { + while (1) + { + auto __next = std::find(__fmt_first, __fmt_last, '$'); + if (__next == __fmt_last) + break; + + 
std::copy(__fmt_first, __next, __out); + + auto __eat = [&](char __ch) -> bool + { + if (*__next == __ch) + { + ++__next; + return true; + } + return false; + }; + + if (++__next == __fmt_last) + *__out++ = '$'; + else if (__eat('$')) + *__out++ = '$'; + else if (__eat('&')) + __output(0); + else if (__eat('`')) + __output(_Base_type::size()-2); + else if (__eat('\'')) + __output(_Base_type::size()-1); + else if (__fctyp.is(__ctype_type::digit, *__next)) + { + int __num = __traits.value(*__next, 10); + if (++__next != __fmt_last + && __fctyp.is(__ctype_type::digit, *__next)) + { + __num *= 10; + __num += __traits.value(*__next++, 10); + } + if (0 <= __num && __num < this->size()) + __output(__num); + } + else + *__out++ = '$'; + __fmt_first = __next; + } + std::copy(__fmt_first, __fmt_last, __out); + } + return __out; + } + + template + bool + regex_match(_Bi_iter __s, + _Bi_iter __e, + match_results<_Bi_iter, _Alloc>& __m, + const basic_regex<_Ch_type, _Rx_traits>& __re, + regex_constants::match_flag_type __flags + = regex_constants::match_default) + { + if (__re._M_automaton == nullptr) + return false; + + typename match_results<_Bi_iter, _Alloc>::_Base_type& __res = __m; + auto __size = __re._M_automaton->_M_sub_count(); + __size += 2; + __res.resize(__size); + for (decltype(__size) __i = 0; __i < __size; ++__i) + __res[__i].matched = false; + + if (__detail::__get_executor(__s, __e, __res, __re, __flags)->_M_match()) + { + for (auto __it : __res) + if (!__it.matched) + __it.first = __it.second = __e; + auto& __pre = __res[__res.size()-2]; + auto& __suf = __res[__res.size()-1]; + __pre.matched = false; + __pre.first = __s; + __pre.second = __s; + __suf.matched = false; + __suf.first = __e; + __suf.second = __e; + return true; + } + return false; + } + + template + bool + regex_search(_Bi_iter __first, _Bi_iter __last, + match_results<_Bi_iter, _Alloc>& __m, + const basic_regex<_Ch_type, _Rx_traits>& __re, + regex_constants::match_flag_type __flags + = 
regex_constants::match_default) + { + if (__re._M_automaton == nullptr) + return false; + + typename match_results<_Bi_iter, _Alloc>::_Base_type& __res = __m; + auto __size = __re._M_automaton->_M_sub_count(); + __size += 2; + __res.resize(__size); + for (decltype(__size) __i = 0; __i < __size; ++__i) + __res[__i].matched = false; + + if (__detail::__get_executor(__first, __last, __res, __re, __flags) + ->_M_search()) + { + for (auto __it : __res) + if (!__it.matched) + __it.first = __it.second = __last; + auto& __pre = __res[__res.size()-2]; + auto& __suf = __res[__res.size()-1]; + __pre.first = __first; + __pre.second = __res[0].first; + __pre.matched = (__pre.first != __pre.second); + __suf.first = __res[0].second; + __suf.second = __last; + __suf.matched = (__suf.first != __suf.second); + return true; + } + return false; + } + + template + _Out_iter + regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, + const basic_regex<_Ch_type, _Rx_traits>& __e, + const _Ch_type* __fmt, + regex_constants::match_flag_type __flags + = regex_constants::match_default) + { + typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; + _IterT __i(__first, __last, __e, __flags); + _IterT __end; + if (__i == __end) + { + if (!(__flags & regex_constants::format_no_copy)) + std::copy(__first, __last, __out); + } + else + { + sub_match<_Bi_iter> __last; + auto __len = char_traits<_Ch_type>::length(__fmt); + for (; __i != __end; ++__i) + { + if (!(__flags & regex_constants::format_no_copy)) + std::copy(__i->prefix().first, __i->prefix().second, __out); + __out = __i->format(__out, __fmt, __fmt + __len, __flags); + __last = __i->suffix(); + if (__flags & regex_constants::format_first_only) + break; + } + if (!(__flags & regex_constants::format_no_copy)) + std::copy(__last.first, __last.second, __out); + } + return __out; + } + + template + bool + regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: + operator==(const regex_iterator& __rhs) const + { + return 
(_M_match.empty() && __rhs._M_match.empty()) + || (_M_begin == __rhs._M_begin + && _M_end == __rhs._M_end + && _M_pregex == __rhs._M_pregex + && _M_flags == __rhs._M_flags + && _M_match[0] == __rhs._M_match[0]); + } + + template + regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& + regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: + operator++() + { + // In all cases in which the call to regex_search returns true, + // match.prefix().first shall be equal to the previous value of + // match[0].second, and for each index i in the half-open range + // [0, match.size()) for which match[i].matched is true, + // match[i].position() shall return distance(begin, match[i].first). + // [28.12.1.4.5] + if (_M_match[0].matched) + { + auto __start = _M_match[0].second; + auto __prefix_first = _M_match[0].second; + if (_M_match[0].first == _M_match[0].second) + if (__start == _M_end) + { + _M_match = value_type(); + return *this; + } + else + { + if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags + | regex_constants::match_not_null + | regex_constants::match_continuous)) + { + _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); + _M_match.at(_M_match.size()).first = __prefix_first; + _M_match._M_in_iterator = true; + _M_match._M_begin = _M_begin; + return *this; + } + else + ++__start; + } + _M_flags |= regex_constants::match_prev_avail; + if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) + { + _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); + _M_match.at(_M_match.size()).first = __prefix_first; + _M_match._M_in_iterator = true; + _M_match._M_begin = _M_begin; + } + else + _M_match = value_type(); + } + return *this; + } + + template + regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& + regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: + operator=(const regex_token_iterator& __rhs) + { + _M_position = __rhs._M_position; + _M_subs = __rhs._M_subs; + _M_n = __rhs._M_n; + _M_result = __rhs._M_result; + _M_suffix = __rhs._M_suffix; + _M_has_m1 = 
__rhs._M_has_m1; + if (__rhs._M_result == &__rhs._M_suffix) + _M_result = &_M_suffix; + } + + template + bool + regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: + operator==(const regex_token_iterator& __rhs) const + { + if (_M_end_of_seq() && __rhs._M_end_of_seq()) + return true; + if (_M_suffix.matched && __rhs._M_suffix.matched + && _M_suffix == __rhs._M_suffix) + return true; + if (_M_end_of_seq() || _M_suffix.matched + || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) + return false; + return _M_position == __rhs._M_position + && _M_n == __rhs._M_n + && _M_subs == __rhs._M_subs; + } + + template + regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& + regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: + operator++() + { + _Position __prev = _M_position; + if (_M_suffix.matched) + *this = regex_token_iterator(); + else if (_M_n + 1 < _M_subs.size()) + { + _M_n++; + _M_result = &_M_current_match(); + } + else + { + _M_n = 0; + ++_M_position; + if (_M_position != _Position()) + _M_result = &_M_current_match(); + else if (_M_has_m1 && __prev->suffix().length() != 0) + { + _M_suffix.matched = true; + _M_suffix.first = __prev->suffix().first; + _M_suffix.second = __prev->suffix().second; + _M_result = &_M_suffix; + } + else + *this = regex_token_iterator(); + } + return *this; + } + + template + void + regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: + _M_init(_Bi_iter __a, _Bi_iter __b) + { + _M_has_m1 = false; + for (auto __it : _M_subs) + if (__it == -1) + { + _M_has_m1 = true; + break; + } + if (_M_position != _Position()) + _M_result = &_M_current_match(); + else if (_M_has_m1) + { + _M_suffix.matched = true; + _M_suffix.first = __a; + _M_suffix.second = __b; + _M_result = &_M_suffix; + } + else + _M_result = nullptr; + } + +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace + diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 3b85d3a46c3b..4e393e7be9a7 100644 --- 
a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -120,13 +120,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return ret; } + _FlagT _M_flags; const _TraitsT& _M_traits; const _CtypeT& _M_ctype; _ScannerT _M_scanner; _RegexT _M_nfa; _StringT _M_value; _StackT _M_stack; - _FlagT _M_flags; }; template @@ -207,7 +207,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __s.data() + __s.size()); if (__st.empty()) __throw_regex_error(regex_constants::error_collate); - // TODO: digraph _M_char_set.insert(_M_translate(__st[0])); } diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index 7f9a19af2d96..94f3d5ea86cb 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -63,9 +63,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Compiler<_FwdIter, _CharT, _TraitsT>:: _Compiler(_FwdIter __b, _FwdIter __e, const _TraitsT& __traits, _FlagT __flags) - : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()), - _M_ctype(std::use_facet>(_M_traits.getloc())), - _M_nfa(__flags), _M_flags(__flags) + : _M_flags((__flags + & (regex_constants::ECMAScript + | regex_constants::basic + | regex_constants::extended + | regex_constants::grep + | regex_constants::egrep + | regex_constants::awk)) + ? 
__flags + : __flags | regex_constants::ECMAScript), + _M_traits(__traits), + _M_scanner(__b, __e, _M_flags, _M_traits.getloc()), + _M_ctype(std::use_facet>(_M_traits.getloc())), + _M_nfa(_M_flags) { _StateSeqT __r(_M_nfa, _M_nfa._M_start()); __r._M_append(_M_nfa._M_insert_subexpr_begin()); @@ -85,7 +95,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_disjunction() { this->_M_alternative(); - // TODO empty alternative like, um, "(|asdf)" while (_M_match_token(_ScannerT::_S_token_or)) { _StateSeqT __alt1 = _M_pop(); @@ -170,7 +179,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Compiler<_FwdIter, _CharT, _TraitsT>:: _M_quantifier() { - bool __neg = regex_constants::ECMAScript; + bool __neg = (_M_flags & regex_constants::ECMAScript); auto __init = [this, &__neg]() { if (_M_stack.empty()) @@ -207,53 +216,66 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } else if (_M_match_token(_ScannerT::_S_token_interval_begin)) { - __init(); + if (_M_stack.empty()) + __throw_regex_error(regex_constants::error_badrepeat); if (!_M_match_token(_ScannerT::_S_token_dup_count)) __throw_regex_error(regex_constants::error_badbrace); _StateSeqT __r(_M_pop()); _StateSeqT __e(_M_nfa, _M_nfa._M_insert_dummy()); int __min_rep = _M_cur_int_value(10); + bool __infi = false; + int __n; + // {3 - for (int __i = 0; __i < __min_rep; ++__i) - __e._M_append(__r._M_clone()); if (_M_match_token(_ScannerT::_S_token_comma)) if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7} - { - int __n = _M_cur_int_value(10) - __min_rep; - if (__n < 0) - __throw_regex_error(regex_constants::error_badbrace); - auto __end = _M_nfa._M_insert_dummy(); - // _M_alt is the "match more" branch, and _M_next is the - // "match less" one. Switch _M_alt and _M_next of all created - // nodes. This is a hacking but IMO works well. 
- std::stack<_StateIdT> __stack; - for (int __i = 0; __i < __n; ++__i) - { - auto __tmp = __r._M_clone(); - auto __alt = _M_nfa._M_insert_alt(__tmp._M_start, - __end, __neg); - __stack.push(__alt); - __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end)); - } - __e._M_append(__end); - while (!__stack.empty()) - { - auto& __tmp = _M_nfa[__stack.top()]; - __stack.pop(); - swap(__tmp._M_next, __tmp._M_alt); - } - } - else // {3,} - { - auto __tmp = __r._M_clone(); - _StateSeqT __s(_M_nfa, - _M_nfa._M_insert_alt(_S_invalid_state_id, - __tmp._M_start, __neg)); - __tmp._M_append(__s); - __e._M_append(__s); - } + __n = _M_cur_int_value(10) - __min_rep; + else + __infi = true; + else + __n = 0; if (!_M_match_token(_ScannerT::_S_token_interval_end)) __throw_regex_error(regex_constants::error_brace); + + __neg = __neg && _M_match_token(_ScannerT::_S_token_opt); + + for (int __i = 0; __i < __min_rep; ++__i) + __e._M_append(__r._M_clone()); + + if (__infi) + { + auto __tmp = __r._M_clone(); + _StateSeqT __s(_M_nfa, + _M_nfa._M_insert_alt(_S_invalid_state_id, + __tmp._M_start, __neg)); + __tmp._M_append(__s); + __e._M_append(__s); + } + else + { + if (__n < 0) + __throw_regex_error(regex_constants::error_badbrace); + auto __end = _M_nfa._M_insert_dummy(); + // _M_alt is the "match more" branch, and _M_next is the + // "match less" one. Switch _M_alt and _M_next of all created + // nodes. This is a hacking but IMO works well. 
+ std::stack<_StateIdT> __stack; + for (int __i = 0; __i < __n; ++__i) + { + auto __tmp = __r._M_clone(); + auto __alt = _M_nfa._M_insert_alt(__tmp._M_start, + __end, __neg); + __stack.push(__alt); + __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end)); + } + __e._M_append(__end); + while (!__stack.empty()) + { + auto& __tmp = _M_nfa[__stack.top()]; + __stack.pop(); + swap(__tmp._M_next, __tmp._M_alt); + } + } _M_stack.push(__e); } } diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index b8e9266f9102..a0149d2a5486 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -62,7 +62,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { public: typedef basic_regex<_CharT, _TraitsT> _RegexT; - typedef match_results<_BiIter, _Alloc> _ResultsT; typedef std::vector, _Alloc> _ResultsVec; typedef regex_constants::match_flag_type _FlagT; typedef typename _TraitsT::char_class_type _ClassT; @@ -70,14 +69,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION public: _Executor(_BiIter __begin, _BiIter __end, - _ResultsT& __results, + _ResultsVec& __results, const _RegexT& __re, _FlagT __flags) : _M_begin(__begin), _M_end(__end), _M_results(__results), _M_re(__re), - _M_flags(__flags) + _M_flags((__flags & regex_constants::match_prev_avail) + ? (__flags + & ~regex_constants::match_not_bol + & ~regex_constants::match_not_bow) + : __flags) { } // Set matched when string exactly match the pattern. 
@@ -145,6 +148,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _M_lookahead(_State<_CharT, _TraitsT> __state) const; + void + _M_set_results(_ResultsVec& __cur_results); + public: virtual void _M_init(_BiIter __cur) = 0; @@ -159,8 +165,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const _BiIter _M_begin; const _BiIter _M_end; const _RegexT& _M_re; - _ResultsT& _M_results; - const _FlagT _M_flags; + _ResultsVec& _M_results; + _FlagT _M_flags; bool _M_match_mode; }; @@ -186,14 +192,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; typedef _NFA<_CharT, _TraitsT> _NFAT; typedef typename _BaseT::_RegexT _RegexT; - typedef typename _BaseT::_ResultsT _ResultsT; typedef typename _BaseT::_ResultsVec _ResultsVec; typedef typename _BaseT::_FlagT _FlagT; public: _DFSExecutor(_BiIter __begin, _BiIter __end, - _ResultsT& __results, + _ResultsVec& __results, const _RegexT& __re, _FlagT __flags) : _BaseT(__begin, __end, __results, __re, __flags), @@ -249,7 +254,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; typedef _NFA<_CharT, _TraitsT> _NFAT; typedef typename _BaseT::_RegexT _RegexT; - typedef typename _BaseT::_ResultsT _ResultsT; typedef typename _BaseT::_ResultsVec _ResultsVec; typedef typename _BaseT::_FlagT _FlagT; // Here's a solution for greedy/ungreedy mode in BFS approach. We need to @@ -314,7 +318,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_inc(unsigned int __idx, bool __neg) { _M_quant_keys[__idx] += __neg ? 
1 : -1; } - _ResultsVec + _ResultsVec& _M_get() { return *this; } @@ -326,7 +330,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION public: _BFSExecutor(_BiIter __begin, _BiIter __end, - _ResultsT& __results, + _ResultsVec& __results, const _RegexT& __re, _FlagT __flags) : _BaseT(__begin, __end, __results, __re, __flags), @@ -377,7 +381,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> __get_executor(_BiIter __b, _BiIter __e, - match_results<_BiIter, _Alloc>& __m, + std::vector, _Alloc>& __m, const basic_regex<_CharT, _TraitsT>& __re, regex_constants::match_flag_type __flags); diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index af2455b8a4e3..3a4080081b74 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -148,17 +148,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION && (this->_M_flags & regex_constants::match_not_null)) __ret = false; if (__ret) - { - _ResultsVec& __res(this->_M_results); - if (this->_M_re.flags() & regex_constants::nosubs) - { - _M_cur_results.resize(3); // truncate - __res.resize(3); - } - for (unsigned int __i = 0; __i < _M_cur_results.size(); ++__i) - if (_M_cur_results[__i].matched) - __res[__i] = _M_cur_results[__i]; - } + this->_M_set_results(_M_cur_results); break; default: _GLIBCXX_DEBUG_ASSERT(false); @@ -187,18 +177,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (this->_M_match_mode) __ret = _M_includes_some(); if (__ret) - { - _ResultsVec& __res(this->_M_results); - if (this->_M_re.flags() & regex_constants::nosubs) - { - // truncate - _M_cur_results->resize(3); - __res.resize(3); - } - for (unsigned int __i = 0; __i < _M_cur_results->size(); ++__i) - if ((*_M_cur_results)[__i].matched) - __res[__i] = (*_M_cur_results)[__i]; - } + this->_M_set_results(_M_cur_results->_M_get()); return __ret; } @@ -401,12 +380,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __sub->_M_search_from_first(); } + template + 
void _Executor<_BiIter, _Alloc, _CharT, _TraitsT>:: + _M_set_results(_ResultsVec& __cur_results) + { + if (_M_re.flags() & regex_constants::nosubs) + { + // truncate + __cur_results.resize(3); + _M_results.resize(3); + } + for (unsigned int __i = 0; __i < __cur_results.size(); ++__i) + if (__cur_results[__i].matched) + _M_results[__i] = __cur_results[__i]; + } + template std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> __get_executor(_BiIter __b, _BiIter __e, - match_results<_BiIter, _Alloc>& __m, + std::vector, _Alloc>& __m, const basic_regex<_CharT, _TraitsT>& __re, regex_constants::match_flag_type __flags) { diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc index cb502eadfb4a..4634c7d1d35a 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc @@ -38,9 +38,10 @@ template, _Alloc>&)(__m); VERIFY( (dynamic_cast <__detail::_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*> - (&*__detail::__get_executor(__s, __e, __m, __re, __flags)) + (&*__detail::__get_executor(__s, __e, __res, __re, __flags)) != nullptr) ); } diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc new file mode 100644 index 000000000000..ca3f16f7d23b --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc @@ -0,0 +1,51 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-09-24 Tim Shen +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// 28.11.4 regex_replace +// Tests ECMAScript regex_replace. + +#include +#include + +using namespace std; + +void +test01() +{ + bool test __attribute__((unused)) = true; + + VERIFY(regex_replace(string("This is a string"), regex("\\b\\w*\\b"), "|$0|") + == "|This||| |is||| |a||| |string|||"); + VERIFY(regex_replace(string("This is a string"), regex("\\b\\w*\\b"), "|$0|", + regex_constants::format_no_copy) + == "|This||||is||||a||||string|||"); + VERIFY(regex_replace(string("This is a string"), regex("\\b\\w*\\b"), "|$0|", + regex_constants::format_first_only) + == "|This| is a string"); +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/match_results/format.cc b/libstdc++-v3/testsuite/28_regex/match_results/format.cc new file mode 100644 index 000000000000..be0801621437 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/match_results/format.cc @@ -0,0 +1,51 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-09-24 Tim Shen +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// 28.10.5 formatting +// Tests ECMAScript format() + +#include +#include + +using namespace std; + +void +test01() +{ + bool test __attribute__((unused)) = true; + + cmatch m; + VERIFY(regex_search("*** this is a string !!!", m, + regex("(\\w+) (\\w+) (\\w+) (\\w+)"))); + VERIFY(m.format("$&|$`|$3|$4|$2|$1|$'$$$") + == "this is a string|*** |a|string|is|this| !!!$$"); + VERIFY(m.format("&|\\3|\\4|\\2|\\1|\\", + regex_constants::format_sed) + == "this is a string|a|string|is|this|\\"); +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc index 7e0b259e0b7c..dba0fc357e56 100644 --- a/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc +++ b/libstdc++-v3/testsuite/28_regex/traits/char/lookup_collatename.cc @@ -35,12 +35,9 @@ test01() typedef char CharT; typedef std::regex_traits traits; - char name[] = "ll"; - traits t; - - traits::string_type sname = t.lookup_collatename(name, name+sizeof(name)-1); - - VERIFY( !sname.empty() ); + traits t; + CharT name[] = "tilde"; + VERIFY(t.lookup_collatename(name, name+sizeof(name)-1) == "~"); } int main() diff --git a/libstdc++-v3/testsuite/28_regex/traits/wchar_t/lookup_collatename.cc b/libstdc++-v3/testsuite/28_regex/traits/wchar_t/lookup_collatename.cc index 197bb9b4a78a..3d20cfaf9a00 100644 --- a/libstdc++-v3/testsuite/28_regex/traits/wchar_t/lookup_collatename.cc +++ b/libstdc++-v3/testsuite/28_regex/traits/wchar_t/lookup_collatename.cc @@ -33,13 +33,9 @@ test01() typedef wchar_t 
CharT; typedef std::regex_traits traits; - wchar_t name[] = L"ll"; - traits t; - - traits::string_type sname = - t.lookup_collatename(name, name+sizeof(name)/sizeof(*name)-1); - - VERIFY( !sname.empty() ); + traits t; + CharT name[] = L"tilde"; + VERIFY(t.lookup_collatename(name, name+sizeof(name)/sizeof(*name)-1) == L"~"); } int main() -- 2.39.2