Makefile.am: Add regex_scanner.{h,tcc}.

author Tim Shen <timshen91@gmail.com>

Tue, 27 Aug 2013 02:49:22 +0000 (02:49 +0000)

committer Tim Shen <timshen@gcc.gnu.org>

Tue, 27 Aug 2013 02:49:22 +0000 (02:49 +0000)
author Tim Shen <timshen91@gmail.com>
Tue, 27 Aug 2013 02:49:22 +0000 (02:49 +0000)
committer Tim Shen <timshen@gcc.gnu.org>
Tue, 27 Aug 2013 02:49:22 +0000 (02:49 +0000)
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog

index 0e34fb9416495ad4aeda4bd273d6950e5e9f2c7f..866156686d379aed7c3395dc37df5a97dedb655a 100644 (file)
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,22 @@
+2013-08-26  Tim Shen  <timshen91@gmail.com>
+
+       * include/Makefile.am: Add regex_scanner.{h,tcc}.
+       * include/Makefile.in: Regenerate.
+       * include/bits/regex.h (match_search): Handle the `__first == __last`
+         situation correctly.
+       * include/bits/regex_compiler.h: Move _Scanner...
+       * include/bits/regex_scanner.h: ...to here. New.
+       * include/bits/regex_compiler.tcc: Move _Scanner...
+       * include/bits/regex_scanner.tcc: ...to here, too. New.
+       * include/bits/regex_executor.tcc: Use value instead of reference for
+         submatch.
+       * include/std/regex: Add regex_scanner.h.
+       * testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc: New.
+       * testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc: New.
+       * testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc: New.
+       * testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc: New.
+       * testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc: New.
+
  2013-08-22  Tim Shen  <timshen91@gmail.com>
  
         * include/bits/regex.h: Replace 8 spaces in indentation with a tab.
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am

index 5971af3edc150dd4cb06d16123d495c8b0153da4..0bceb5776a5e1712f6593fd5474a1e9f2c7486d2 100644 (file)
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -128,6 +128,8 @@ bits_headers = \
         ${bits_srcdir}/regex.h \
         ${bits_srcdir}/regex_constants.h \
         ${bits_srcdir}/regex_error.h \
+       ${bits_srcdir}/regex_scanner.h \
+       ${bits_srcdir}/regex_scanner.tcc \
         ${bits_srcdir}/regex_automaton.h \
         ${bits_srcdir}/regex_automaton.tcc \
         ${bits_srcdir}/regex_compiler.h \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in

index aa8ef43b22426cf62f48b5943575145539546cb0..b160639701387b943128e52d97f959bd0ecb9d54 100644 (file)
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -395,6 +395,8 @@ bits_headers = \
         ${bits_srcdir}/regex.h \
         ${bits_srcdir}/regex_constants.h \
         ${bits_srcdir}/regex_error.h \
+       ${bits_srcdir}/regex_scanner.h \
+       ${bits_srcdir}/regex_scanner.tcc \
         ${bits_srcdir}/regex_automaton.h \
         ${bits_srcdir}/regex_automaton.tcc \
         ${bits_srcdir}/regex_compiler.h \
diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h

index 555dfc6e1fb6cc02da87621197f7850fd52124ed..48388198ce0ad0f75541d67123bc3090ad8d1069 100644 (file)
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -740,11 +740,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         * @throws regex_error if @p [__first, __last) is not a valid regular
         *         expression.
         */
-      template<typename _InputIterator>
-       basic_regex(_InputIterator __first, _InputIterator __last,
+      template<typename _FwdIter>
+       basic_regex(_FwdIter __first, _FwdIter __last,
                     flag_type __f = ECMAScript)
         : _M_flags(__f),
-         _M_automaton(__detail::_Compiler<_InputIterator, _Ch_type, _Rx_traits>
+         _M_automaton(__detail::_Compiler<_FwdIter, _Ch_type, _Rx_traits>
                        (__first, __last, _M_traits, _M_flags)._M_get_nfa())
         { }
  
@@ -2371,7 +2371,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      {
        if (__re._M_automaton == nullptr)
         return false;
-      for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
+      auto __cur = __first;
+      // Continue when __cur == __last
+      do
         {
           __detail::__get_executor(__cur, __last, __m, __re, __flags)
             ->_M_search_from_first();
@@ -2391,10 +2393,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
               return true;
             }
         }
+      while (__cur++ != __last);
        return false;
      }
  
-
    /**
     * Searches for a regular expression within a range.
     * @param __first [IN]  The start of the string to search.
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h

index 4ab36d28d2b3bd5617d1c780f48d2bae120b9be3..1d588b91df8b376c7100b6facb2b30f49a1a497a 100644 (file)
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -39,197 +39,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     * @{
     */
  
-  /// Matches a character range (bracket expression)
    template<typename _CharT, typename _TraitsT>
-    struct _BracketMatcher
-    {
-      typedef typename _TraitsT::char_class_type  _CharClassT;
-      typedef typename _TraitsT::string_type      _StringT;
-      typedef regex_constants::syntax_option_type _FlagT;
-
-      explicit
-      _BracketMatcher(bool __is_non_matching,
-                     const _TraitsT& __t,
-                     _FlagT __flags)
-      : _M_is_non_matching(__is_non_matching), _M_traits(__t),
-       _M_flags(__flags), _M_class_set(0)
-      { }
-
-      bool
-      operator()(_CharT) const;
-
-      void
-      _M_add_char(_CharT __c)
-      {
-       if (_M_flags & regex_constants::collate)
-         if (_M_is_icase())
-           _M_char_set.push_back(_M_traits.translate_nocase(__c));
-         else
-           _M_char_set.push_back(_M_traits.translate(__c));
-       else
-         _M_char_set.push_back(__c);
-      }
-
-      void
-      _M_add_collating_element(const _StringT& __s)
-      {
-       auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
-       if (__st.empty())
-         __throw_regex_error(regex_constants::error_collate);
-       // TODO: digraph
-       _M_char_set.push_back(__st[0]);
-      }
-
-      void
-      _M_add_equivalence_class(const _StringT& __s)
-      {
-       _M_add_character_class(
-         _M_traits.transform_primary(&*__s.begin(), &*__s.end()));
-      }
-
-      void
-      _M_add_character_class(const _StringT& __s)
-      {
-       auto __st = _M_traits.
-         lookup_classname(&*__s.begin(), &*__s.end(), _M_is_icase());
-       if (__st == 0)
-         __throw_regex_error(regex_constants::error_ctype);
-       _M_class_set |= __st;
-      }
-
-      void
-      _M_make_range(_CharT __l, _CharT __r)
-      { _M_range_set.push_back(make_pair(_M_get_str(__l), _M_get_str(__r))); }
-
-      bool
-      _M_is_icase() const
-      { return _M_flags & regex_constants::icase; }
-
-      _StringT
-      _M_get_str(_CharT __c) const
-      {
-       auto __s = _StringT(1,
-                           _M_is_icase()
-                           ? _M_traits.translate_nocase(__c)
-                           : _M_traits.translate(__c));
-       return _M_traits.transform(__s.begin(), __s.end());
-      }
-
-      _TraitsT                              _M_traits;
-      _FlagT                                _M_flags;
-      bool                                  _M_is_non_matching;
-      std::vector<_CharT>                   _M_char_set;
-      std::vector<pair<_StringT, _StringT>> _M_range_set;
-      _CharClassT                           _M_class_set;
-    };
-
-  /**
-   * @brief struct _Scanner. Scans an input range for regex tokens.
-   *
-   * The %_Scanner class interprets the regular expression pattern in
-   * the input range passed to its constructor as a sequence of parse
-   * tokens passed to the regular expression compiler.  The sequence
-   * of tokens provided depends on the flag settings passed to the
-   * constructor: different regular expression grammars will interpret
-   * the same input pattern in syntactically different ways.
-   */
-  template<typename _InputIter>
-    class _Scanner
-    {
-    public:
-      typedef unsigned int                                          _StateT;
-      typedef typename std::iterator_traits<_InputIter>::value_type _CharT;
-      typedef std::basic_string<_CharT>                             _StringT;
-      typedef regex_constants::syntax_option_type                   _FlagT;
-      typedef const std::ctype<_CharT>                              _CtypeT;
-
-      /// Token types returned from the scanner.
-      enum _TokenT
-      {
-       _S_token_anychar,
-       _S_token_backref,
-       _S_token_bracket_begin,
-       _S_token_bracket_inverse_begin,
-       _S_token_bracket_end,
-       _S_token_char_class_name,
-       _S_token_closure0,
-       _S_token_closure1,
-       _S_token_collelem_multi,
-       _S_token_collelem_single,
-       _S_token_collsymbol,
-       _S_token_comma,
-       _S_token_dash,
-       _S_token_dup_count,
-       _S_token_eof,
-       _S_token_equiv_class_name,
-       _S_token_interval_begin,
-       _S_token_interval_end,
-       _S_token_line_begin,
-       _S_token_line_end,
-       _S_token_opt,
-       _S_token_or,
-       _S_token_ord_char,
-       _S_token_subexpr_begin,
-       _S_token_subexpr_end,
-       _S_token_word_begin,
-       _S_token_word_end,
-       _S_token_unknown
-      };
-
-      _Scanner(_InputIter __begin, _InputIter __end,
-              _FlagT __flags, std::locale __loc)
-      : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
-       _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
-      { _M_advance(); }
-
-      void
-      _M_advance();
-
-      _TokenT
-      _M_token() const
-      { return _M_curToken; }
-
-      const _StringT&
-      _M_value() const
-      { return _M_curValue; }
-
-#ifdef _GLIBCXX_DEBUG
-      std::ostream&
-      _M_print(std::ostream&);
-#endif
-
-    private:
-      void
-      _M_eat_escape();
-
-      void
-      _M_scan_in_brace();
-
-      void
-      _M_scan_in_bracket();
-
-      void
-      _M_eat_charclass();
-
-      void
-      _M_eat_equivclass();
-
-      void
-      _M_eat_collsymbol();
-
-      static constexpr _StateT _S_state_in_brace    = 1 << 0;
-      static constexpr _StateT _S_state_in_bracket  = 1 << 1;
-      _InputIter  _M_current;
-      _InputIter  _M_end;
-      _FlagT      _M_flags;
-      _CtypeT&    _M_ctype;
-      _TokenT     _M_curToken;
-      _StringT    _M_curValue;
-      _StateT     _M_state;
-    };
+    struct _BracketMatcher;
  
    /// Builds an NFA from an input iterator interval.
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      class _Compiler
      {
      public:
@@ -237,7 +51,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        typedef _NFA<_CharT, _TraitsT>              _RegexT;
        typedef regex_constants::syntax_option_type _FlagT;
  
-      _Compiler(_InputIter __b, _InputIter __e,
+      _Compiler(_FwdIter __b, _FwdIter __e,
                 const _TraitsT& __traits, _FlagT __flags);
  
        std::shared_ptr<_RegexT>
@@ -245,7 +59,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        { return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); }
  
      private:
-      typedef _Scanner<_InputIter>                            _ScannerT;
+      typedef _Scanner<_FwdIter>                              _ScannerT;
        typedef typename _ScannerT::_TokenT                     _TokenT;
        typedef _StateSeq<_CharT, _TraitsT>                     _StateSeqT;
        typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
@@ -276,7 +90,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        bool
        _M_bracket_expression();
  
-      bool
+      void
        _M_bracket_list(_BMatcherT& __matcher);
  
        bool
@@ -303,14 +117,111 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        int
        _M_cur_int_value(int __radix);
  
+      bool
+      _M_try_char();
+
+      _CharT
+      _M_get_char();
+
        const _TraitsT& _M_traits;
        _ScannerT       _M_scanner;
-      _StringT        _M_cur_value;
+      _StringT        _M_value;
        _RegexT         _M_state_store;
        _StackT         _M_stack;
        _FlagT          _M_flags;
      };
  
+  /// Matches a character range (bracket expression)
+  template<typename _CharT, typename _TraitsT>
+    struct _BracketMatcher
+    {
+      typedef typename _TraitsT::char_class_type  _CharClassT;
+      typedef typename _TraitsT::string_type      _StringT;
+      typedef regex_constants::syntax_option_type _FlagT;
+
+      explicit
+      _BracketMatcher(bool __is_non_matching,
+                     const _TraitsT& __t,
+                     _FlagT __flags)
+      : _M_is_non_matching(__is_non_matching), _M_traits(__t),
+       _M_flags(__flags), _M_class_set(0)
+      { }
+
+      bool
+      operator()(_CharT) const;
+
+      void
+      _M_add_char(_CharT __c)
+      { _M_char_set.push_back(_M_translate(__c)); }
+
+      void
+      _M_add_collating_element(const _StringT& __s)
+      {
+       auto __st = _M_traits.lookup_collatename(__s.data(),
+                                                __s.data() + __s.size());
+       if (__st.empty())
+         __throw_regex_error(regex_constants::error_collate);
+       // TODO: digraph
+       _M_char_set.push_back(__st[0]);
+      }
+
+      void
+      _M_add_equivalence_class(const _StringT& __s)
+      {
+       _M_add_character_class(
+         _M_traits.transform_primary(__s.data(),
+                                     __s.data() + __s.size()));
+      }
+
+      void
+      _M_add_character_class(const _StringT& __s)
+      {
+       auto __st = _M_traits.
+         lookup_classname(__s.data(), __s.data() + __s.size(), _M_is_icase());
+       if (__st == 0)
+         __throw_regex_error(regex_constants::error_ctype);
+       _M_class_set |= __st;
+      }
+
+      void
+      _M_make_range(_CharT __l, _CharT __r)
+      {
+       _M_range_set.push_back(
+         make_pair(_M_get_str(_M_translate(__l)),
+                   _M_get_str(_M_translate(__r))));
+      }
+
+      _CharT
+      _M_translate(_CharT __c) const
+      {
+       if (_M_flags & regex_constants::collate)
+         if (_M_is_icase())
+           return _M_traits.translate_nocase(__c);
+         else
+           return _M_traits.translate(__c);
+       else
+         return __c;
+      }
+
+      bool
+      _M_is_icase() const
+      { return _M_flags & regex_constants::icase; }
+
+      _StringT
+      _M_get_str(_CharT __c) const
+      {
+       _StringT __s(1, __c);
+       return _M_traits.transform(__s.begin(), __s.end());
+      }
+
+      _TraitsT                              _M_traits;
+      _FlagT                                _M_flags;
+      bool                                  _M_is_non_matching;
+      std::vector<_CharT>                   _M_char_set;
+      std::vector<pair<_StringT, _StringT>> _M_range_set;
+      _CharClassT                           _M_class_set;
+    };
+
   //@} regex-detail
  _GLIBCXX_END_NAMESPACE_VERSION
  } // namespace __detail
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc

index 5755c2a0e86ae3f7f1dcec309909f4c7117204a6..bed091a4486deb5bbfb2db51f6236c21ef7e7b70 100644 (file)
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -34,506 +34,15 @@ namespace __detail
  {
  _GLIBCXX_BEGIN_NAMESPACE_VERSION
  
-  template<typename _BiIter>
-    void
-    _Scanner<_BiIter>::
-    _M_advance()
-    {
-      if (_M_current == _M_end)
-       {
-         _M_curToken = _S_token_eof;
-         return;
-       }
-
-      _CharT __c = *_M_current;
-      if (_M_state & _S_state_in_bracket)
-       {
-         _M_scan_in_bracket();
-         return;
-       }
-      if (_M_state & _S_state_in_brace)
-       {
-         _M_scan_in_brace();
-         return;
-       }
-#if 0
-      // TODO: re-enable line anchors when _M_assertion is implemented.
-      // See PR libstdc++/47724
-      else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
-       {
-         _M_curToken = _S_token_line_begin;
-         ++_M_current;
-         return;
-       }
-      else if (__c == _M_ctype.widen('$'))
-       {
-         _M_curToken = _S_token_line_end;
-         ++_M_current;
-         return;
-       }
-#endif
-      else if (__c == _M_ctype.widen('.'))
-       {
-         _M_curToken = _S_token_anychar;
-         ++_M_current;
-         return;
-       }
-      else if (__c == _M_ctype.widen('*'))
-       {
-         _M_curToken = _S_token_closure0;
-         ++_M_current;
-         return;
-       }
-      else if (__c == _M_ctype.widen('+'))
-       {
-         _M_curToken = _S_token_closure1;
-         ++_M_current;
-         return;
-       }
-      else if (__c == _M_ctype.widen('|'))
-       {
-         _M_curToken = _S_token_or;
-         ++_M_current;
-         return;
-       }
-      else if (__c == _M_ctype.widen('['))
-       {
-         if (*++_M_current == _M_ctype.widen('^'))
-           {
-             _M_curToken = _S_token_bracket_inverse_begin;
-             ++_M_current;
-           }
-         else
-           _M_curToken = _S_token_bracket_begin;
-         _M_state |= _S_state_in_bracket;
-         return;
-       }
-      else if (__c == _M_ctype.widen('\\'))
-       {
-         _M_eat_escape();
-         return;
-       }
-      else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
-       {
-         if (__c == _M_ctype.widen('('))
-           {
-             _M_curToken = _S_token_subexpr_begin;
-             ++_M_current;
-             return;
-           }
-         else if (__c == _M_ctype.widen(')'))
-           {
-             _M_curToken = _S_token_subexpr_end;
-             ++_M_current;
-             return;
-           }
-         else if (__c == _M_ctype.widen('{'))
-           {
-             _M_curToken = _S_token_interval_begin;
-             _M_state |= _S_state_in_brace;
-             ++_M_current;
-             return;
-           }
-       }
-
-      _M_curToken = _S_token_ord_char;
-      _M_curValue.assign(1, __c);
-      ++_M_current;
-    }
-
-  template<typename _BiIter>
-    void
-    _Scanner<_BiIter>::
-    _M_scan_in_brace()
-    {
-      if (_M_ctype.is(_CtypeT::digit, *_M_current))
-       {
-         _M_curToken = _S_token_dup_count;
-         _M_curValue.assign(1, *_M_current);
-         ++_M_current;
-         while (_M_current != _M_end
-                && _M_ctype.is(_CtypeT::digit, *_M_current))
-           {
-             _M_curValue += *_M_current;
-             ++_M_current;
-           }
-         return;
-       }
-      else if (*_M_current == _M_ctype.widen(','))
-       {
-         _M_curToken = _S_token_comma;
-         ++_M_current;
-         return;
-       }
-      if (_M_flags & (regex_constants::basic | regex_constants::grep))
-       {
-         if (*_M_current == _M_ctype.widen('\\'))
-           _M_eat_escape();
-       }
-      else
-       {
-         if (*_M_current == _M_ctype.widen('}'))
-           {
-             _M_curToken = _S_token_interval_end;
-             _M_state &= ~_S_state_in_brace;
-             ++_M_current;
-             return;
-           }
-       }
-    }
-
-  template<typename _BiIter>
-    void
-    _Scanner<_BiIter>::
-    _M_scan_in_bracket()
-    {
-      if (*_M_current == _M_ctype.widen('['))
-       {
-         ++_M_current;
-         if (_M_current == _M_end)
-           {
-             _M_curToken = _S_token_eof;
-             return;
-           }
-
-         if (*_M_current == _M_ctype.widen('.'))
-           {
-             _M_curToken = _S_token_collsymbol;
-             _M_eat_collsymbol();
-             return;
-           }
-         else if (*_M_current == _M_ctype.widen(':'))
-           {
-             _M_curToken = _S_token_char_class_name;
-             _M_eat_charclass();
-             return;
-           }
-         else if (*_M_current == _M_ctype.widen('='))
-           {
-             _M_curToken = _S_token_equiv_class_name;
-             _M_eat_equivclass();
-             return;
-           }
-       }
-      else if (*_M_current == _M_ctype.widen('-'))
-       {
-         _M_curToken = _S_token_dash;
-         ++_M_current;
-         return;
-       }
-      else if (*_M_current == _M_ctype.widen(']'))
-       {
-         _M_curToken = _S_token_bracket_end;
-         _M_state &= ~_S_state_in_bracket;
-         ++_M_current;
-         return;
-       }
-      else if (*_M_current == _M_ctype.widen('\\'))
-       {
-         _M_eat_escape();
-         return;
-       }
-      _M_curToken = _S_token_collelem_single;
-      _M_curValue.assign(1, *_M_current);
-      ++_M_current;
-    }
-
-  // TODO Complete it.
-  template<typename _BiIter>
-    void
-    _Scanner<_BiIter>::
-    _M_eat_escape()
-    {
-      ++_M_current;
-      if (_M_current == _M_end)
-       {
-         _M_curToken = _S_token_eof;
-         return;
-       }
-      _CharT __c = *_M_current;
-      ++_M_current;
-
-      if (__c == _M_ctype.widen('('))
-       {
-         if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
-           {
-             _M_curToken = _S_token_ord_char;
-             _M_curValue.assign(1, __c);
-           }
-         else
-           _M_curToken = _S_token_subexpr_begin;
-       }
-      else if (__c == _M_ctype.widen(')'))
-       {
-         if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
-           {
-             _M_curToken = _S_token_ord_char;
-             _M_curValue.assign(1, __c);
-           }
-         else
-           _M_curToken = _S_token_subexpr_end;
-       }
-      else if (__c == _M_ctype.widen('{'))
-       {
-         if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
-           {
-             _M_curToken = _S_token_ord_char;
-             _M_curValue.assign(1, __c);
-           }
-         else
-           {
-             _M_curToken = _S_token_interval_begin;
-             _M_state |= _S_state_in_brace;
-           }
-       }
-      else if (__c == _M_ctype.widen('}'))
-       {
-         if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
-           {
-             _M_curToken = _S_token_ord_char;
-             _M_curValue.assign(1, __c);
-           }
-         else
-           {
-             if (!(_M_state && _S_state_in_brace))
-               __throw_regex_error(regex_constants::error_badbrace);
-             _M_state &= ~_S_state_in_brace;
-             _M_curToken = _S_token_interval_end;
-           }
-       }
-      else if (__c == _M_ctype.widen('x'))
-       {
-         ++_M_current;
-         if (_M_current == _M_end)
-           {
-             _M_curToken = _S_token_eof;
-             return;
-           }
-         if (_M_ctype.is(_CtypeT::digit, *_M_current))
-           {
-             _M_curValue.assign(1, *_M_current);
-             ++_M_current;
-             if (_M_current == _M_end)
-               {
-                 _M_curToken = _S_token_eof;
-                 return;
-               }
-             if (_M_ctype.is(_CtypeT::digit, *_M_current))
-               {
-                 _M_curValue += *_M_current;
-                 ++_M_current;
-                 return;
-               }
-           }
-       }
-      else if (__c == _M_ctype.widen('^')
-              || __c == _M_ctype.widen('.')
-              || __c == _M_ctype.widen('*')
-              || __c == _M_ctype.widen('$')
-              || __c == _M_ctype.widen('\\'))
-       {
-         _M_curToken = _S_token_ord_char;
-         _M_curValue.assign(1, __c);
-       }
-      else if (_M_ctype.is(_CtypeT::digit, __c))
-       {
-         _M_curToken = _S_token_backref;
-         _M_curValue.assign(1, __c);
-       }
-      else if (_M_state & _S_state_in_bracket)
-       {
-         if (__c == _M_ctype.widen('-')
-             || __c == _M_ctype.widen('[')
-             || __c == _M_ctype.widen(']'))
-           {
-             _M_curToken = _S_token_ord_char;
-             _M_curValue.assign(1, __c);
-           }
-         else if ((_M_flags & regex_constants::ECMAScript)
-                  && __c == _M_ctype.widen('b'))
-           {
-             _M_curToken = _S_token_ord_char;
-             _M_curValue.assign(1, _M_ctype.widen(' '));
-           }
-         else
-           __throw_regex_error(regex_constants::error_escape);
-       }
-      else
-       __throw_regex_error(regex_constants::error_escape);
-    }
-
-  // Eats a character class or throwns an exception.
-  // current point to ':' delimiter on entry, char after ']' on return
-  template<typename _BiIter>
-    void
-    _Scanner<_BiIter>::
-    _M_eat_charclass()
-    {
-      ++_M_current; // skip ':'
-      if (_M_current == _M_end)
-       __throw_regex_error(regex_constants::error_ctype);
-      for (_M_curValue.clear();
-          _M_current != _M_end && *_M_current != _M_ctype.widen(':');
-          ++_M_current)
-       _M_curValue += *_M_current;
-      if (_M_current == _M_end)
-       __throw_regex_error(regex_constants::error_ctype);
-      ++_M_current; // skip ':'
-      if (*_M_current != _M_ctype.widen(']'))
-       __throw_regex_error(regex_constants::error_ctype);
-      ++_M_current; // skip ']'
-    }
-
-
-  template<typename _BiIter>
-    void
-    _Scanner<_BiIter>::
-    _M_eat_equivclass()
-    {
-      ++_M_current; // skip '='
-      if (_M_current == _M_end)
-       __throw_regex_error(regex_constants::error_collate);
-      for (_M_curValue.clear();
-          _M_current != _M_end && *_M_current != _M_ctype.widen('=');
-          ++_M_current)
-       _M_curValue += *_M_current;
-      if (_M_current == _M_end)
-       __throw_regex_error(regex_constants::error_collate);
-      ++_M_current; // skip '='
-      if (*_M_current != _M_ctype.widen(']'))
-       __throw_regex_error(regex_constants::error_collate);
-      ++_M_current; // skip ']'
-    }
-
-
-  template<typename _BiIter>
-    void
-    _Scanner<_BiIter>::
-    _M_eat_collsymbol()
-    {
-      ++_M_current; // skip '.'
-      if (_M_current == _M_end)
-       __throw_regex_error(regex_constants::error_collate);
-      for (_M_curValue.clear();
-          _M_current != _M_end && *_M_current != _M_ctype.widen('.');
-          ++_M_current)
-       _M_curValue += *_M_current;
-      if (_M_current == _M_end)
-       __throw_regex_error(regex_constants::error_collate);
-      ++_M_current; // skip '.'
-      if (*_M_current != _M_ctype.widen(']'))
-       __throw_regex_error(regex_constants::error_collate);
-      ++_M_current; // skip ']'
-    }
-
-#ifdef _GLIBCXX_DEBUG
-  template<typename _BiIter>
-    std::ostream&
-    _Scanner<_BiIter>::
-    _M_print(std::ostream& ostr)
-    {
-      switch (_M_curToken)
-      {
-       case _S_token_anychar:
-         ostr << "any-character\n";
-         break;
-       case _S_token_backref:
-         ostr << "backref\n";
-         break;
-       case _S_token_bracket_begin:
-         ostr << "bracket-begin\n";
-         break;
-       case _S_token_bracket_inverse_begin:
-         ostr << "bracket-inverse-begin\n";
-         break;
-       case _S_token_bracket_end:
-         ostr << "bracket-end\n";
-         break;
-       case _S_token_char_class_name:
-         ostr << "char-class-name \"" << _M_curValue << "\"\n";
-         break;
-       case _S_token_closure0:
-         ostr << "closure0\n";
-         break;
-       case _S_token_closure1:
-         ostr << "closure1\n";
-         break;
-       case _S_token_collelem_multi:
-         ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
-         break;
-       case _S_token_collelem_single:
-         ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
-         break;
-       case _S_token_collsymbol:
-         ostr << "collsymbol \"" << _M_curValue << "\"\n";
-         break;
-       case _S_token_comma:
-         ostr << "comma\n";
-         break;
-       case _S_token_dash:
-         ostr << "dash\n";
-         break;
-       case _S_token_dup_count:
-         ostr << "dup count: " << _M_curValue << "\n";
-         break;
-       case _S_token_eof:
-         ostr << "EOF\n";
-         break;
-       case _S_token_equiv_class_name:
-         ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
-         break;
-       case _S_token_interval_begin:
-         ostr << "interval begin\n";
-         break;
-       case _S_token_interval_end:
-         ostr << "interval end\n";
-         break;
-       case _S_token_line_begin:
-         ostr << "line begin\n";
-         break;
-       case _S_token_line_end:
-         ostr << "line end\n";
-         break;
-       case _S_token_opt:
-         ostr << "opt\n";
-         break;
-       case _S_token_or:
-         ostr << "or\n";
-         break;
-       case _S_token_ord_char:
-         ostr << "ordinary character: \"" << _M_value() << "\"\n";
-         break;
-       case _S_token_subexpr_begin:
-         ostr << "subexpr begin\n";
-         break;
-       case _S_token_subexpr_end:
-         ostr << "subexpr end\n";
-         break;
-       case _S_token_word_begin:
-         ostr << "word begin\n";
-         break;
-       case _S_token_word_end:
-         ostr << "word end\n";
-         break;
-       case _S_token_unknown:
-         ostr << "-- unknown token --\n";
-         break;
-       default:
-         _GLIBCXX_DEBUG_ASSERT(false);
-      }
-      return ostr;
-    }
-#endif
-
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
-    _Compiler<_InputIter, _CharT, _TraitsT>::
-    _Compiler(_InputIter __b, _InputIter __e,
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _Compiler(_FwdIter __b, _FwdIter __e,
               const _TraitsT& __traits, _FlagT __flags)
      : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
        _M_state_store(__flags), _M_flags(__flags)
      {
        _StateSeqT __r(_M_state_store,
-                   _M_state_store._M_insert_subexpr_begin());
+                    _M_state_store._M_insert_subexpr_begin());
        _M_disjunction();
        if (!_M_stack.empty())
         {
@@ -544,23 +53,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        __r._M_append(_M_state_store._M_insert_accept());
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      bool
-    _Compiler<_InputIter, _CharT, _TraitsT>::
-    _M_match_token(_Compiler<_InputIter, _CharT, _TraitsT>::_TokenT token)
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _M_match_token(_TokenT token)
      {
-      if (token == _M_scanner._M_token())
+      if (token == _M_scanner._M_get_token())
         {
-         _M_cur_value = _M_scanner._M_value();
+         _M_value = _M_scanner._M_get_value();
           _M_scanner._M_advance();
           return true;
         }
        return false;
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      void
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_disjunction()
      {
        this->_M_alternative();
@@ -573,9 +82,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         }
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      void
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_alternative()
      {
        if (this->_M_term())
@@ -591,9 +100,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         }
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      bool
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_term()
      {
        if (this->_M_assertion())
@@ -606,37 +115,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        return false;
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  // TODO: Implement assertions.  Until then no assertion token is consumed
+  // here and this function always returns false.
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      bool
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_assertion()
      {
-      if (_M_match_token(_ScannerT::_S_token_line_begin))
-       {
-         // __m.push(_Matcher::_S_opcode_line_begin);
-         return true;
-       }
-      if (_M_match_token(_ScannerT::_S_token_line_end))
-       {
-         // __m.push(_Matcher::_S_opcode_line_end);
-         return true;
-       }
-      if (_M_match_token(_ScannerT::_S_token_word_begin))
-       {
-         // __m.push(_Matcher::_S_opcode_word_begin);
-         return true;
-       }
-      if (_M_match_token(_ScannerT::_S_token_word_end))
-       {
-         // __m.push(_Matcher::_S_opcode_word_end);
-         return true;
-       }
        return false;
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      void
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_quantifier()
      {
        if (_M_match_token(_ScannerT::_S_token_closure0))
@@ -707,15 +197,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         }
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      bool
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_atom()
      {
        if (_M_match_token(_ScannerT::_S_token_anychar))
         {
           const static auto&
-         __any_matcher = [](_CharT) -> bool
+         __any_matcher = [](_CharT __ch) -> bool
           { return true; };
  
           _M_stack.push(_StateSeqT(_M_state_store,
@@ -723,9 +213,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                   (__any_matcher)));
           return true;
         }
-      if (_M_match_token(_ScannerT::_S_token_ord_char))
+      if (_M_try_char())
         {
-         auto __c = _M_cur_value[0];
+         _CharT __c = _M_value[0];
           __detail::_Matcher<_CharT> f;
           if (_M_flags & regex_constants::icase)
             {
@@ -744,7 +234,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         }
        if (_M_match_token(_ScannerT::_S_token_backref))
         {
-         // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
           _M_stack.push(_StateSeqT(_M_state_store, _M_state_store.
                                    _M_insert_backref(_M_cur_int_value(10))));
           return true;
@@ -770,90 +259,111 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        return _M_bracket_expression();
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  // Parses a bracket expression.  Returns false without consuming anything
+  // if the current token opens neither "[" nor "[^".  Otherwise the terms
+  // up to the closing bracket are collected into a bracket matcher, a
+  // matcher state for it is pushed onto _M_stack, and true is returned.
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      bool
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_bracket_expression()
      {
-      bool __inverse =
-       _M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
-      if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin)))
+      bool __neg =
+       _M_match_token(_ScannerT::_S_token_bracket_neg_begin);
+      if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
         return false;
-      _BMatcherT __matcher( __inverse, _M_traits, _M_flags);
-      // special case: only if  _not_ chr first after
-      // '[' or '[^' or if ECMAscript
-      if (!_M_bracket_list(__matcher) // list is empty
-         && !(_M_flags & regex_constants::ECMAScript))
-       __throw_regex_error(regex_constants::error_brack);
+      _BMatcherT __matcher(__neg, _M_traits, _M_flags);
+      _M_bracket_list(__matcher);
        _M_stack.push(_StateSeqT(_M_state_store,
                                _M_state_store._M_insert_matcher(__matcher)));
        return true;
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
-    bool // list is non-empty
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+  // Parses expression terms into __matcher until the closing bracket token
+  // has been consumed.  Recurses once per term.
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+    void
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_bracket_list(_BMatcherT& __matcher)
      {
        if (_M_match_token(_ScannerT::_S_token_bracket_end))
-       return false;
+       return;
        _M_expression_term(__matcher);
        _M_bracket_list(__matcher);
-      return true;
+      return;
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  // Parses one term of a bracket expression and records it in __matcher:
+  // a collating symbol, an equivalence class, a character class, a single
+  // character, or a range such as "a-z".  Throws error_range for a dash
+  // that is followed by something other than a character or the closing
+  // bracket, and error_brack if no term can be parsed.
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      void
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_expression_term(_BMatcherT& __matcher)
      {
        if (_M_match_token(_ScannerT::_S_token_collsymbol))
         {
-         __matcher._M_add_collating_element(_M_cur_value);
+         __matcher._M_add_collating_element(_M_value);
           return;
         }
        if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
         {
-         __matcher._M_add_equivalence_class(_M_cur_value);
+         __matcher._M_add_equivalence_class(_M_value);
           return;
         }
        if (_M_match_token(_ScannerT::_S_token_char_class_name))
         {
-         __matcher._M_add_character_class(_M_cur_value);
+         __matcher._M_add_character_class(_M_value);
           return;
         }
-      if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
+      if (_M_try_char()) // [a
         {
-         auto __ch = _M_cur_value[0];
-         if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
+         auto __ch = _M_value[0];
+         // Only peek at the next token here.  Consuming it before knowing
+         // that it is a dash would make "[ab-z]" add 'a' and 'b' as a pair
+         // and then see '-' and 'z' as plain characters instead of the
+         // range b-z.
+         if (_M_scanner._M_get_token() == _ScannerT::_S_token_ord_char)
             {
-             // If the dash is the last character in the bracket expression,
-             // it is not special.
-             if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
-               __matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-]
-             else // [a-z]
+             if (_M_scanner._M_get_value()[0]
+                 == std::use_facet<std::ctype<_CharT>>
+                      (_M_traits.getloc()).widen('-')) // [a-
                 {
-                 if (!_M_match_token(_ScannerT::_S_token_collelem_single))
+                 // Consume the dash that was just peeked at.
+                 _M_match_token(_ScannerT::_S_token_ord_char);
+                 if (_M_try_char()) // [a-z]
+                   {
+                     __matcher._M_make_range(__ch, _M_value[0]);
+                     return;
+                   }
+                 // If the dash is the last character in the bracket
+                 // expression, it is not special.
+                 if (_M_scanner._M_get_token()
+                     != _ScannerT::_S_token_bracket_end)
                     __throw_regex_error(regex_constants::error_range);
-                 __matcher._M_make_range(__ch, _M_cur_value[0]);
+                 // _M_value still holds the dash here: [a-] <=> [a\-]
+                 __matcher._M_add_char(_M_value[0]);
                 }
             }
-         else // [a]
-           __matcher._M_add_char(__ch);
+         __matcher._M_add_char(__ch);
           return;
         }
        __throw_regex_error(regex_constants::error_brack);
      }
  
-  template<typename _InputIter, typename _CharT, typename _TraitsT>
+  // Tries to consume one token that denotes a single character: an octal
+  // escape, a hexadecimal escape or an ordinary character.  On success
+  // _M_value holds that character as a one-element string and true is
+  // returned; otherwise nothing is consumed and false is returned.
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
+    bool
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
+    _M_try_char()
+    {
+      if (_M_match_token(_ScannerT::_S_token_oct_num))
+       {
+         // _M_value holds the octal digits; replace them by the character.
+         _M_value.assign(1, _M_cur_int_value(8));
+         return true;
+       }
+      if (_M_match_token(_ScannerT::_S_token_hex_num))
+       {
+         // _M_value holds the hex digits; replace them by the character.
+         _M_value.assign(1, _M_cur_int_value(16));
+         return true;
+       }
+      return _M_match_token(_ScannerT::_S_token_ord_char);
+    }
+
+  // Interprets the digits currently stored in _M_value as a number in base
+  // __radix, converting each character with _M_traits.value(), and returns
+  // the result.  No overflow check is made.
+  template<typename _FwdIter, typename _CharT, typename _TraitsT>
      int
-    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler<_FwdIter, _CharT, _TraitsT>::
      _M_cur_int_value(int __radix)
      {
        int __v = 0;
        for (typename _StringT::size_type __i = 0;
-          __i < _M_cur_value.length(); ++__i)
-       __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
+          __i < _M_value.length(); ++__i)
+       __v =__v * __radix + _M_traits.value(_M_value[__i], __radix);
        return __v;
      }
  
@@ -861,35 +371,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      bool _BracketMatcher<_CharT, _TraitsT>::
      operator()(_CharT __ch) const
      {
-      auto __oldch = __ch;
-      if (_M_flags & regex_constants::collate)
-       if (_M_is_icase())
-         __ch = _M_traits.translate_nocase(__ch);
-       else
-         __ch = _M_traits.translate(__ch);
-
        bool __ret = false;
-      for (auto __c : _M_char_set)
-       if (__c == __ch)
-         {
-           __ret = true;
-           break;
-         }
-      if (!__ret && _M_traits.isctype(__oldch, _M_class_set))
+      if (_M_traits.isctype(__ch, _M_class_set))
         __ret = true;
        else
         {
-         _StringT __s = _M_get_str(__ch);
-         for (auto& __it : _M_range_set)
-           if (__it.first <= __s && __s <= __it.second)
+         __ch = _M_translate(__ch);
+
+         for (auto __c : _M_char_set)
+           if (__c == __ch)
               {
                 __ret = true;
                 break;
               }
+         if (!__ret)
+           {
+             _StringT __s = _M_get_str(__ch);
+             for (auto& __it : _M_range_set)
+               if (__it.first <= __s && __s <= __it.second)
+                 {
+                   __ret = true;
+                   break;
+                 }
+           }
         }
        if (_M_is_non_matching)
-       __ret = !__ret;
-      return __ret;
+       return !__ret;
+      else
+       return __ret;
      }
  
  _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc

index bc99331ce8e9d0d3ee60f12a3ef0954b5e230584..edfd0b649ff78bb7a3bca20bae47f47ecaaa1b8b 100644 (file)
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -260,7 +260,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        auto __size = __u.size();
        for (auto __i = 0; __i < __size; __i++)
         {
-         auto& __uit = __u[__i], __vit = __v[__i];
+         auto __uit = __u[__i], __vit = __v[__i];
           if (__uit.matched && !__vit.matched)
             return true;
           if (!__uit.matched && __vit.matched)
diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h

new file mode 100644 (file)

index 0000000..080ef63
--- /dev/null
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -0,0 +1,194 @@
+// class template regex -*- C++ -*-
+
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+/**
+ *  @file bits/regex_scanner.h
+ *  This is an internal header file, included by other library headers.
+ *  Do not attempt to use it directly. @headername{regex}
+ */
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+namespace __detail
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+  /**
+   * @addtogroup regex-detail
+   * @{
+   */
+
+  /**
+   * @brief Scans an input range for regex tokens.
+   *
+   * The %_Scanner class interprets the regular expression pattern in
+   * the input range passed to its constructor as a sequence of parse
+   * tokens passed to the regular expression compiler.  The sequence
+   * of tokens provided depends on the flag settings passed to the
+   * constructor: different regular expression grammars will interpret
+   * the same input pattern in syntactically different ways.
+   */
+  template<typename _FwdIter>
+    class _Scanner
+    {
+    public:
+      typedef typename std::iterator_traits<_FwdIter>::value_type _CharT;
+      typedef std::basic_string<_CharT>                           _StringT;
+      typedef regex_constants::syntax_option_type                 _FlagT;
+      typedef const std::ctype<_CharT>                            _CtypeT;
+
+      /// Token types returned from the scanner.
+      enum _TokenT
+      {
+       _S_token_anychar,
+       _S_token_ord_char,
+       _S_token_oct_num,
+       _S_token_hex_num,
+       _S_token_backref,
+       _S_token_subexpr_begin,
+       _S_token_subexpr_no_group_begin,
+       _S_token_subexpr_lookahead_begin,
+       _S_token_subexpr_neg_lookahead_begin,
+       _S_token_subexpr_end,
+       _S_token_bracket_begin,
+       _S_token_bracket_neg_begin,
+       _S_token_bracket_end,
+       _S_token_interval_begin,
+       _S_token_interval_end,
+       _S_token_quoted_class,
+       _S_token_char_class_name,
+       _S_token_collsymbol,
+       _S_token_equiv_class_name,
+       _S_token_opt,
+       _S_token_or,
+       _S_token_closure0,
+       _S_token_closure1,
+       _S_token_line_begin,
+       _S_token_line_end,
+       _S_token_comma,
+       _S_token_dup_count,
+       _S_token_eof,
+       _S_token_unknown
+      };
+
+      /// Sets up the scanner for [__begin, __end) and scans the first token.
+      _Scanner(_FwdIter __begin, _FwdIter __end,
+              _FlagT __flags, std::locale __loc);
+
+      /// Scans the next token; yields _S_token_eof once the input is used up.
+      void
+      _M_advance();
+
+      /// The token produced by the most recent scan.
+      _TokenT
+      _M_get_token() const
+      { return _M_token; }
+
+      /// The text attached to the current token, for tokens that carry one.
+      const _StringT&
+      _M_get_value() const
+      { return _M_value; }
+
+#ifdef _GLIBCXX_DEBUG
+      /// Writes a description of the current token to the given stream.
+      std::ostream&
+      _M_print(std::ostream&);
+#endif
+
+    private:
+      /// Lexical context; selects which _M_scan_* routine _M_advance calls.
+      enum _StateT
+      {
+       _S_state_normal,
+       _S_state_in_brace,
+       _S_state_in_bracket,
+      };
+
+      void
+      _M_scan_normal();
+
+      void
+      _M_scan_in_bracket();
+
+      void
+      _M_scan_in_brace();
+
+      void
+      _M_eat_escape_ecma();
+
+      void
+      _M_eat_escape_posix();
+
+      void
+      _M_eat_escape_awk();
+
+      // Reads a "[:name:]", "[.name.]" or "[=name=]" body; the argument is
+      // the delimiter character.
+      void
+      _M_eat_class(char);
+
+      // Grammar predicates derived from _M_flags.
+      constexpr bool
+      _M_is_ecma()
+      { return _M_flags & regex_constants::ECMAScript; }
+
+      constexpr bool
+      _M_is_basic()
+      { return _M_flags & (regex_constants::basic | regex_constants::grep); }
+
+      constexpr bool
+      _M_is_extended()
+      {
+       return _M_flags & (regex_constants::extended
+                          | regex_constants::egrep
+                          | regex_constants::awk);
+      }
+
+      constexpr bool
+      _M_is_grep()
+      { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
+
+      constexpr bool
+      _M_is_awk()
+      { return _M_flags & regex_constants::awk; }
+
+      // NOTE: members are initialized in the order they are declared here,
+      // whatever order the constructor's mem-initializer list uses.  The
+      // references and the member-function pointer at the end rely on
+      // _M_flags and the tables having been initialized before them.
+      _StateT                       _M_state;
+      _FwdIter                      _M_current;
+      _FwdIter                      _M_end;
+      _FlagT                        _M_flags;
+      _CtypeT&                      _M_ctype;
+      _TokenT                       _M_token;
+      _StringT                      _M_value;
+      bool                          _M_at_bracket_start;
+    public:
+      // TODO: make them static when this file is stable.
+      const std::map<char, _TokenT> _M_token_map;
+      const std::map<char, char>    _M_ecma_escape_map;
+      const std::map<char, char>    _M_awk_escape_map;
+      const std::set<char>          _M_ecma_spec_char;
+      const std::set<char>          _M_basic_spec_char;
+      const std::set<char>          _M_extended_spec_char;
+
+      // Tables and escape handler selected for the active grammar.
+      const std::map<char, char>&   _M_escape_map;
+      const std::set<char>&         _M_spec_char;
+      void (_Scanner::* _M_eat_escape)();
+    };
+
+ //@} regex-detail
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace __detail
+} // namespace std
+
+#include <bits/regex_scanner.tcc>
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc

new file mode 100644 (file)

index 0000000..0d1d2cd
--- /dev/null
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -0,0 +1,609 @@
+// class template regex -*- C++ -*-
+
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+/**
+ *  @file bits/regex_scanner.tcc
+ *  This is an internal header file, included by other library headers.
+ *  Do not attempt to use it directly. @headername{regex}
+ */
+
+// TODO make comments doxygen format
+
+// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
+// and awk
+// 1) grep is basic except '\n' is treated as '|'
+// 2) egrep is extended except '\n' is treated as '|'
+// 3) awk is extended except that it has its own escaping rules and no
+//    back-references.
+//
+// References:
+//
+// ECMAScript: ECMA-262 15.10
+//
+// basic, extended:
+// http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html
+//
+// awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+namespace __detail
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+  // Builds the per-grammar lookup tables, selects the escape table, the
+  // special-character set and the escape handler that match __flags, and
+  // scans the first token.
+  //
+  // The mem-initializers are listed in member declaration order, which is
+  // the order in which they are actually run: _M_escape_map, _M_spec_char
+  // and _M_eat_escape depend on _M_flags and on the tables before them.
+  template<typename _FwdIter>
+    _Scanner<_FwdIter>::
+    _Scanner(_FwdIter __begin, _FwdIter __end,
+            _FlagT __flags, std::locale __loc)
+    : _M_state(_S_state_normal), _M_current(__begin), _M_end(__end),
+      _M_flags(__flags), _M_ctype(std::use_facet<_CtypeT>(__loc)),
+      _M_at_bracket_start(false),
+      _M_token_map
+       {
+         {'^', _S_token_line_begin},
+         {'$', _S_token_line_end},
+         {'.', _S_token_anychar},
+         {'*', _S_token_closure0},
+         {'+', _S_token_closure1},
+         {'?', _S_token_opt},
+         {'|', _S_token_or},
+         // grep and egrep
+         {'\n', _S_token_or},
+       },
+      _M_ecma_escape_map
+       {
+         {'0', '\0'},
+         {'b', '\b'},
+         {'f', '\f'},
+         {'n', '\n'},
+         {'r', '\r'},
+         {'t', '\t'},
+         {'v', '\v'},
+       },
+      _M_awk_escape_map
+       {
+         {'"', '"'},
+         {'/', '/'},
+         {'\\', '\\'},
+         {'a', '\a'},
+         {'b', '\b'},
+         {'f', '\f'},
+         {'n', '\n'},
+         {'r', '\r'},
+         {'t', '\t'},
+         {'v', '\v'},
+       },
+      _M_ecma_spec_char
+       {
+         '^',
+         '$',
+         '\\',
+         '.',
+         '*',
+         '+',
+         '?',
+         '(',
+         ')',
+         '[',
+         ']',
+         '{',
+         '}',
+         '|',
+       },
+      _M_basic_spec_char
+       {
+         '.',
+         '[',
+         '\\',
+         '*',
+         '^',
+         '$',
+       },
+      _M_extended_spec_char
+       {
+         '.',
+         '[',
+         '\\',
+         '(',
+         ')',
+         '*',
+         '+',
+         '?',
+         '{',
+         '|',
+         '^',
+         '$',
+       },
+      _M_escape_map(_M_is_ecma()
+                   ? _M_ecma_escape_map
+                   : _M_awk_escape_map),
+      _M_spec_char(_M_is_ecma()
+                  ? _M_ecma_spec_char
+                  : _M_is_basic()
+                  ? _M_basic_spec_char
+                  : _M_extended_spec_char),
+      _M_eat_escape(_M_is_ecma()
+                   ? &_Scanner::_M_eat_escape_ecma
+                   : &_Scanner::_M_eat_escape_posix)
+    { _M_advance(); }
+
+  // Scans the next token.  At the end of the input the token becomes
+  // _S_token_eof; otherwise the scan routine for the current lexical
+  // state is run.
+  template<typename _FwdIter>
+    void
+    _Scanner<_FwdIter>::
+    _M_advance()
+    {
+      if (_M_current == _M_end)
+       {
+         _M_token = _S_token_eof;
+         return;
+       }
+
+      switch (_M_state)
+       {
+       case _S_state_normal:
+         _M_scan_normal();
+         break;
+       case _S_state_in_bracket:
+         _M_scan_in_bracket();
+         break;
+       case _S_state_in_brace:
+         _M_scan_in_brace();
+         break;
+       default:
+         _GLIBCXX_DEBUG_ASSERT(false);
+         break;
+       }
+    }
+
+  // Scans one token outside of bracket and interval expressions.
+  // _M_advance only calls this when _M_current != _M_end.
+  //
+  // Differences between styles:
+  // 1) "\(", "\)", "\{" in basic. It's not escaping.
+  // 2) "(?:", "(?=", "(?!" in ECMAScript.
+  //
+  // NOTE(review): in basic/grep mode an unescaped '(', ')' or '{' also
+  // reaches the checks below and is tokenized like its escaped form; POSIX
+  // basic regular expressions treat the unescaped characters as ordinary.
+  // Confirm whether that is intended.
+  template<typename _FwdIter>
+    void
+    _Scanner<_FwdIter>::
+    _M_scan_normal()
+    {
+      auto __c = *_M_current++;
+
+      if (__c == '\\')
+       {
+         if (_M_current == _M_end)
+           __throw_regex_error(regex_constants::error_escape);
+
+         if (!_M_is_basic()
+             || (*_M_current != '('
+                 && *_M_current != ')'
+                 && *_M_current != '{'))
+           {
+             (this->*_M_eat_escape)();
+             return;
+           }
+         // basic: "\(", "\)" and "\{" are operators; drop the backslash.
+         __c = *_M_current++;
+       }
+      if (__c == '(')
+       {
+         // The '(' may be the last character of the pattern, so check for
+         // the end before looking at the character that follows it.
+         if (_M_is_ecma() && _M_current != _M_end && *_M_current == '?')
+           {
+             if (++_M_current == _M_end)
+               __throw_regex_error(regex_constants::error_paren);
+
+             if (*_M_current == ':')
+               {
+                 ++_M_current;
+                 _M_token = _S_token_subexpr_no_group_begin;
+               }
+             else if (*_M_current == '=')
+               {
+                 ++_M_current;
+                 _M_token = _S_token_subexpr_lookahead_begin;
+               }
+             else if (*_M_current == '!')
+               {
+                 ++_M_current;
+                 _M_token = _S_token_subexpr_neg_lookahead_begin;
+               }
+             else
+               __throw_regex_error(regex_constants::error_paren);
+           }
+         else
+           _M_token = _S_token_subexpr_begin;
+       }
+      else if (__c == ')')
+       _M_token = _S_token_subexpr_end;
+      else if (__c == '[')
+       {
+         _M_state = _S_state_in_bracket;
+         _M_at_bracket_start = true;
+         if (_M_current != _M_end && *_M_current == '^')
+           {
+             _M_token = _S_token_bracket_neg_begin;
+             ++_M_current;
+           }
+         else
+           _M_token = _S_token_bracket_begin;
+       }
+      else if (__c == '{')
+       {
+         _M_state = _S_state_in_brace;
+         _M_token = _S_token_interval_begin;
+       }
+      // Remaining special characters map directly to a token; a stray ']'
+      // or '}' is an ordinary character.
+      else if ((_M_spec_char.count(__c)
+               && __c != ']'
+               && __c != '}')
+              || (_M_is_grep() && __c == '\n'))
+       _M_token = _M_token_map.at(__c);
+      else
+       {
+         _M_token = _S_token_ord_char;
+         _M_value.assign(1, __c);
+       }
+    }
+
+  // Scans one token inside a bracket expression.  _M_at_bracket_start is
+  // true only while scanning the first token after "[" or "[^".
+  //
+  // Differences between styles:
+  // 1) different semantics of "[]" and "[^]".
+  // 2) Escaping in bracket expr.
+  template<typename _FwdIter>
+    void
+    _Scanner<_FwdIter>::
+    _M_scan_in_bracket()
+    {
+      if (_M_current == _M_end)
+       __throw_regex_error(regex_constants::error_brack);
+
+      auto __c = *_M_current++;
+
+      if (__c == '[')
+       {
+         if (_M_current == _M_end)
+           __throw_regex_error(regex_constants::error_brack);
+
+         // "[.", "[:" and "[=" open a named class; _M_eat_class reads the
+         // name and the closing delimiter.
+         if (*_M_current == '.')
+           {
+             _M_token = _S_token_collsymbol;
+             _M_eat_class(*_M_current++);
+           }
+         else if (*_M_current == ':')
+           {
+             _M_token = _S_token_char_class_name;
+             _M_eat_class(*_M_current++);
+           }
+         else if (*_M_current == '=')
+           {
+             _M_token = _S_token_equiv_class_name;
+             _M_eat_class(*_M_current++);
+           }
+         else
+           {
+             _M_token = _S_token_ord_char;
+             _M_value.assign(1, __c);
+           }
+       }
+      // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted
+      // literally. So "[]]" or "[^]]" is valid regex. See the testcases
+      // `*/empty_range.cc`.
+      else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start))
+       {
+         _M_token = _S_token_bracket_end;
+         _M_state = _S_state_normal;
+       }
+      // ECMAScript and awk permit escaping in bracket.
+      else if (__c == '\\' && (_M_is_ecma() || _M_is_awk()))
+       (this->*_M_eat_escape)();
+      else
+       {
+         _M_token = _S_token_ord_char;
+         _M_value.assign(1, __c);
+       }
+      _M_at_bracket_start = false;
+    }
+
+  // Scans one token inside an interval expression: a run of digits (a dup
+  // count), a comma, or the closing brace, which switches back to the
+  // normal state.  Anything else throws error_brace.
+  //
+  // Differences between styles:
+  // 1) "\}" in basic style.
+  template<typename _FwdIter>
+    void
+    _Scanner<_FwdIter>::
+    _M_scan_in_brace()
+    {
+      if (_M_current == _M_end)
+       __throw_regex_error(regex_constants::error_brace);
+
+      auto __c = *_M_current++;
+
+      if (_M_ctype.is(_CtypeT::digit, __c))
+       {
+         _M_token = _S_token_dup_count;
+         _M_value.assign(1, __c);
+         // Collect the remaining digits of the count.
+         while (_M_current != _M_end
+                && _M_ctype.is(_CtypeT::digit, *_M_current))
+           _M_value += *_M_current++;
+       }
+      else if (__c == ',')
+       _M_token = _S_token_comma;
+      // basic use \}.
+      else if (_M_is_basic())
+       {
+         if (__c == '\\' && _M_current != _M_end && *_M_current == '}')
+           {
+             _M_state = _S_state_normal;
+             _M_token = _S_token_interval_end;
+             ++_M_current;
+           }
+         else
+           __throw_regex_error(regex_constants::error_brace);
+       }
+      else if (__c == '}')
+       {
+         _M_state = _S_state_normal;
+         _M_token = _S_token_interval_end;
+       }
+      else
+       __throw_regex_error(regex_constants::error_brace);
+    }
+
+  // Handles the characters following a backslash in ECMAScript mode and
+  // sets _M_token (and _M_value) accordingly.  Throws error_escape if the
+  // input ends inside the escape or a \x / \u escape has too few hex
+  // digits.
+  template<typename _FwdIter>
+    void
+    _Scanner<_FwdIter>::
+    _M_eat_escape_ecma()
+    {
+      if (_M_current == _M_end)
+       __throw_regex_error(regex_constants::error_escape);
+
+      auto __c = *_M_current++;
+
+      // Simple control escapes.  "\b" means backspace only inside a
+      // bracket expression; elsewhere it is handled as a quoted class.
+      if (_M_escape_map.count(__c)
+         && (__c != 'b' || _M_state == _S_state_in_bracket))
+       {
+         _M_token = _S_token_ord_char;
+         _M_value.assign(1, _M_escape_map.at(__c));
+       }
+      // N3376 28.13
+      else if (__c == 'b'
+              || __c == 'B'
+              || __c == 'd'
+              || __c == 'D'
+              || __c == 's'
+              || __c == 'S'
+              || __c == 'w'
+              || __c == 'W')
+       {
+         _M_token = _S_token_quoted_class;
+         _M_value.assign(1, __c);
+       }
+      else if (__c == 'c')
+       {
+         if (_M_current == _M_end)
+           __throw_regex_error(regex_constants::error_escape);
+         _M_token = _S_token_ord_char;
+         _M_value.assign(1, *_M_current++);
+       }
+      else if (__c == 'x' || __c == 'u')
+       {
+         // "\x" takes exactly two hex digits, "\u" exactly four.
+         _M_value.erase();
+         for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++)
+           {
+             if (_M_current == _M_end
+                 || !_M_ctype.is(_CtypeT::xdigit, *_M_current))
+               __throw_regex_error(regex_constants::error_escape);
+             _M_value += *_M_current++;
+           }
+         _M_token = _S_token_hex_num;
+       }
+      // ECMAScript recognizes multi-digit back-references.
+      else if (_M_ctype.is(_CtypeT::digit, __c))
+       {
+         _M_value.assign(1, __c);
+         while (_M_current != _M_end
+                && _M_ctype.is(_CtypeT::digit, *_M_current))
+           _M_value += *_M_current++;
+         _M_token = _S_token_backref;
+       }
+      else
+       {
+         _M_token = _S_token_ord_char;
+         _M_value.assign(1, __c);
+       }
+    }
+
+  // Handles the character following a backslash for every grammar other
+  // than ECMAScript.  _M_current points at the escaped character on entry
+  // and is advanced past it at the end, except on the awk path, where
+  // _M_eat_escape_awk does the advancing.
+  template<typename _FwdIter>
+    void
+    _Scanner<_FwdIter>::
+    _M_eat_escape_posix()
+    {
+      if (_M_current == _M_end)
+       __throw_regex_error(regex_constants::error_escape);
+
+      auto __c = *_M_current;
+
+      // An escaped special character stands for itself.
+      if (_M_spec_char.count(__c))
+       {
+         _M_token = _S_token_ord_char;
+         _M_value.assign(1, __c);
+       }
+      // We MUST judge awk before handling backrefs. There's no backref in awk.
+      else if (_M_is_awk())
+       {
+         _M_eat_escape_awk();
+         return;
+       }
+      else if (_M_ctype.is(_CtypeT::digit, __c) && __c != '0')
+       {
+         _M_token = _S_token_backref;
+         _M_value.assign(1, __c);
+       }
+      else
+       __throw_regex_error(regex_constants::error_escape);
+      ++_M_current;
+    }
+
+  // Handles an awk escape: either an entry of the escape table or an octal
+  // number of up to three digits.  Anything else throws error_escape.
+  // Does not check for the end of input itself; the visible caller,
+  // _M_eat_escape_posix, has already done so.
+  template<typename _FwdIter>
+    void
+    _Scanner<_FwdIter>::
+    _M_eat_escape_awk()
+    {
+      auto __c = *_M_current++;
+
+      if (_M_escape_map.count(__c))
+       {
+         _M_token = _S_token_ord_char;
+         _M_value.assign(1, _M_escape_map.at(__c));
+       }
+      // \ddd for oct representation
+      else if (_M_ctype.is(_CtypeT::digit, __c)
+              && __c != '8'
+              && __c != '9')
+       {
+         _M_value.assign(1,  __c);
+         // Take at most two further octal digits.
+         for (int __i = 0;
+              __i < 2
+              && _M_current != _M_end
+              && _M_ctype.is(_CtypeT::digit, *_M_current)
+              && *_M_current != '8'
+              && *_M_current != '9';
+              __i++)
+           _M_value += *_M_current++;
+         _M_token = _S_token_oct_num;
+         return;
+       }
+      else
+       __throw_regex_error(regex_constants::error_escape);
+    }
+
+  // Eats a character class or throws an exception.
+  // __ch could be ':', '.' or '='; _M_current is the char after ']' when
+  // returning.  The class name is left in _M_value.  A malformed class
+  // throws error_ctype when __ch is ':' and error_collate otherwise.
+  template<typename _FwdIter>
+    void
+    _Scanner<_FwdIter>::
+    _M_eat_class(char __ch)
+    {
+      for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
+       _M_value += *_M_current++;
+      if (_M_current == _M_end
+         || *_M_current++ != __ch
+         || _M_current == _M_end // skip __ch
+         || *_M_current++ != ']') // skip ']'
+       if (__ch == ':')
+         __throw_regex_error(regex_constants::error_ctype);
+       else
+         __throw_regex_error(regex_constants::error_collate);
+    }
+
+#ifdef _GLIBCXX_DEBUG
+  // Debug aid: writes a one-line description of the current token (and its
+  // value, for tokens that carry one) to __ostr and returns __ostr.
+  template<typename _FwdIter>
+    std::ostream&
+    _Scanner<_FwdIter>::
+    _M_print(std::ostream& __ostr)
+    {
+      switch (_M_token)
+      {
+      case _S_token_anychar:
+       __ostr << "any-character\n";
+       break;
+      case _S_token_backref:
+       __ostr << "backref\n";
+       break;
+      case _S_token_bracket_begin:
+       __ostr << "bracket-begin\n";
+       break;
+      case _S_token_bracket_neg_begin:
+       __ostr << "bracket-neg-begin\n";
+       break;
+      case _S_token_bracket_end:
+       __ostr << "bracket-end\n";
+       break;
+      case _S_token_char_class_name:
+       __ostr << "char-class-name \"" << _M_value << "\"\n";
+       break;
+      case _S_token_closure0:
+       __ostr << "closure0\n";
+       break;
+      case _S_token_closure1:
+       __ostr << "closure1\n";
+       break;
+      case _S_token_collsymbol:
+       __ostr << "collsymbol \"" << _M_value << "\"\n";
+       break;
+      case _S_token_comma:
+       __ostr << "comma\n";
+       break;
+      case _S_token_dup_count:
+       __ostr << "dup count: " << _M_value << "\n";
+       break;
+      case _S_token_eof:
+       __ostr << "EOF\n";
+       break;
+      case _S_token_equiv_class_name:
+       __ostr << "equiv-class-name \"" << _M_value << "\"\n";
+       break;
+      case _S_token_interval_begin:
+       __ostr << "interval begin\n";
+       break;
+      case _S_token_interval_end:
+       __ostr << "interval end\n";
+       break;
+      case _S_token_line_begin:
+       __ostr << "line begin\n";
+       break;
+      case _S_token_line_end:
+       __ostr << "line end\n";
+       break;
+      case _S_token_opt:
+       __ostr << "opt\n";
+       break;
+      case _S_token_or:
+       __ostr << "or\n";
+       break;
+      case _S_token_ord_char:
+       __ostr << "ordinary character: \"" << _M_value << "\"\n";
+       break;
+      case _S_token_subexpr_begin:
+       __ostr << "subexpr begin\n";
+       break;
+      case _S_token_subexpr_no_group_begin:
+       __ostr << "no grouping subexpr begin\n";
+       break;
+      case _S_token_subexpr_lookahead_begin:
+       __ostr << "lookahead subexpr begin\n";
+       break;
+      case _S_token_subexpr_neg_lookahead_begin:
+       __ostr << "neg lookahead subexpr begin\n";
+       break;
+      case _S_token_subexpr_end:
+       __ostr << "subexpr end\n";
+       break;
+      case _S_token_unknown:
+       __ostr << "-- unknown token --\n";
+       break;
+      case _S_token_oct_num:
+       __ostr << "oct number " << _M_value << "\n";
+       break;
+      case _S_token_hex_num:
+       __ostr << "hex number " << _M_value << "\n";
+       break;
+      case _S_token_quoted_class:
+       __ostr << "quoted class " << "\\" << _M_value << "\n";
+       break;
+      default:
+       _GLIBCXX_DEBUG_ASSERT(false);
+      }
+      return __ostr;
+    }
+#endif
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace __detail
+} // namespace
diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex

index ac9a2a85b9b2a82aa0c088fca82a01f8257fac45..36dd0a97b8f302e036c6cd9a222bfc7c3770b735 100644 (file)
--- a/libstdc++-v3/include/std/regex
+++ b/libstdc++-v3/include/std/regex
@@ -56,6 +56,7 @@
  
  #include <bits/regex_constants.h>
  #include <bits/regex_error.h>
+#include <bits/regex_scanner.h>
  #include <bits/regex_automaton.h>
  #include <bits/regex_compiler.h>
  #include <bits/regex_executor.h>
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc

new file mode 100644 (file)

index 0000000..d4edf12
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc
@@ -0,0 +1,50 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-08-26  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests awk escaping.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  regex("\\[", regex_constants::awk);
+  VERIFY(regex_match("\"", regex("[\\\"]", regex_constants::awk)));
+  VERIFY(regex_match("/", regex("/", regex_constants::awk)));
+  VERIFY(regex_match("\a", regex("\\a", regex_constants::awk)));
+  VERIFY(regex_match("\"", regex("\\\"", regex_constants::awk)));
+  VERIFY(regex_match("5", regex("\\65", regex_constants::awk)));
+  VERIFY(regex_match("53", regex("\\0653", regex_constants::awk)));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc

new file mode 100644 (file)

index 0000000..eb22569
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc
@@ -0,0 +1,57 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-08-26  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests basic (BRE) empty range.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+#define FAIL(s) \
+  try\
+    {\
+      regex re(s, regex_constants::basic);\
+      VERIFY(false);\
+    }\
+  catch (...)\
+    {\
+      VERIFY(true);\
+    }
+  FAIL("[]");
+  FAIL("[^]");
+  VERIFY(regex_match("]", regex("[]]", regex_constants::basic)));
+  VERIFY(!regex_match("]", regex("[^]]", regex_constants::basic)));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc

new file mode 100644 (file)

index 0000000..a7ef0fb
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc
@@ -0,0 +1,54 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-08-26  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests ECMAScript \x and \u.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  VERIFY(regex_match(":", regex("\\x3a")));
+  VERIFY(regex_match(L"\u1234", wregex(L"\\u1234")));
+  try
+    {
+      regex("\\u400x");
+      VERIFY(false);
+    }
+  catch (...)
+    {
+      VERIFY(true);
+    }
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc

new file mode 100644 (file)

index 0000000..93bca45
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc
@@ -0,0 +1,47 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-08-26  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests ECMAScript empty range.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  VERIFY(!regex_match("x", regex("[]")));
+  VERIFY(regex_match("x", regex("[^]")));
+  VERIFY(!regex_match("]", regex("[]]")));
+  VERIFY(!regex_match("]", regex("[^]]")));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc

new file mode 100644 (file)

index 0000000..a2d290d
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc
@@ -0,0 +1,42 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-08-26  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.3 regex_search
+// Tests ECMAScript empty pattern against an empty C-string target.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  VERIFY(std::regex_search("", std::regex("")));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
author	Tim Shen <timshen91@gmail.com>
	Tue, 27 Aug 2013 02:49:22 +0000 (02:49 +0000)
committer	Tim Shen <timshen@gcc.gnu.org>
	Tue, 27 Aug 2013 02:49:22 +0000 (02:49 +0000)
libstdc++-v3/ChangeLog		patch \| blob \| blame \| history
libstdc++-v3/include/Makefile.am		patch \| blob \| blame \| history
libstdc++-v3/include/Makefile.in		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex.h		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_compiler.h		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_compiler.tcc		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_executor.tcc		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_scanner.h	[new file with mode: 0644]	patch \| blob
libstdc++-v3/include/bits/regex_scanner.tcc	[new file with mode: 0644]	patch \| blob
libstdc++-v3/include/std/regex		patch \| blob \| blame \| history
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc	[new file with mode: 0644]	patch \| blob
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc	[new file with mode: 0644]	patch \| blob
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc	[new file with mode: 0644]	patch \| blob
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc	[new file with mode: 0644]	patch \| blob
libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc	[new file with mode: 0644]	patch \| blob