]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not use std::map.
author Tim Shen <timshen91@gmail.com>
Thu, 16 Jan 2014 23:35:21 +0000 (23:35 +0000)
committer Tim Shen <timshen@gcc.gnu.org>
Thu, 16 Jan 2014 23:35:21 +0000 (23:35 +0000)
2014-01-17  Tim Shen  <timshen91@gmail.com>

* include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
use std::map.
* include/bits/regex_automaton.h: Do not use std::set.
* include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
_BracketMatcher<>::_M_add_collating_element(),
_BracketMatcher<>::_M_add_equivalence_class(),
_BracketMatcher<>::_M_make_range()): Likewise.
* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
Likewise.
* include/bits/regex_executor.h: Do not use std::queue.
* include/bits/regex_executor.tcc (_Executor<>::_M_main(),
_Executor<>::_M_dfs()): Likewise.
* include/std/regex: Remove <map>, <set> and <queue>.

2014-01-17  Tim Shen  <timshen91@gmail.com>

* include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
basic_regex<>::assign()): Change __compile_nfa to accept
const _CharT* only.
* include/bits/regex_compiler.h: Change _Compiler's template
argument from <_FwdIter, _TraitsT> to <_TraitsT>.
* include/bits/regex_compiler.tcc: Likewise.

2014-01-17  Tim Shen  <timshen91@gmail.com>

* include/bits/regex_compiler.h: Change _ScannerT into char-type
templated.
* include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
_ScannerBase from _Scanner; Change _Scanner's template argument from
_FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
instead.
* include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
_Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
_Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
Likewise.
* include/std/regex: Add <cstring> for using strchr.

2014-01-17  Tim Shen  <timshen91@gmail.com>

* bits/regex_automaton.tcc: Indentation fix.
* bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
_RegexTranslator<>, _AnyMatcher<>, _CharMatcher<>,
_BracketMatcher<>): Add bool option template parameters and
specializations to make matching more efficient and space saving.
* bits/regex_compiler.tcc: Likewise.

From-SVN: r206690

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex.h
libstdc++-v3/include/bits/regex_automaton.h
libstdc++-v3/include/bits/regex_automaton.tcc
libstdc++-v3/include/bits/regex_compiler.h
libstdc++-v3/include/bits/regex_compiler.tcc
libstdc++-v3/include/bits/regex_executor.h
libstdc++-v3/include/bits/regex_executor.tcc
libstdc++-v3/include/bits/regex_scanner.h
libstdc++-v3/include/bits/regex_scanner.tcc
libstdc++-v3/include/std/regex

index 18b44da054af3724bbc91d5ae5f0b740ab32067b..73b48b2f5dadbebc7171672bc820c1429c0e891d 100644 (file)
@@ -1,3 +1,51 @@
+2014-01-17  Tim Shen  <timshen91@gmail.com>
+
+       * include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
+       use std::map.
+       * include/bits/regex_automaton.h: Do not use std::set.
+       * include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
+       _BracketMatcher<>::_M_add_collating_element(),
+       _BracketMatcher<>::_M_add_equivalence_class(),
+       _BracketMatcher<>::_M_make_range()): Likewise.
+       * include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
+       Likewise.
+       * include/bits/regex_executor.h: Do not use std::queue.
+       * include/bits/regex_executor.tcc (_Executor<>::_M_main(),
+       _Executor<>::_M_dfs()): Likewise.
+       * include/std/regex: Remove <map>, <set> and <queue>.
+
+2014-01-17  Tim Shen  <timshen91@gmail.com>
+
+       * include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
+       basic_regex<>::assign()): Change __compile_nfa to accept
+       const _CharT* only.
+       * include/bits/regex_compiler.h: Change _Compiler's template
+       argument from <_FwdIter, _TraitsT> to <_TraitsT>.
+       * include/bits/regex_compiler.tcc: Likewise.
+
+2014-01-17  Tim Shen  <timshen91@gmail.com>
+
+       * include/bits/regex_compiler.h: Change _ScannerT into char-type
+       templated.
+       * include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
+       _ScannerBase from _Scanner; Change _Scanner's template argument from
+       _FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
+       instead.
+       * include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
+       _Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
+       _Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
+       Likewise.
+       * include/std/regex: Add <cstring> for using strchr.
+
+2014-01-17  Tim Shen  <timshen91@gmail.com>
+
+       * bits/regex_automaton.tcc: Indentation fix.
+       * bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
+       _RegexTranslator<>, _AnyMatcher<>, _CharMatcher<>,
+       _BracketMatcher<>): Add bool option template parameters and
+       specializations to make matching more efficient and space saving.
+       * bits/regex_compiler.tcc: Likewise.
+
 2014-01-15  François Dumont  <fdumont@gcc.gnu.org>
 
        PR libstdc++/59712
index ae8e1f5d363952a3d08a4d35029b8bce8a4f1e91..816f5cfb0045c51132db1b5cad64ea43dd448920 100644 (file)
@@ -60,51 +60,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename, typename, typename, bool>
     class _Executor;
 
-  template<typename _Tp>
-    struct __has_contiguous_iter : std::false_type { };
-
-  template<typename _Ch, typename _Tr, typename _Alloc>
-    struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
-    : std::true_type  // string<Ch> storage is contiguous
-    { };
-
-  template<typename _Tp, typename _Alloc>
-    struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
-    : std::true_type  // vector<Tp> storage is contiguous
-    { };
-
-  template<typename _Alloc>
-    struct __has_contiguous_iter<std::vector<bool, _Alloc>>
-    : std::false_type // vector<bool> storage is not contiguous
-    { };
-
-  template<typename _Tp>
-    struct __is_contiguous_normal_iter : std::false_type { };
-
-  template<typename _Tp, typename _Cont>
-    struct
-    __is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
-    : __has_contiguous_iter<_Cont>::type
-    { };
-
-  template<typename _Iter, typename _TraitsT>
-    using __enable_if_contiguous_normal_iter
-      = typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
-                           std::shared_ptr<_NFA<_TraitsT>> >::type;
-
-  template<typename _Iter, typename _TraitsT>
-    using __disable_if_contiguous_normal_iter
-      = typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
-                           std::shared_ptr<_NFA<_TraitsT>> >::type;
-
-  template<typename _FwdIter, typename _TraitsT>
-    __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
-    __compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
-                 regex_constants::syntax_option_type __flags);
-
-  template<typename _Iter, typename _TraitsT>
-    __enable_if_contiguous_normal_iter<_Iter, _TraitsT>
-    __compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
+  template<typename _TraitsT>
+    inline std::shared_ptr<_NFA<_TraitsT>>
+    __compile_nfa(const typename _TraitsT::char_type* __first,
+                 const typename _TraitsT::char_type* __last,
+                 const _TraitsT& __traits,
                  regex_constants::syntax_option_type __flags);
 
 _GLIBCXX_END_NAMESPACE_VERSION
@@ -561,7 +521,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                    flag_type __f = ECMAScript)
        : _M_flags(__f),
          _M_original_str(__first, __last),
-         _M_automaton(__detail::__compile_nfa(__first, __last, _M_traits,
+         _M_automaton(__detail::__compile_nfa(_M_original_str.c_str(),
+                                              _M_original_str.c_str()
+                                                + _M_original_str.size(),
+                                              _M_traits,
                                               _M_flags))
        { }
 
@@ -698,7 +661,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        {
          _M_flags = __flags;
          _M_original_str.assign(__s.begin(), __s.end());
-         _M_automaton = __detail::__compile_nfa(__s.begin(), __s.end(),
+         auto __p = _M_original_str.c_str();
+         _M_automaton = __detail::__compile_nfa(__p,
+                                                __p + _M_original_str.size(),
                                                 _M_traits, _M_flags);
          return *this;
        }
index ea73675c6f3775fddfa4c5e8e242b34f6ba34148..a442cfe21b7f122fe10546ad186f66b5340dcd5a 100644 (file)
@@ -41,7 +41,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    */
 
   typedef long _StateIdT;
-  typedef std::set<_StateIdT> _StateSet;
   static const _StateIdT _S_invalid_state_id  = -1;
 
   template<typename _CharT>
@@ -138,16 +137,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_start() const
     { return _M_start_state; }
 
-    const _StateSet&
-    _M_final_states() const
-    { return _M_accepting_states; }
-
     _SizeT
     _M_sub_count() const
     { return _M_subexpr_count; }
 
     std::vector<size_t>       _M_paren_stack;
-    _StateSet                 _M_accepting_states;
     _FlagT                    _M_flags;
     _StateIdT                 _M_start_state;
     _SizeT                    _M_subexpr_count;
@@ -172,7 +166,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_insert_accept()
       {
        auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
-       this->_M_accepting_states.insert(__ret);
        return __ret;
       }
 
index 7edc67f8e88322cc6fe95996a119c6dbc162e795..759b053c5eff486696d828858ee7c8ee111c4e8e 100644 (file)
@@ -134,9 +134,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
     {
       __ostr << "digraph _Nfa {\n"
-               "  rankdir=LR;\n";
+               "  rankdir=LR;\n";
       for (size_t __i = 0; __i < this->size(); ++__i)
-        (*this)[__i]._M_dot(__ostr, __i);
+       (*this)[__i]._M_dot(__ostr, __i);
       __ostr << "}\n";
       return __ostr;
     }
@@ -186,7 +186,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _StateSeq<_TraitsT>
     _StateSeq<_TraitsT>::_M_clone()
     {
-      std::map<_StateIdT, _StateIdT> __m;
+      std::vector<_StateIdT> __m(_M_nfa.size(), -1);
       std::stack<_StateIdT> __stack;
       __stack.push(_M_start);
       while (!__stack.empty())
@@ -194,30 +194,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          auto __u = __stack.top();
          __stack.pop();
          auto __dup = _M_nfa[__u];
+         // _M_insert_state() never returns -1
          auto __id = _M_nfa._M_insert_state(__dup);
          __m[__u] = __id;
          if (__u == _M_end)
            continue;
-         if (__m.count(__dup._M_next) == 0)
+         if (__dup._M_next != _S_invalid_state_id && __m[__dup._M_next] == -1)
            __stack.push(__dup._M_next);
          if (__dup._M_opcode == _S_opcode_alternative
              || __dup._M_opcode == _S_opcode_subexpr_lookahead)
-           if (__m.count(__dup._M_alt) == 0)
+           if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1)
              __stack.push(__dup._M_alt);
        }
-      for (auto __it : __m)
+      long __size = static_cast<long>(__m.size());
+      for (long __k = 0; __k < __size; __k++)
        {
-         auto& __ref = _M_nfa[__it.second];
-         if (__ref._M_next != -1)
+         long __v;
+         if ((__v = __m[__k]) == -1)
+           continue;
+         auto& __ref = _M_nfa[__v];
+         if (__ref._M_next != _S_invalid_state_id)
            {
-             _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next));
+             _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_next] != -1);
              __ref._M_next = __m[__ref._M_next];
            }
          if (__ref._M_opcode == _S_opcode_alternative
              || __ref._M_opcode == _S_opcode_subexpr_lookahead)
-           if (__ref._M_alt != -1)
+           if (__ref._M_alt != _S_invalid_state_id)
              {
-               _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt));
+               _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_alt] != -1);
                __ref._M_alt = __m[__ref._M_alt];
              }
        }
index 4ac67dfed971975dafb1cec63cad4267ce51bc44..216f8fbebe4badc4a772a38de1d6dfebb67e965c 100644 (file)
@@ -39,19 +39,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * @{
    */
 
-  template<typename _TraitsT>
+  template<typename, bool, bool>
     struct _BracketMatcher;
 
   /// Builds an NFA from an input iterator interval.
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     class _Compiler
     {
     public:
-      typedef typename _TraitsT::string_type      _StringT;
+      typedef typename _TraitsT::char_type        _CharT;
+      typedef const _CharT*                       _IterT;
       typedef _NFA<_TraitsT>                     _RegexT;
       typedef regex_constants::syntax_option_type _FlagT;
 
-      _Compiler(_FwdIter __b, _FwdIter __e,
+      _Compiler(_IterT __b, _IterT __e,
                const _TraitsT& __traits, _FlagT __flags);
 
       std::shared_ptr<_RegexT>
@@ -59,12 +60,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { return make_shared<_RegexT>(std::move(_M_nfa)); }
 
     private:
-      typedef _Scanner<_FwdIter>                              _ScannerT;
-      typedef typename _ScannerT::_TokenT                     _TokenT;
-      typedef _StateSeq<_TraitsT>                            _StateSeqT;
-      typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
-      typedef _BracketMatcher<_TraitsT>                              _BMatcherT;
-      typedef std::ctype<typename _TraitsT::char_type>        _CtypeT;
+      typedef _Scanner<_CharT>               _ScannerT;
+      typedef typename _TraitsT::string_type _StringT;
+      typedef typename _ScannerT::_TokenT    _TokenT;
+      typedef _StateSeq<_TraitsT>            _StateSeqT;
+      typedef std::stack<_StateSeqT>         _StackT;
+      typedef std::ctype<_CharT>             _CtypeT;
 
       // accepts a specific token or returns false.
       bool
@@ -91,20 +92,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       bool
       _M_bracket_expression();
 
-      void
-      _M_expression_term(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_any_matcher_ecma();
 
-      bool
-      _M_range_expression(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_any_matcher_posix();
 
-      bool
-      _M_collating_symbol(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_char_matcher();
 
-      bool
-      _M_equivalence_class(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_character_class_matcher();
 
-      bool
-      _M_character_class(_BMatcherT& __matcher);
+      template<bool __icase, bool __collate>
+       void
+       _M_insert_bracket_matcher(bool __neg);
+
+      template<bool __icase, bool __collate>
+       void
+       _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
+                          __matcher);
 
       int
       _M_cur_int_value(int __radix);
@@ -129,33 +140,119 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StackT         _M_stack;
     };
 
-  template<typename _FwdIter, typename _TraitsT>
-    inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
-    __compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
+  template<typename _TraitsT>
+    inline std::shared_ptr<_NFA<_TraitsT>>
+    __compile_nfa(const typename _TraitsT::char_type* __first,
+                 const typename _TraitsT::char_type* __last,
+                 const _TraitsT& __traits,
                  regex_constants::syntax_option_type __flags)
     {
-      using _Cmplr = _Compiler<_FwdIter, _TraitsT>;
+      using _Cmplr = _Compiler<_TraitsT>;
       return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa();
     }
 
-  template<typename _Iter, typename _TraitsT>
-    inline __enable_if_contiguous_normal_iter<_Iter, _TraitsT>
-    __compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
-                 regex_constants::syntax_option_type __flags)
+  // [28.13.14]
+  template<typename _TraitsT, bool __icase, bool __collate>
+    class _RegexTranslator
     {
-      size_t __len = __last - __first;
-      const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr;
-      return __compile_nfa(__cfirst, __cfirst + __len, __traits, __flags);
-    }
+    public:
+      typedef typename _TraitsT::char_type           _CharT;
+      typedef typename _TraitsT::string_type         _StringT;
+      typedef typename std::conditional<__collate,
+                                       _StringT,
+                                       _CharT>::type _StrTransT;
+
+      explicit
+      _RegexTranslator(const _TraitsT& __traits)
+      : _M_traits(__traits)
+      { }
+
+      _CharT
+      _M_translate(_CharT __ch) const
+      {
+       if (__icase)
+         return _M_traits.translate_nocase(__ch);
+       else if (__collate)
+         return _M_traits.translate(__ch);
+       else
+         return __ch;
+      }
+
+      _StrTransT
+      _M_transform(_CharT __ch) const
+      {
+       return _M_transform_impl(__ch, typename integral_constant<bool,
+                                __collate>::type());
+      }
+
+    private:
+      _StrTransT
+      _M_transform_impl(_CharT __ch, false_type) const
+      { return __ch; }
+
+      _StrTransT
+      _M_transform_impl(_CharT __ch, true_type) const
+      {
+       _StrTransT __str = _StrTransT(1, _M_translate(__ch));
+       return _M_traits.transform(__str.begin(), __str.end());
+      }
 
-  template<typename _TraitsT, bool __is_ecma>
-    struct _AnyMatcher
+      const _TraitsT& _M_traits;
+    };
+
+  template<typename _TraitsT>
+    class _RegexTranslator<_TraitsT, false, false>
     {
-      typedef typename _TraitsT::char_type       _CharT;
+    public:
+      typedef typename _TraitsT::char_type _CharT;
+      typedef _CharT                       _StrTransT;
+
+      explicit
+      _RegexTranslator(const _TraitsT& __traits)
+      { }
+
+      _CharT
+      _M_translate(_CharT __ch) const
+      { return __ch; }
+
+      _StrTransT
+      _M_transform(_CharT __ch) const
+      { return __ch; }
+    };
+
+  template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
+    struct _AnyMatcher;
+
+  template<typename _TraitsT, bool __icase, bool __collate>
+    struct _AnyMatcher<_TraitsT, false, __icase, __collate>
+    {
+      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+      typedef typename _TransT::_CharT                       _CharT;
 
       explicit
       _AnyMatcher(const _TraitsT& __traits)
-      : _M_traits(__traits)
+      : _M_translator(__traits)
+      { }
+
+      bool
+      operator()(_CharT __ch) const
+      {
+       static auto __nul = _M_translator._M_translate('\0');
+       return _M_translator._M_translate(__ch) != __nul;
+      }
+
+      _TransT _M_translator;
+    };
+
+  template<typename _TraitsT, bool __icase, bool __collate>
+    struct _AnyMatcher<_TraitsT, true, __icase, __collate>
+    {
+      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+      typedef typename _TransT::_CharT                       _CharT;
+
+      explicit
+      _AnyMatcher(const _TraitsT& __traits)
+      : _M_translator(__traits)
       { }
 
       bool
@@ -165,92 +262,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       bool
       _M_apply(_CharT __ch, true_type) const
       {
-       auto __c = _M_traits.translate(__ch);
-       if (__is_ecma)
-         {
-           static auto __n = _M_traits.translate('\n');
-           static auto __r = _M_traits.translate('\r');
-           return __c != __n && __c != __r;
-         }
-       else
-         {
-           static auto __nul = _M_traits.translate('\0');
-           return __c != __nul;
-         }
+       auto __c = _M_translator._M_translate(__ch);
+       auto __n = _M_translator._M_translate('\n');
+       auto __r = _M_translator._M_translate('\r');
+       return __c != __n && __c != __r;
       }
 
       bool
       _M_apply(_CharT __ch, false_type) const
       {
-       auto __c = _M_traits.translate(__ch);
-       if (__is_ecma)
-         {
-           static auto __n = _M_traits.translate('\n');
-           static auto __r = _M_traits.translate('\r');
-           static auto __u2028 = _M_traits.translate(u'\u2028');
-           static auto __u2029 = _M_traits.translate(u'\u2029');
-           return __c != __n && __c != __r && __c != __u2028
-             && __c != __u2029;
-         }
-       else
-         {
-           static auto __nul = _M_traits.translate('\0');
-           return __c != __nul;
-         }
+       auto __c = _M_translator._M_translate(__ch);
+       auto __n = _M_translator._M_translate('\n');
+       auto __r = _M_translator._M_translate('\r');
+       auto __u2028 = _M_translator._M_translate(u'\u2028');
+       auto __u2029 = _M_translator._M_translate(u'\u2029');
+       return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
       }
 
-      const _TraitsT& _M_traits;
+      _TransT _M_translator;
     };
 
-  template<typename _TraitsT, bool __icase>
+  template<typename _TraitsT, bool __icase, bool __collate>
     struct _CharMatcher
     {
-      typedef typename _TraitsT::char_type       _CharT;
+      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+      typedef typename _TransT::_CharT                       _CharT;
 
       _CharMatcher(_CharT __ch, const _TraitsT& __traits)
-      : _M_traits(__traits), _M_ch(_M_translate(__ch))
+      : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
       { }
 
       bool
       operator()(_CharT __ch) const
-      { return _M_ch == _M_translate(__ch); }
+      { return _M_ch == _M_translator._M_translate(__ch); }
 
-      _CharT
-      _M_translate(_CharT __ch) const
-      {
-       if (__icase)
-         return _M_traits.translate_nocase(__ch);
-       else
-         return _M_traits.translate(__ch);
-      }
-
-      const _TraitsT& _M_traits;
-      _CharT          _M_ch;
+      _TransT _M_translator;
+      _CharT  _M_ch;
     };
 
   /// Matches a character range (bracket expression)
-  // TODO: Convert used _M_flags fields to template parameters, including
-  // collate and icase. Avoid using std::set, could use flat_set
-  // (sorted vector and binary search) instead.
-  template<typename _TraitsT>
+  template<typename _TraitsT, bool __icase, bool __collate>
     struct _BracketMatcher
     {
     public:
-      typedef typename _TraitsT::char_type       _CharT;
-      typedef typename _TraitsT::char_class_type  _CharClassT;
-      typedef typename _TraitsT::string_type      _StringT;
-      typedef regex_constants::syntax_option_type _FlagT;
+      typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+      typedef typename _TransT::_CharT                       _CharT;
+      typedef typename _TransT::_StrTransT                   _StrTransT;
+      typedef typename _TraitsT::string_type                 _StringT;
+      typedef typename _TraitsT::char_class_type             _CharClassT;
 
     public:
       _BracketMatcher(bool __is_non_matching,
-                     const _TraitsT& __traits,
-                     _FlagT __flags)
-      :
+                     const _TraitsT& __traits)
+      : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
+      _M_is_non_matching(__is_non_matching)
 #ifdef _GLIBCXX_DEBUG
-      _M_is_ready(false),
+      , _M_is_ready(false)
 #endif
-      _M_traits(__traits), _M_class_set(0), _M_flags(__flags),
-      _M_is_non_matching(__is_non_matching)
       { }
 
       bool
@@ -263,7 +331,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_add_char(_CharT __c)
       {
-       _M_char_set.insert(_M_translate(__c));
+       _M_char_set.push_back(_M_translator._M_translate(__c));
 #ifdef _GLIBCXX_DEBUG
        _M_is_ready = false;
 #endif
@@ -276,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                                 __s.data() + __s.size());
        if (__st.empty())
          __throw_regex_error(regex_constants::error_collate);
-       _M_char_set.insert(_M_translate(__st[0]));
+       _M_char_set.push_back(_M_translator._M_translate(__st[0]));
 #ifdef _GLIBCXX_DEBUG
        _M_is_ready = false;
 #endif
@@ -291,7 +359,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          __throw_regex_error(regex_constants::error_collate);
        __st = _M_traits.transform_primary(__st.data(),
                                           __st.data() + __st.size());
-       _M_equiv_set.insert(__st);
+       _M_equiv_set.push_back(__st);
 #ifdef _GLIBCXX_DEBUG
        _M_is_ready = false;
 #endif
@@ -302,7 +370,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
        auto __mask = _M_traits.lookup_classname(__s.data(),
                                                 __s.data() + __s.size(),
-                                                _M_is_icase());
+                                                __icase);
        if (__mask == 0)
          __throw_regex_error(regex_constants::error_ctype);
        _M_class_set |= __mask;
@@ -314,12 +382,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_make_range(_CharT __l, _CharT __r)
       {
-       if (_M_flags & regex_constants::collate)
-         _M_range_set.insert(
-           make_pair(_M_get_str(_M_translate(__l)),
-                     _M_get_str(_M_translate(__r))));
-       else
-         _M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r)));
+       _M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
+                                     _M_translator._M_transform(__r)));
 #ifdef _GLIBCXX_DEBUG
        _M_is_ready = false;
 #endif
@@ -350,26 +414,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_apply(_CharT __ch, true_type) const
       { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
 
-      _CharT
-      _M_translate(_CharT __c) const
-      {
-       if (_M_is_icase())
-         return _M_traits.translate_nocase(__c);
-       else
-         return _M_traits.translate(__c);
-      }
-
-      bool
-      _M_is_icase() const
-      { return _M_flags & regex_constants::icase; }
-
-      _StringT
-      _M_get_str(_CharT __c) const
-      {
-       _StringT __s(1, __c);
-       return _M_traits.transform(__s.begin(), __s.end());
-      }
-
       void
       _M_make_cache(true_type)
       {
@@ -383,16 +427,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { }
 
     private:
-      _CacheT                            _M_cache;
-      std::set<_CharT>                   _M_char_set;
-      std::set<_StringT>                 _M_equiv_set;
-      std::set<pair<_StringT, _StringT>> _M_range_set;
-      const _TraitsT&                    _M_traits;
-      _CharClassT                        _M_class_set;
-      _FlagT                             _M_flags;
-      bool                               _M_is_non_matching;
+      _CacheT                                   _M_cache;
+      std::vector<_CharT>                       _M_char_set;
+      std::vector<_StringT>                     _M_equiv_set;
+      std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
+      _CharClassT                               _M_class_set;
+      _TransT                                   _M_translator;
+      const _TraitsT&                           _M_traits;
+      bool                                      _M_is_non_matching;
 #ifdef _GLIBCXX_DEBUG
-      bool                               _M_is_ready;
+      bool                                      _M_is_ready;
 #endif
     };
 
index 4da653f2aa3a0dcff3cd9dfbec946f2e56b2eb0e..621e43f3ea391b7e3ec22f54d6b8cddf35fba53b 100644 (file)
@@ -59,9 +59,9 @@ namespace __detail
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-  template<typename _FwdIter, typename _TraitsT>
-    _Compiler<_FwdIter, _TraitsT>::
-    _Compiler(_FwdIter __b, _FwdIter __e,
+  template<typename _TraitsT>
+    _Compiler<_TraitsT>::
+    _Compiler(_IterT __b, _IterT __e,
              const _TraitsT& __traits, _FlagT __flags)
     : _M_flags((__flags
                & (regex_constants::ECMAScript
@@ -89,9 +89,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_nfa._M_eliminate_dummy();
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     void
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_disjunction()
     {
       this->_M_alternative();
@@ -110,9 +110,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        }
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     void
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_alternative()
     {
       if (this->_M_term())
@@ -126,9 +126,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_term()
     {
       if (this->_M_assertion())
@@ -141,9 +141,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return false;
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_assertion()
     {
       if (_M_match_token(_ScannerT::_S_token_line_begin))
@@ -172,9 +172,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return true;
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     void
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_quantifier()
     {
       bool __neg = (_M_flags & regex_constants::ECMAScript);
@@ -278,52 +278,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        }
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+#define __INSERT_REGEX_MATCHER(__func, args...)\
+       do\
+         if (!(_M_flags & regex_constants::icase))\
+           if (!(_M_flags & regex_constants::collate))\
+             __func<false, false>(args);\
+           else\
+             __func<false, true>(args);\
+         else\
+           if (!(_M_flags & regex_constants::collate))\
+             __func<true, false>(args);\
+           else\
+             __func<true, true>(args);\
+       while (false)
+
+  template<typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_atom()
     {
       if (_M_match_token(_ScannerT::_S_token_anychar))
        {
-         if (_M_flags & regex_constants::ECMAScript)
-           _M_stack.push(_StateSeqT(_M_nfa,
-                                    _M_nfa._M_insert_matcher
-                                    (_AnyMatcher<_TraitsT,
-                                       true>(_M_traits))));
+         if (!(_M_flags & regex_constants::ECMAScript))
+           __INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix);
          else
-           _M_stack.push(_StateSeqT(_M_nfa,
-                                    _M_nfa._M_insert_matcher
-                                    (_AnyMatcher<_TraitsT,
-                                       false>(_M_traits))));
+           __INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma);
        }
       else if (_M_try_char())
-       {
-         if (_M_flags & regex_constants::icase)
-           _M_stack.push(_StateSeqT(_M_nfa,
-                                    _M_nfa._M_insert_matcher
-                                    (_CharMatcher<_TraitsT,
-                                       true>(_M_value[0],
-                                             _M_traits))));
-         else
-           _M_stack.push(_StateSeqT(_M_nfa,
-                                    _M_nfa._M_insert_matcher
-                                    (_CharMatcher<_TraitsT,
-                                       false>(_M_value[0],
-                                              _M_traits))));
-       }
+       __INSERT_REGEX_MATCHER(_M_insert_char_matcher);
       else if (_M_match_token(_ScannerT::_S_token_backref))
        _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
                                 _M_insert_backref(_M_cur_int_value(10))));
       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
-       {
-         _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
-         _BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]),
-                              _M_traits, _M_flags);
-         __matcher._M_add_character_class(_M_value);
-         __matcher._M_ready();
-         _M_stack.push(_StateSeqT(_M_nfa,
-               _M_nfa._M_insert_matcher(std::move(__matcher))));
-       }
+       __INSERT_REGEX_MATCHER(_M_insert_character_class_matcher);
       else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin))
        {
          _StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy());
@@ -348,28 +335,90 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return true;
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_bracket_expression()
     {
       bool __neg =
        _M_match_token(_ScannerT::_S_token_bracket_neg_begin);
       if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
        return false;
-      _BMatcherT __matcher(__neg, _M_traits, _M_flags);
+      __INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg);
+      return true;
+    }
+#undef __INSERT_REGEX_MATCHER
+
+  template<typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_TraitsT>::
+    _M_insert_any_matcher_ecma()
+    {
+      _M_stack.push(_StateSeqT(_M_nfa,
+       _M_nfa._M_insert_matcher
+         (_AnyMatcher<_TraitsT, true, __icase, __collate>
+           (_M_traits))));
+    }
+
+  template<typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_TraitsT>::
+    _M_insert_any_matcher_posix()
+    {
+      _M_stack.push(_StateSeqT(_M_nfa,
+       _M_nfa._M_insert_matcher
+         (_AnyMatcher<_TraitsT, false, __icase, __collate>
+           (_M_traits))));
+    }
+
+  template<typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_TraitsT>::
+    _M_insert_char_matcher()
+    {
+      _M_stack.push(_StateSeqT(_M_nfa,
+       _M_nfa._M_insert_matcher
+         (_CharMatcher<_TraitsT, __icase, __collate>
+           (_M_value[0], _M_traits))));
+    }
+
+  template<typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_TraitsT>::
+    _M_insert_character_class_matcher()
+    {
+      _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
+      _BracketMatcher<_TraitsT, __icase, __collate> __matcher
+       (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits);
+      __matcher._M_add_character_class(_M_value);
+      __matcher._M_ready();
+      _M_stack.push(_StateSeqT(_M_nfa,
+       _M_nfa._M_insert_matcher(std::move(__matcher))));
+    }
+
+  template<typename _TraitsT>
+  template<bool __icase, bool __collate>
+    void
+    _Compiler<_TraitsT>::
+    _M_insert_bracket_matcher(bool __neg)
+    {
+      _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
       while (!_M_match_token(_ScannerT::_S_token_bracket_end))
        _M_expression_term(__matcher);
       __matcher._M_ready();
       _M_stack.push(_StateSeqT(_M_nfa,
                               _M_nfa._M_insert_matcher(std::move(__matcher))));
-      return true;
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
+  template<bool __icase, bool __collate>
     void
-    _Compiler<_FwdIter, _TraitsT>::
-    _M_expression_term(_BMatcherT& __matcher)
+    _Compiler<_TraitsT>::
+    _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
     {
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
        __matcher._M_add_collating_element(_M_value);
@@ -403,9 +452,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        __throw_regex_error(regex_constants::error_brack);
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_try_char()
     {
       bool __is_char = false;
@@ -424,9 +473,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return __is_char;
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     bool
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_match_token(_TokenT token)
     {
       if (token == _M_scanner._M_get_token())
@@ -438,9 +487,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return false;
     }
 
-  template<typename _FwdIter, typename _TraitsT>
+  template<typename _TraitsT>
     int
-    _Compiler<_FwdIter, _TraitsT>::
+    _Compiler<_TraitsT>::
     _M_cur_int_value(int __radix)
     {
       long __v = 0;
@@ -450,25 +499,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return __v;
     }
 
-  template<typename _TraitsT>
+  template<typename _TraitsT, bool __icase, bool __collate>
     bool
-    _BracketMatcher<_TraitsT>::_M_apply(_CharT __ch, false_type) const
+    _BracketMatcher<_TraitsT, __icase, __collate>::
+    _M_apply(_CharT __ch, false_type) const
     {
       bool __ret = false;
-      if (_M_traits.isctype(__ch, _M_class_set)
-         || _M_char_set.count(_M_translate(__ch))
-         || _M_equiv_set.count(_M_traits.transform_primary(&__ch, &__ch+1)))
+      if (std::find(_M_char_set.begin(), _M_char_set.end(),
+                   _M_translator._M_translate(__ch))
+         != _M_char_set.end())
        __ret = true;
       else
        {
-         _StringT __s = _M_get_str(_M_flags & regex_constants::collate
-                                   ? _M_translate(__ch) : __ch);
+         auto __s = _M_translator._M_transform(__ch);
          for (auto& __it : _M_range_set)
            if (__it.first <= __s && __s <= __it.second)
              {
                __ret = true;
                break;
              }
+         if (_M_traits.isctype(__ch, _M_class_set))
+           __ret = true;
+         else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
+                            _M_traits.transform_primary(&__ch, &__ch+1))
+                  != _M_equiv_set.end())
+           __ret = true;
        }
       if (_M_is_non_matching)
        return !__ret;
index bed90148da871e5784ab4fabab361967edef1a27..0885716dbfb52bf1d6d1d49b57051c6dacff2a19 100644 (file)
@@ -65,7 +65,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_nfa(*__re._M_automaton),
       _M_results(__results),
       _M_match_queue(__dfs_mode ? nullptr
-                    : new queue<pair<_StateIdT, _ResultsVec>>()),
+                    : new vector<pair<_StateIdT, _ResultsVec>>()),
       _M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
       _M_flags((__flags & regex_constants::match_prev_avail)
               ? (__flags
@@ -133,23 +133,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_lookahead(_State<_TraitsT> __state);
 
     public:
-      _ResultsVec                                          _M_cur_results;
-      _BiIter                                              _M_current;
-      const _BiIter                                        _M_begin;
-      const _BiIter                                        _M_end;
-      const _RegexT&                                       _M_re;
-      const _NFAT&                                         _M_nfa;
-      _ResultsVec&                                         _M_results;
+      _ResultsVec                                           _M_cur_results;
+      _BiIter                                               _M_current;
+      const _BiIter                                         _M_begin;
+      const _BiIter                                         _M_end;
+      const _RegexT&                                        _M_re;
+      const _NFAT&                                          _M_nfa;
+      _ResultsVec&                                          _M_results;
       // Used in BFS, saving states that need to be considered for the next
       // character.
-      std::unique_ptr<queue<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
+      std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
       // Used in BFS, indicating that which state is already visited.
-      std::unique_ptr<vector<bool>>                        _M_visited;
-      _FlagT                                               _M_flags;
+      std::unique_ptr<vector<bool>>                         _M_visited;
+      _FlagT                                                _M_flags;
       // To record current solution.
-      _StateIdT                                            _M_start_state;
+      _StateIdT                                             _M_start_state;
       // Do we have a solution so far?
-      bool                                                 _M_has_sol;
+      bool                                                  _M_has_sol;
     };
 
  //@} regex-detail
index 85b70b8a5ebf2b89ec4f9abb5488e72c461a1337..e1cfcb06164c813976246b38d238ca84a3323e8e 100644 (file)
@@ -111,7 +111,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        }
       else
        {
-         _M_match_queue->push(make_pair(_M_start_state, _M_results));
+         _M_match_queue->push_back(make_pair(_M_start_state, _M_results));
          bool __ret = false;
          while (1)
            {
@@ -120,10 +120,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                break;
              _M_visited->assign(_M_visited->size(), false);
              auto _M_old_queue = std::move(*_M_match_queue);
-             while (!_M_old_queue.empty())
+             for (auto __task : _M_old_queue)
                {
-                 auto __task = _M_old_queue.front();
-                 _M_old_queue.pop();
                  _M_cur_results = __task.second;
                  _M_dfs<__match_mode>(__task.first);
                }
@@ -279,7 +277,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            }
          else
            if (__state._M_matches(*_M_current))
-             _M_match_queue->push(make_pair(__state._M_next, _M_cur_results));
+             _M_match_queue->push_back(make_pair(__state._M_next,
+                                                 _M_cur_results));
          break;
        // First fetch the matched result from _M_cur_results as __submatch;
        // then compare it with
index d113c5d52624d17df2317ae78281d7e875b3e17b..6dc2b4edf6f11e65314b3a5342ce27aca50c721d 100644 (file)
@@ -39,6 +39,154 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * @{
    */
 
+  struct _ScannerBase
+  {
+  public:
+    /// Token types returned from the scanner.
+    enum _TokenT
+    {
+      _S_token_anychar,
+      _S_token_ord_char,
+      _S_token_oct_num,
+      _S_token_hex_num,
+      _S_token_backref,
+      _S_token_subexpr_begin,
+      _S_token_subexpr_no_group_begin,
+      _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
+      _S_token_subexpr_end,
+      _S_token_bracket_begin,
+      _S_token_bracket_neg_begin,
+      _S_token_bracket_end,
+      _S_token_interval_begin,
+      _S_token_interval_end,
+      _S_token_quoted_class,
+      _S_token_char_class_name,
+      _S_token_collsymbol,
+      _S_token_equiv_class_name,
+      _S_token_opt,
+      _S_token_or,
+      _S_token_closure0,
+      _S_token_closure1,
+      _S_token_ungreedy,
+      _S_token_line_begin,
+      _S_token_line_end,
+      _S_token_word_bound, // neg if _M_value[0] == 'n'
+      _S_token_comma,
+      _S_token_dup_count,
+      _S_token_eof,
+      _S_token_unknown
+    };
+
+  protected:
+    typedef regex_constants::syntax_option_type _FlagT;
+
+    enum _StateT
+    {
+      _S_state_normal,
+      _S_state_in_brace,
+      _S_state_in_bracket,
+    };
+
+  protected:
+    _ScannerBase(_FlagT __flags)
+    : _M_state(_S_state_normal),
+    _M_flags(__flags),
+    _M_escape_tbl(_M_is_ecma()
+                 ? _M_ecma_escape_tbl
+                 : _M_awk_escape_tbl),
+    _M_spec_char(_M_is_ecma()
+                ? _M_ecma_spec_char
+                : _M_is_basic()
+                ? _M_basic_spec_char
+                : _M_extended_spec_char),
+    _M_at_bracket_start(false)
+    { }
+
+  protected:
+    const char*
+    _M_find_escape(char __c)
+    {
+      auto __it = _M_escape_tbl;
+      for (; __it->first != '\0'; ++__it)
+       if (__it->first == __c)
+         return &__it->second;
+      return nullptr;
+    }
+
+    bool
+    _M_is_ecma() const
+    { return _M_flags & regex_constants::ECMAScript; }
+
+    bool
+    _M_is_basic() const
+    { return _M_flags & (regex_constants::basic | regex_constants::grep); }
+
+    bool
+    _M_is_extended() const
+    {
+      return _M_flags & (regex_constants::extended
+                        | regex_constants::egrep
+                        | regex_constants::awk);
+    }
+
+    bool
+    _M_is_grep() const
+    { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
+
+    bool
+    _M_is_awk() const
+    { return _M_flags & regex_constants::awk; }
+
+  protected:
+    const std::pair<char, _TokenT> _M_token_tbl[9] =
+      {
+       {'^', _S_token_line_begin},
+       {'$', _S_token_line_end},
+       {'.', _S_token_anychar},
+       {'*', _S_token_closure0},
+       {'+', _S_token_closure1},
+       {'?', _S_token_opt},
+       {'|', _S_token_or},
+       {'\n', _S_token_or}, // grep and egrep
+       {'\0', _S_token_or},
+      };
+    const std::pair<char, char> _M_ecma_escape_tbl[8] =
+      {
+       {'0', '\0'},
+       {'b', '\b'},
+       {'f', '\f'},
+       {'n', '\n'},
+       {'r', '\r'},
+       {'t', '\t'},
+       {'v', '\v'},
+       {'\0', '\0'},
+      };
+    const std::pair<char, char> _M_awk_escape_tbl[11] =
+      {
+       {'"', '"'},
+       {'/', '/'},
+       {'\\', '\\'},
+       {'a', '\a'},
+       {'b', '\b'},
+       {'f', '\f'},
+       {'n', '\n'},
+       {'r', '\r'},
+       {'t', '\t'},
+       {'v', '\v'},
+       {'\0', '\0'},
+      };
+    const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
+    const char* _M_basic_spec_char = ".[\\*^$";
+    const char* _M_extended_spec_char = ".[\\()*+?{|^$";
+
+    _StateT                       _M_state;
+    _FlagT                        _M_flags;
+    _TokenT                       _M_token;
+    const std::pair<char, char>*  _M_escape_tbl;
+    const char*                   _M_spec_char;
+    bool                          _M_at_bracket_start;
+  };
+
   /**
    * @brief struct _Scanner. Scans an input range for regex tokens.
    *
@@ -49,51 +197,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * constructor: different regular expression grammars will interpret
    * the same input pattern in syntactically different ways.
    */
-  template<typename _FwdIter>
+  template<typename _CharT>
     class _Scanner
+    : public _ScannerBase
     {
     public:
-      typedef typename std::iterator_traits<_FwdIter>::value_type _CharT;
+      typedef const _CharT*                                       _IterT;
       typedef std::basic_string<_CharT>                           _StringT;
       typedef regex_constants::syntax_option_type                 _FlagT;
       typedef const std::ctype<_CharT>                            _CtypeT;
 
-      /// Token types returned from the scanner.
-      enum _TokenT
-      {
-       _S_token_anychar,
-       _S_token_ord_char,
-       _S_token_oct_num,
-       _S_token_hex_num,
-       _S_token_backref,
-       _S_token_subexpr_begin,
-       _S_token_subexpr_no_group_begin,
-       _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
-       _S_token_subexpr_end,
-       _S_token_bracket_begin,
-       _S_token_bracket_neg_begin,
-       _S_token_bracket_end,
-       _S_token_interval_begin,
-       _S_token_interval_end,
-       _S_token_quoted_class,
-       _S_token_char_class_name,
-       _S_token_collsymbol,
-       _S_token_equiv_class_name,
-       _S_token_opt,
-       _S_token_or,
-       _S_token_closure0,
-       _S_token_closure1,
-       _S_token_ungreedy,
-       _S_token_line_begin,
-       _S_token_line_end,
-       _S_token_word_bound, // neg if _M_value[0] == 'n'
-       _S_token_comma,
-       _S_token_dup_count,
-       _S_token_eof,
-       _S_token_unknown
-      };
-
-      _Scanner(_FwdIter __begin, _FwdIter __end,
+      _Scanner(_IterT __begin, _IterT __end,
               _FlagT __flags, std::locale __loc);
 
       void
@@ -113,13 +227,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 
     private:
-      enum _StateT
-      {
-       _S_state_normal,
-       _S_state_in_brace,
-       _S_state_in_bracket,
-      };
-
       void
       _M_scan_normal();
 
@@ -141,49 +248,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_eat_class(char);
 
-      constexpr bool
-      _M_is_ecma()
-      { return _M_flags & regex_constants::ECMAScript; }
-
-      constexpr bool
-      _M_is_basic()
-      { return _M_flags & (regex_constants::basic | regex_constants::grep); }
-
-      constexpr bool
-      _M_is_extended()
-      {
-       return _M_flags & (regex_constants::extended
-                          | regex_constants::egrep
-                          | regex_constants::awk);
-      }
-
-      constexpr bool
-      _M_is_grep()
-      { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
-
-      constexpr bool
-      _M_is_awk()
-      { return _M_flags & regex_constants::awk; }
-
-      _StateT                       _M_state;
-      _FwdIter                      _M_current;
-      _FwdIter                      _M_end;
-      _FlagT                        _M_flags;
+      _IterT                        _M_current;
+      _IterT                        _M_end;
       _CtypeT&                      _M_ctype;
-      _TokenT                       _M_token;
       _StringT                      _M_value;
-      bool                          _M_at_bracket_start;
-    public:
-      // FIXME: make them static when this file is stable.
-      const std::map<char, _TokenT> _M_token_map;
-      const std::map<char, char>    _M_ecma_escape_map;
-      const std::map<char, char>    _M_awk_escape_map;
-      const std::set<char>          _M_ecma_spec_char;
-      const std::set<char>          _M_basic_spec_char;
-      const std::set<char>          _M_extended_spec_char;
-
-      const std::map<char, char>&   _M_escape_map;
-      const std::set<char>&         _M_spec_char;
       void (_Scanner::* _M_eat_escape)();
     };
 
index 34d78ec76477ad8b029694879ef33707b15c1b49..d954d0764e263358b78c65b3008413878b1d3c59 100644 (file)
@@ -52,106 +52,22 @@ namespace __detail
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-  template<typename _FwdIter>
-    _Scanner<_FwdIter>::
-    _Scanner(_FwdIter __begin, _FwdIter __end,
+  template<typename _CharT>
+    _Scanner<_CharT>::
+    _Scanner(typename _Scanner::_IterT __begin,
+            typename _Scanner::_IterT __end,
             _FlagT __flags, std::locale __loc)
-    : _M_state(_S_state_normal), _M_current(__begin), _M_end(__end),
-      _M_flags(__flags),
+    : _ScannerBase(__flags),
+      _M_current(__begin), _M_end(__end),
       _M_ctype(std::use_facet<_CtypeT>(__loc)),
-      _M_at_bracket_start(false),
-      _M_token_map
-       {
-         {'^', _S_token_line_begin},
-         {'$', _S_token_line_end},
-         {'.', _S_token_anychar},
-         {'*', _S_token_closure0},
-         {'+', _S_token_closure1},
-         {'?', _S_token_opt},
-         {'|', _S_token_or},
-         // grep and egrep
-         {'\n', _S_token_or},
-       },
-      _M_ecma_escape_map
-       {
-         {'0', '\0'},
-         {'b', '\b'},
-         {'f', '\f'},
-         {'n', '\n'},
-         {'r', '\r'},
-         {'t', '\t'},
-         {'v', '\v'},
-       },
-      _M_awk_escape_map
-       {
-         {'"', '"'},
-         {'/', '/'},
-         {'\\', '\\'},
-         {'a', '\a'},
-         {'b', '\b'},
-         {'f', '\f'},
-         {'n', '\n'},
-         {'r', '\r'},
-         {'t', '\t'},
-         {'v', '\v'},
-       },
-      _M_ecma_spec_char
-       {
-         '^',
-         '$',
-         '\\',
-         '.',
-         '*',
-         '+',
-         '?',
-         '(',
-         ')',
-         '[',
-         ']',
-         '{',
-         '}',
-         '|',
-       },
-      _M_basic_spec_char
-       {
-         '.',
-         '[',
-         '\\',
-         '*',
-         '^',
-         '$',
-       },
-      _M_extended_spec_char
-       {
-         '.',
-         '[',
-         '\\',
-         '(',
-         ')',
-         '*',
-         '+',
-         '?',
-         '{',
-         '|',
-         '^',
-         '$',
-       },
-      _M_escape_map(_M_is_ecma()
-                   ? _M_ecma_escape_map
-                   : _M_awk_escape_map),
-      _M_spec_char(_M_is_ecma()
-                  ? _M_ecma_spec_char
-                  : _M_is_basic()
-                  ? _M_basic_spec_char
-                  : _M_extended_spec_char),
       _M_eat_escape(_M_is_ecma()
                    ? &_Scanner::_M_eat_escape_ecma
                    : &_Scanner::_M_eat_escape_posix)
     { _M_advance(); }
 
-  template<typename _FwdIter>
+  template<typename _CharT>
     void
-    _Scanner<_FwdIter>::
+    _Scanner<_CharT>::
     _M_advance()
     {
       if (_M_current == _M_end)
@@ -173,12 +89,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Differences between styles:
   // 1) "\(", "\)", "\{" in basic. It's not escaping.
   // 2) "(?:", "(?=", "(?!" in ECMAScript.
-  template<typename _FwdIter>
+  template<typename _CharT>
     void
-    _Scanner<_FwdIter>::
+    _Scanner<_CharT>::
     _M_scan_normal()
     {
       auto __c = *_M_current++;
+      const char* __pos;
 
       if (__c == '\\')
        {
@@ -244,11 +161,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          _M_state = _S_state_in_brace;
          _M_token = _S_token_interval_begin;
        }
-      else if ((_M_spec_char.count(_M_ctype.narrow(__c, '\0'))
+      else if (((__pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')))
+                 != nullptr
+               && *__pos != '\0'
                && __c != ']'
                && __c != '}')
               || (_M_is_grep() && __c == '\n'))
-       _M_token = _M_token_map.at(__c);
+       {
+         auto __it = _M_token_tbl;
+         auto __narrowc = _M_ctype.narrow(__c, '\0');
+         for (; __it->first != '\0'; ++__it)
+           if (__it->first == __narrowc)
+             {
+               _M_token = __it->second;
+               return;
+             }
+         _GLIBCXX_DEBUG_ASSERT(false);
+       }
       else
        {
          _M_token = _S_token_ord_char;
@@ -259,9 +188,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Differences between styles:
   // 1) different semantics of "[]" and "[^]".
   // 2) Escaping in bracket expr.
-  template<typename _FwdIter>
+  template<typename _CharT>
     void
-    _Scanner<_FwdIter>::
+    _Scanner<_CharT>::
     _M_scan_in_bracket()
     {
       if (_M_current == _M_end)
@@ -316,9 +245,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Differences between styles:
   // 1) "\}" in basic style.
-  template<typename _FwdIter>
+  template<typename _CharT>
     void
-    _Scanner<_FwdIter>::
+    _Scanner<_CharT>::
     _M_scan_in_brace()
     {
       if (_M_current == _M_end)
@@ -357,21 +286,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        __throw_regex_error(regex_constants::error_badbrace);
     }
 
-  template<typename _FwdIter>
+  template<typename _CharT>
     void
-    _Scanner<_FwdIter>::
+    _Scanner<_CharT>::
     _M_eat_escape_ecma()
     {
       if (_M_current == _M_end)
        __throw_regex_error(regex_constants::error_escape);
 
       auto __c = *_M_current++;
+      auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
 
-      if (_M_escape_map.count(_M_ctype.narrow(__c, '\0'))
-         && (__c != 'b' || _M_state == _S_state_in_bracket))
+      if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket))
        {
          _M_token = _S_token_ord_char;
-         _M_value.assign(1, _M_escape_map.at(__c));
+         _M_value.assign(1, *__pos);
        }
       else if (__c == 'b')
        {
@@ -431,17 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Differences between styles:
   // 1) Extended doesn't support backref, but basic does.
-  template<typename _FwdIter>
+  template<typename _CharT>
     void
-    _Scanner<_FwdIter>::
+    _Scanner<_CharT>::
     _M_eat_escape_posix()
     {
       if (_M_current == _M_end)
        __throw_regex_error(regex_constants::error_escape);
 
       auto __c = *_M_current;
+      auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
 
-      if (_M_spec_char.count(_M_ctype.narrow(__c, '\0')))
+      if (__pos != nullptr && *__pos != '\0')
        {
          _M_token = _S_token_ord_char;
          _M_value.assign(1, __c);
@@ -469,17 +399,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       ++_M_current;
     }
 
-  template<typename _FwdIter>
+  template<typename _CharT>
     void
-    _Scanner<_FwdIter>::
+    _Scanner<_CharT>::
     _M_eat_escape_awk()
     {
       auto __c = *_M_current++;
+      auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
 
-      if (_M_escape_map.count(_M_ctype.narrow(__c, '\0')))
+      if (__pos != nullptr)
        {
          _M_token = _S_token_ord_char;
-         _M_value.assign(1, _M_escape_map.at(__c));
+         _M_value.assign(1, *__pos);
        }
       // \ddd for oct representation
       else if (_M_ctype.is(_CtypeT::digit, __c)
@@ -505,9 +436,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Eats a character class or throwns an exception.
   // __ch cound be ':', '.' or '=', _M_current is the char after ']' when
   // returning.
-  template<typename _FwdIter>
+  template<typename _CharT>
     void
-    _Scanner<_FwdIter>::
+    _Scanner<_CharT>::
     _M_eat_class(char __ch)
     {
       for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
@@ -525,9 +456,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
 #ifdef _GLIBCXX_DEBUG
-  template<typename _FwdIter>
+  template<typename _CharT>
     std::ostream&
-    _Scanner<_FwdIter>::
+    _Scanner<_CharT>::
     _M_print(std::ostream& ostr)
     {
       switch (_M_token)
index 9395f5011e0ad5c23111a73be26c4c5735c042a2..9161f48354496c1864ad66258a002473052e359d 100644 (file)
 #include <iterator>
 #include <locale>
 #include <memory>
-#include <map>
-#include <queue>
-#include <set>
 #include <sstream>
 #include <stack>
 #include <stdexcept>
 #include <string>
 #include <utility>
 #include <vector>
+#include <cstring>
 
 #include <bits/regex_constants.h>
 #include <bits/regex_error.h>