]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Thompson matcher refactored.
authorTim Shen <timshen91@gmail.com>
Tue, 30 Jul 2013 12:02:55 +0000 (12:02 +0000)
committerTim Shen <timshen@gcc.gnu.org>
Tue, 30 Jul 2013 12:02:55 +0000 (12:02 +0000)
2013-07-30  Tim Shen  <timshen91@gmail.com>

Thompson matcher refactored. Fix grouping problem.
* include/bits/regex.h: Use a dispatcher _M_get_matcher().
* include/bits/regex_compiler.h: Tweak for auto switching.
* include/bits/regex_grep_matcher.h: Class structure.
* include/bits/regex_grep_matcher.tcc: _BFSMatcher(Thompson
matcher) refactoring.
* include/bits/regex_nfa.h: Change _Results's interfaces.
* include/std/regex: Includes <map> and <queue>.
* testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
For both matchers.
* testsuite/28_regex/algorithms/regex_match/extended/57173.cc:
For both matchers.
* testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc:
New.

From-SVN: r201334

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex.h
libstdc++-v3/include/bits/regex_compiler.h
libstdc++-v3/include/bits/regex_grep_matcher.h
libstdc++-v3/include/bits/regex_grep_matcher.tcc
libstdc++-v3/include/bits/regex_nfa.h
libstdc++-v3/include/std/regex
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc [new file with mode: 0644]

index edb17eae0c0cc5564f1682dfb21be97accb57e2f..db9a0358afc5962a155aea72dedacf695c26f29e 100644 (file)
@@ -1,3 +1,20 @@
+2013-07-30  Tim Shen  <timshen91@gmail.com>
+
+       Thompson matcher refactored. Fix grouping problem.
+       * include/bits/regex.h: Use a dispatcher _M_get_matcher().
+       * include/bits/regex_compiler.h: Tweak for auto switching.
+       * include/bits/regex_grep_matcher.h: Class structure.
+       * include/bits/regex_grep_matcher.tcc: _BFSMatcher(Thompson
+       matcher) refactoring.
+       * include/bits/regex_nfa.h: Change _Results's interfaces.
+       * include/std/regex: Includes <map> and <queue>.
+       * testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
+       For both matchers.
+       * testsuite/28_regex/algorithms/regex_match/extended/57173.cc:
+       For both matchers.
+       * testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc:
+       New.
+
 2013-07-29  Nathan Froyd  <froydnj@gcc.gnu.org>
 
        * include/std/atomic (compare_exchange_weak, compare_exchange_strong):
index 569284847855936afb8ebbdd4322f06b22413c4f..c1f65339848e2df5d9832442506d5340a53cbb26 100644 (file)
@@ -2175,7 +2175,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     bool
     regex_match(_Bi_iter                                 __s,
                 _Bi_iter                                 __e,
-                match_results<_Bi_iter, _Alloc>&     __m,
+                match_results<_Bi_iter, _Alloc>&         __m,
                 const basic_regex<_Ch_type, _Rx_traits>& __re,
                 regex_constants::match_flag_type         __flags
                                = regex_constants::match_default)
@@ -2184,8 +2184,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
       __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
       __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
-      __detail::_Grep_matcher __matcher(__cs, __r, __a, __flags);
-      return __matcher._M_dfs_match();
+      return __a->_M_get_matcher(__cs, __r, __a, __flags)->_M_match();
     }
 
   /**
@@ -2336,8 +2335,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
         {
           __detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
-          __detail::_Grep_matcher __matcher(__curs, __r, __a, __flags);
-          if (__matcher._M_dfs_search_from_first())
+          auto __matcher = __a->_M_get_matcher(__curs, __r, __a, __flags);
+          if (__matcher->_M_search_from_first())
             {
               __r._M_set_range(__m.size(),
                                __detail::_SpecializedCursor<_Bi_iter>
index dae0948404c7c2ad64b7a639775409862d1c6d7c..badea8c641e29c6483cbedc42b56e40c7d5626fb 100644 (file)
@@ -936,7 +936,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (_M_match_token(_ScannerT::_S_token_backref))
        {
          // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
-         return true;
+          _M_state_store._M_set_back_ref(true);
+         //return true;
        }
       if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
        {
index 8686cc933be38e1243bfbd17b8eca28c078a9527..d9270e5dc94733f944861760f6256a39ff2d2c96 100644 (file)
@@ -60,6 +60,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                          const _SpecializedCursor<_FwdIterT>& __cursor,
                          match_results<_FwdIterT, _Alloc>& __m);
 
+      ~_SpecializedResults()
+      {
+        if (_M_managed)
+          delete &_M_results;
+      }
+
+    private:
+      _SpecializedResults(const _SpecializedResults& __rhs)
+      : _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)),
+      _M_managed(true)
+      { }
+
+    public:
       void
       _M_set_pos(int __i, int __j, const _PatternCursor& __pc);
 
@@ -76,8 +89,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_set_matched(int __i, bool __is_matched)
       { _M_results.at(__i).matched = __is_matched; }
 
+      std::unique_ptr<_Results>
+      _M_clone() const
+      { return unique_ptr<_Results>(new _SpecializedResults(*this)); }
+
+      void
+      _M_assign(const _Results& __rhs)
+      {
+        auto __r = static_cast<const _SpecializedResults*>(&__rhs);
+        _M_results = __r->_M_results;
+      }
+
     private:
       match_results<_FwdIterT, _Alloc>& _M_results;
+      bool                              _M_managed;
     };
 
   template<typename _FwdIterT, typename _Alloc>
@@ -85,7 +110,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _SpecializedResults(const _Automaton::_SizeT __size,
                        const _SpecializedCursor<_FwdIterT>& __cursor,
                         match_results<_FwdIterT, _Alloc>& __m)
-    : _M_results(__m)
+    : _M_results(__m), _M_managed(false)
     {
       _M_results.clear();
       _M_results.reserve(__size + 2);
@@ -105,14 +130,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
       _CursorT __c = static_cast<_CursorT>(__pc);
       if (__j == 0)
-       _M_results.at(__i).first = __c._M_pos();
+        _M_results.at(__i).first = __c._M_pos();
       else
         _M_results.at(__i).second = __c._M_pos();
     }
 
-  /// A stack of states used in evaluating the NFA.
-  typedef std::stack<_StateIdT, std::vector<_StateIdT> > _StateStack;
-
   /// Executes a regular expression NFA/DFA over a range using a
   /// variant of the parallel execution algorithm featured in the grep
   /// utility, modified to use Laurikari tags.
@@ -124,47 +146,126 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                   const _AutomatonPtr&              __automaton,
                   regex_constants::match_flag_type  __flags)
     : _M_nfa(static_pointer_cast<_Nfa>(__automaton)),
-      _M_pattern(__p), _M_results(__r)
+      _M_str_cur(__p), _M_results(__r)
+    { }
+
+    virtual
+    ~_Grep_matcher()
     { }
 
     // Set matched when string exactly match the pattern.
-    void
-    _M_match();
+    virtual bool
+    _M_match() = 0;
 
     // Set matched when some prefix of the string matches the pattern.
-    void
-    _M_search_from_first();
+    virtual bool
+    _M_search_from_first() = 0;
+
+  protected:
+    const std::shared_ptr<_Nfa>        _M_nfa;
+    _PatternCursor&                    _M_str_cur;
+    _Results&                          _M_results;
+  };
+
+  // Time complexity: exponential
+  // Space complexity: O(_M_str_cur.size())
+  // _M_dfs() take a state, along with current string cursor(_M_str_cur),
+  // trying to match current state with current character.
+  // Only _S_opcode_match will consume a character.
+  class _DFSMatcher
+  : public _Grep_matcher
+  {
+  public:
+    _DFSMatcher(_PatternCursor&                   __p,
+                _Results&                         __r,
+                const _AutomatonPtr&              __automaton,
+                regex_constants::match_flag_type  __flags)
+    : _Grep_matcher(__p, __r, __automaton, __flags)
+    { }
 
-    // TODO: in the future this function will be _M_match, in another class.
     bool
-    _M_dfs_match()
+    _M_match()
     { return _M_dfs<true>(_M_nfa->_M_start()); }
 
-    // TODO: in the future this function will be _M_search_from_first,
-    // in another class.
     bool
-    _M_dfs_search_from_first()
+    _M_search_from_first()
     { return _M_dfs<false>(_M_nfa->_M_start()); }
 
   private:
-    _StateSet
-    _M_e_closure(_StateIdT __i);
+    template<bool __match_mode>
+      bool
+      _M_dfs(_StateIdT __i);
+  };
+
+  // It's essentially a variant of Single-Source-Shortest-Path problem, where,
+  // the matching results is the final distance and should be minimized.
+  // Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed
+  // (BFS-like) Bellman-Ford algorithm,
+  // SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
+  //
+  // Every entry of _M_current saves the solution(grouping status) for every
+  // matching head. When states transfer, solutions will be compared and
+  // deduplicated(based on which greedy mode we have).
+  //
+  // Time complexity: O(_M_str_cur.size() * _M_nfa.size())
+  // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
+  class _BFSMatcher
+  : public _Grep_matcher
+  {
+  public:
+    _BFSMatcher(_PatternCursor&                   __p,
+                _Results&                         __r,
+                const _AutomatonPtr&              __automaton,
+                regex_constants::match_flag_type  __flags)
+    : _Grep_matcher(__p, __r, __automaton, __flags)
+    {
+      if (_M_nfa->_M_start() != _S_invalid_state_id)
+        _M_current[_M_nfa->_M_start()] = _M_results._M_clone();
+      _M_e_closure();
+    }
 
-    _StateSet
-    _M_e_closure(const _StateSet& __s);
+    bool
+    _M_match()
+    { return _M_main_loop<true>(); }
 
-    _StateSet
-    _M_e_closure(_StateStack& __stack, const _StateSet& __s);
+    bool
+    _M_search_from_first()
+    { return _M_main_loop<false>(); }
 
+  private:
     template<bool __match_mode>
       bool
-      _M_dfs(_StateIdT __i);
+      _M_main_loop();
 
-    const std::shared_ptr<_Nfa>        _M_nfa;
-    _PatternCursor&                    _M_pattern;
-    _Results&                          _M_results;
+    void
+    _M_e_closure();
+
+    void
+    _M_move();
+
+    bool
+    _M_match_less_than(_StateIdT __u, _StateIdT __v) const;
+
+    bool
+    _M_includes_some() const;
+
+    std::map<_StateIdT, std::unique_ptr<_Results>>     _M_current;
   };
 
+  std::unique_ptr<_Grep_matcher> _Nfa::
+  _M_get_matcher(_PatternCursor&                   __p,
+                 _Results&                         __r,
+                 const _AutomatonPtr&              __a,
+                 regex_constants::match_flag_type  __flags)
+  {
+    if (_M_has_back_ref)
+      return unique_ptr<_Grep_matcher>(
+        new _DFSMatcher(__p, __r, __a, __flags));
+    else
+      return unique_ptr<_Grep_matcher>(
+        new _BFSMatcher(__p, __r, __a, __flags));
+  }
+
  //@} regex-detail
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace __detail
index dccdfda0bc18651611c95ffac7ec1ae6a7a08bf8..2cfc790b6e66248d788cbf5c072fb56e73845812 100644 (file)
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
-namespace
-{
-  // A stack of states used in evaluating the NFA.
-  typedef std::stack<std::__detail::_StateIdT,
-                     std::vector<std::__detail::_StateIdT>
-                    > _StateStack;
-
-  // Obtains the next state set given the current state set __s and the current
-  // input character.
-  inline std::__detail::_StateSet
-  __move(const std::__detail::_PatternCursor& __p,
-         const std::__detail::_Nfa& __nfa,
-         const std::__detail::_StateSet& __s)
-  {
-    std::__detail::_StateSet __m;
-    for (std::__detail::_StateSet::const_iterator __i = __s.begin();
-        __i != __s.end(); ++__i)
-      {
-       if (*__i == std::__detail::_S_invalid_state_id)
-         continue;
-
-       const std::__detail::_State& __state = __nfa[*__i];
-       if (__state._M_opcode == std::__detail::_S_opcode_match
-           && __state._M_matches(__p))
-         __m.insert(__state._M_next);
-      }
-    return __m;
-  }
-
-  // returns true if (__s intersect __t) is not empty
-  inline bool
-  __includes_some(const std::__detail::_StateSet& __s,
-                  const std::__detail::_StateSet& __t)
-  {
-    if (__s.size() > 0 && __t.size() > 0)
-      {
-       std::__detail::_StateSet::const_iterator __first = __s.begin();
-       std::__detail::_StateSet::const_iterator __second = __t.begin();
-       while (__first != __s.end() && __second != __t.end())
-         {
-           if (*__first < *__second)
-             ++__first;
-           else if (*__second < *__first)
-             ++__second;
-           else
-             return true;
-         }
-      }
-    return false;
-  }
-
-  // If an identified state __u is not already in the current state set __e,
-  // insert it and push it on the current state stack __s.
-  inline void
-  __add_visited_state(const std::__detail::_StateIdT __u,
-                      _StateStack&                  __s,
-                      std::__detail::_StateSet&      __e)
-  {
-    if (__e.count(__u) == 0)
-      {
-       __e.insert(__u);
-       __s.push(__u);
-      }
-  }
-
-} // anonymous namespace
-
 namespace __detail
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-  // _M_dfs() take a state, along with current string cursor(_M_pattern),
-  // trying to match current state with current character.
-  // Only _S_opcode_match will consume a character.
   // TODO: This is too slow. Try to compile the NFA to a DFA.
   template<bool __match_mode>
-    bool _Grep_matcher::
+    bool _DFSMatcher::
     _M_dfs(_StateIdT __i)
     {
       if (__i == _S_invalid_state_id)
@@ -126,116 +56,162 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
             || _M_dfs<__match_mode>(__state._M_next);
           break;
         case _S_opcode_subexpr_begin:
-          __state._M_tagger(_M_pattern, _M_results);
+          __state._M_tagger(_M_str_cur, _M_results);
           __ret = _M_dfs<__match_mode>(__state._M_next);
           break;
         case _S_opcode_subexpr_end:
-          __state._M_tagger(_M_pattern, _M_results);
+          __state._M_tagger(_M_str_cur, _M_results);
           __ret = _M_dfs<__match_mode>(__state._M_next);
           _M_results._M_set_matched(__state._M_subexpr, __ret);
           break;
         case _S_opcode_match:
-          if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern))
+          if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur))
             {
-              _M_pattern._M_next();
+              _M_str_cur._M_next();
               __ret = _M_dfs<__match_mode>(__state._M_next);
-              _M_pattern._M_prev();
+              _M_str_cur._M_prev();
             }
           break;
         case _S_opcode_accept:
           if (__match_mode)
-            __ret = _M_pattern._M_at_end();
+            __ret = _M_str_cur._M_at_end();
           else
             __ret = true;
           break;
         default:
-          _GLIBCXX_DEBUG_ASSERT( false );
+          _GLIBCXX_DEBUG_ASSERT(false);
         }
       return __ret;
     }
 
-  inline void _Grep_matcher::
-  _M_match()
-  {
-    __detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
-    for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
-      __t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
-
-    _M_results._M_set_matched(0,
-                              __includes_some(_M_nfa->_M_final_states(), __t));
-  }
+  template<bool __match_mode>
+    bool _BFSMatcher::
+    _M_main_loop()
+    {
+      while (!_M_str_cur._M_at_end())
+        {
+          if (!__match_mode)
+            if (_M_includes_some())
+              return true;
+          _M_move();
+          _M_str_cur._M_next();
+          _M_e_closure();
+        }
+      return _M_includes_some();
+    }
 
-  inline void _Grep_matcher::
-  _M_search_from_first()
+  // The SPFA approach.
+  void _BFSMatcher::
+  _M_e_closure()
   {
-    __detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
-    for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
+    std::queue<_StateIdT> __q;
+    std::vector<bool> __in_q(_M_nfa->size(), false);
+    for (auto& __it : _M_current)
       {
-        if (__includes_some(_M_nfa->_M_final_states(), __t)) // KISS
-          {
-            _M_results._M_set_matched(0, true);
+        __in_q[__it.first] = true;
+        __q.push(__it.first);
+      }
+    while (!__q.empty())
+      {
+        auto __u = __q.front();
+        __q.pop();
+        __in_q[__u] = false;
+        const auto& __state = (*_M_nfa)[__u];
+
+        // Can be implemented using method, but there're too much arguments.
+        auto __add_visited_state = [&](_StateIdT __v)
+        {
+          if (__v == _S_invalid_state_id)
             return;
+          if (_M_match_less_than(__u, __v))
+            {
+              _M_current[__v] = _M_current[__u]->_M_clone();
+              // if a state is updated, it's outgoing neighbors should be
+              // reconsidered too. Push them to the queue.
+              if (!__in_q[__v])
+                {
+                  __in_q[__v] = true;
+                  __q.push(__v);
+                }
+            }
+        };
+
+        switch (__state._M_opcode)
+          {
+            case _S_opcode_alternative:
+              __add_visited_state(__state._M_next);
+              __add_visited_state(__state._M_alt);
+              break;
+            case _S_opcode_subexpr_begin:
+              __state._M_tagger(_M_str_cur, *_M_current[__u]);
+              __add_visited_state(__state._M_next);
+              break;
+            case _S_opcode_subexpr_end:
+              __state._M_tagger(_M_str_cur, *_M_current[__u]);
+              _M_current[__u]->_M_set_matched(__state._M_subexpr, true);
+              __add_visited_state(__state._M_next);
+              break;
+            case _S_opcode_match:
+              break;
+            case _S_opcode_accept:
+              __add_visited_state(__state._M_next);
+              break;
+            default:
+              _GLIBCXX_DEBUG_ASSERT(false);
           }
-        __t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
       }
-    _M_results._M_set_matched(0, false);
   }
 
-  // Creates the e-closure set for the initial state __i.
-  inline _StateSet _Grep_matcher::
-  _M_e_closure(_StateIdT __i)
+  void _BFSMatcher::
+  _M_move()
   {
-    _StateSet __s;
-    __s.insert(__i);
-    _StateStack __stack;
-    __stack.push(__i);
-    return this->_M_e_closure(__stack, __s);
+    decltype(_M_current) __next;
+    for (auto& __it : _M_current)
+      {
+        const auto& __state = (*_M_nfa)[__it.first];
+        if (__state._M_opcode == _S_opcode_match
+            && __state._M_matches(_M_str_cur))
+          if (_M_match_less_than(__it.first, __state._M_next)
+              && __state._M_next != _S_invalid_state_id)
+            __next[__state._M_next] = __it.second->_M_clone();
+      }
+    _M_current = move(__next);
   }
 
-  // Creates the e-closure set for an arbitrary state set __s.
-  inline _StateSet _Grep_matcher::
-  _M_e_closure(const _StateSet& __s)
+  bool _BFSMatcher::
+  _M_match_less_than(_StateIdT __u, _StateIdT __v) const
   {
-    _StateStack __stack;
-    for (_StateSet::const_iterator __i = __s.begin(); __i != __s.end(); ++__i)
-      __stack.push(*__i);
-    return this->_M_e_closure(__stack, __s);
+    if (_M_current.count(__u) == 0)
+      return false;
+    if (_M_current.count(__v) > 0)
+      return true;
+    // TODO: Greedy and Non-greedy support
+    return true;
   }
 
-  inline _StateSet _Grep_matcher::
-  _M_e_closure(_StateStack& __stack, const _StateSet& __s)
+  bool _BFSMatcher::
+  _M_includes_some() const
   {
-    _StateSet __e = __s;
-    while (!__stack.empty())
+    auto& __s = _M_nfa->_M_final_states();
+    auto& __t = _M_current;
+    if (__s.size() > 0 && __t.size() > 0)
       {
-       _StateIdT __t = __stack.top(); __stack.pop();
-       if (__t == _S_invalid_state_id)
-         continue;
-       // for each __u with edge from __t to __u labeled e do ...
-       const _State& __state = _M_nfa->operator[](__t);
-       switch (__state._M_opcode)
+       auto __first = __s.begin();
+       auto __second = __t.begin();
+       while (__first != __s.end() && __second != __t.end())
          {
-         case _S_opcode_alternative:
-           __add_visited_state(__state._M_next, __stack, __e);
-           __add_visited_state(__state._M_alt, __stack, __e);
-           break;
-         case _S_opcode_subexpr_begin:
-           __add_visited_state(__state._M_next, __stack, __e);
-           __state._M_tagger(_M_pattern, _M_results);
-           break;
-         case _S_opcode_subexpr_end:
-           __add_visited_state(__state._M_next, __stack, __e);
-           __state._M_tagger(_M_pattern, _M_results);
-           _M_results._M_set_matched(__state._M_subexpr, true);
-           break;
-         case _S_opcode_accept:
-           __add_visited_state(__state._M_next, __stack, __e);
-           break;
-         default:
-           break;
+           if (*__first < __second->first)
+             ++__first;
+           else if (__second->first < *__first)
+             ++__second;
+           else
+              {
+                _M_results._M_assign(*__second->second);
+                return true;
+              }
          }
       }
-    return __e;
+    return false;
   }
 
 _GLIBCXX_END_NAMESPACE_VERSION
index fc30237436a7c271e5e262193658a2c3994850f6..1bae1b78ed4c9d1b63a9925975d6ce5c5942e72f 100644 (file)
@@ -39,6 +39,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * @{
    */
 
+  /// Provides a generic facade for a templated match_results.
+  struct _Results
+  {
+    virtual
+    ~_Results()
+    { }
+    virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
+    virtual void _M_set_matched(int __i, bool __is_matched) = 0;
+    virtual std::unique_ptr<_Results> _M_clone() const = 0;
+    virtual void _M_assign(const _Results& __rhs) = 0;
+  };
+
+  class _Grep_matcher;
+  class _Automaton;
+
+  /// Generic shared pointer to an automaton.
+  typedef std::shared_ptr<_Automaton> _AutomatonPtr;
+
   /// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
   class _Automaton
   {
@@ -52,15 +70,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     virtual _SizeT
     _M_sub_count() const = 0;
 
+    virtual std::unique_ptr<_Grep_matcher>
+    _M_get_matcher(_PatternCursor&                   __p,
+                   _Results&                         __r,
+                   const _AutomatonPtr&              __automaton,
+                   regex_constants::match_flag_type  __flags) = 0;
+
 #ifdef _GLIBCXX_DEBUG
     virtual std::ostream&
     _M_dot(std::ostream& __ostr) const = 0;
 #endif
   };
 
-  /// Generic shared pointer to an automaton.  
-  typedef std::shared_ptr<_Automaton> _AutomatonPtr;
-
   /// Operation codes that define the type of transitions within the base NFA
   /// that represents the regular expression.
   enum _Opcode
@@ -73,13 +94,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _S_opcode_accept        = 255
   };
 
-  /// Provides a generic facade for a templated match_results.
-  struct _Results
-  {
-    virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
-    virtual void _M_set_matched(int __i, bool __is_matched) = 0;
-  };
-
   /// Tags current state (for subexpr begin/end).
   typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
 
@@ -113,7 +127,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { __r._M_set_pos(_M_index, 1, __pc); }
 
       int       _M_index;
-      _FwdIterT _M_pos;
     };
 
   /// Indicates if current state matches cursor current.
@@ -275,7 +288,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     typedef regex_constants::syntax_option_type _FlagT;
 
     _Nfa(_FlagT __f)
-    : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
+    : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
+    // TODO: BFS by default. Your choice. Need to be set by the compiler.
+    _M_has_back_ref(false)
     { }
 
     ~_Nfa()
@@ -334,6 +349,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return this->size()-1;
     }
 
+    void
+    _M_set_back_ref(bool __b)
+    { _M_has_back_ref = __b; }
+
+    std::unique_ptr<_Grep_matcher>
+    _M_get_matcher(_PatternCursor&                   __p,
+                   _Results&                         __r,
+                   const _AutomatonPtr&              __automaton,
+                   regex_constants::match_flag_type  __flags);
+
 #ifdef _GLIBCXX_DEBUG
     std::ostream&
     _M_dot(std::ostream& __ostr) const;
@@ -344,6 +369,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _StateIdT  _M_start_state;
     _StateSet  _M_accepting_states;
     _SizeT     _M_subexpr_count;
+    bool       _M_has_back_ref;
   };
 
   /// Describes a sequence of one or more %_State, its current start
index 907f5bb65d89d92713b0ccb58c45769803ba2c4f..b0918ed4afe8e50414cf2aafc269cbd6b0d8319b 100644 (file)
@@ -44,6 +44,8 @@
 #include <iterator>
 #include <locale>
 #include <memory>
+#include <map>
+#include <queue>
 #include <set>
 #include <sstream>
 #include <stack>
index 383ed054a9073095fab3c9a0c710e877d3f1f3e8..10e2ff43765b31847e38d1ffdfdda190c2818a75 100644 (file)
@@ -32,16 +32,31 @@ test01()
 {
   bool test __attribute__((unused)) = true;
 
-  std::regex  re("zxcv/(one.*)abc", std::regex::extended);
-  std::string target("zxcv/onetwoabc");
-  std::smatch m;
-
-  VERIFY( std::regex_search(target, m, re) );
-  VERIFY( m.size() == 2 );
-  VERIFY( m[0].matched == true );
-  VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
-  VERIFY( m[1].matched == true );
-  VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
+  {
+    std::regex  re("zxcv/(one.*)abc", std::regex::extended);
+    std::string target("zxcv/onetwoabc");
+    std::smatch m;
+
+    VERIFY( std::regex_search(target, m, re) );
+    VERIFY( m.size() == 2 );
+    VERIFY( m[0].matched == true );
+    VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
+    VERIFY( m[1].matched == true );
+    VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
+  }
+
+  {
+    std::regex  re("zxcv/(one.*)abc()\\2", std::regex::extended);
+    std::string target("zxcv/onetwoabc");
+    std::smatch m;
+
+    VERIFY( std::regex_search(target, m, re) );
+    VERIFY( m.size() == 3 );
+    VERIFY( m[0].matched == true );
+    VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
+    VERIFY( m[1].matched == true );
+    VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
+  }
 }
 
 int
index 3031c43d188bb392da6de4d2399b9f45ad78ceb5..cb3a54f4d88203186445cacac3ea64ec8c8ccd16 100644 (file)
@@ -33,13 +33,24 @@ test01()
 {
   bool test __attribute__((unused)) = true;
 
-  std::regex  re("/asdf(/.*)", std::regex::extended);
-  std::string target("/asdf/qwerty");
-  std::smatch m;
+  {
+    std::regex  re("/asdf(/.*)", std::regex::extended);
+    std::string target("/asdf/qwerty");
+    std::smatch m;
 
-  VERIFY( std::regex_match(target, m, re) );
-  VERIFY( m.size() == 2 );
-  VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
+    VERIFY( std::regex_match(target, m, re) );
+    VERIFY( m.size() == 2 );
+    VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
+  }
+  {
+    std::regex  re("/asdf(/.*)()\\2", std::regex::extended);
+    std::string target("/asdf/qwerty");
+    std::smatch m;
+
+    VERIFY( std::regex_match(target, m, re) );
+    VERIFY( m.size() == 3 );
+    VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
+  }
 }
 
 int
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc
new file mode 100644 (file)
index 0000000..86fab85
--- /dev/null
@@ -0,0 +1,71 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-07-29  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended automatic matcher dispatching against a std::string target.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+template<typename _Bi_iter, typename _Alloc,
+         typename _Ch_type, typename _Rx_traits>
+  void
+  fake_match(_Bi_iter                                 __s,
+             _Bi_iter                                 __e,
+             match_results<_Bi_iter, _Alloc>&         __m,
+             const basic_regex<_Ch_type, _Rx_traits>& __re,
+             regex_constants::match_flag_type         __flags
+                            = regex_constants::match_default)
+  {
+    __detail::_AutomatonPtr __a = __re._M_get_automaton();
+    __detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
+    __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
+    __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
+    VERIFY( dynamic_cast<__detail::_DFSMatcher *>(
+              &*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr );
+  }
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  regex  re("()(one(.*))abc\\1"); // backref cause DFS
+  const string target("onetwoabc");
+  smatch m;
+  fake_match(target.begin(), target.end(), m, re);
+
+  regex_match(target, m, re);
+  VERIFY( m[2].matched );
+  VERIFY( m[3].matched );
+  VERIFY( std::string(m[2].first, m[2].second) == "onetwo" );
+  VERIFY( std::string(m[3].first, m[3].second) == "two" );
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}