]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
regex_executor.h: Add _TodoList class.
authorTim Shen <timshen91@gmail.com>
Tue, 8 Oct 2013 03:41:14 +0000 (03:41 +0000)
committerTim Shen <timshen@gcc.gnu.org>
Tue, 8 Oct 2013 03:41:14 +0000 (03:41 +0000)
2013-10-08  Tim Shen  <timshen91@gmail.com>

* include/bits/regex_executor.h: Add _TodoList class.
* include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add
_M_match_stack and _M_stack to make everything faster. Break if
_M_stack is empty, to reduce unnecessary idling.
* testsuite/performance/28_regex/split.cc: New.

From-SVN: r203261

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex_executor.h
libstdc++-v3/include/bits/regex_executor.tcc
libstdc++-v3/testsuite/performance/28_regex/split.cc [new file with mode: 0644]

index 377ac0e7e9fd634df9d32993fa4f817d1d799fe4..4d6906d5ef4f6b2fcd61194ebbd31e87f21b7dfc 100644 (file)
@@ -1,3 +1,11 @@
+2013-10-08  Tim Shen  <timshen91@gmail.com>
+
+       * include/bits/regex_executor.h: Add _TodoList class.
+       * include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add
+       _M_match_stack and _M_stack to make everything faster. Break if
+       _M_stack is empty, to reduce unnecessary idling.
+       * testsuite/performance/28_regex/split.cc: New.
+
 2013-10-06  Tim Shen  <timshen91@gmail.com>
 
        * include/bits/regex.h: (regex_token_iterator<>::regex_token_iterator):
index 2770098152b8dc64e971d37bede0a184a6c0a382..462643779f2e650eee039b23d442a03dbd6cfbe7 100644 (file)
@@ -102,22 +102,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 
       bool
-      _M_search()
-      {
-       if (_M_flags & regex_constants::match_continuous)
-         return _M_search_from_first();
-       auto __cur = _M_begin;
-       do
-         {
-           _M_match_mode = false;
-           _M_init(__cur);
-           if (_M_main())
-             return true;
-         }
-       // Continue when __cur == _M_end
-       while (__cur++ != _M_end);
-       return false;
-      }
+      _M_search();
 
       bool
       _M_is_word(_CharT __ch) const
@@ -346,6 +331,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       };
       typedef std::unique_ptr<_ResultsEntry>               _ResultsPtr;
 
+      class _TodoList
+      {
+      public:
+       explicit
+       _TodoList(size_t __sz)
+       : _M_states(), _M_exists(__sz, false)
+       { }
+
+       void _M_push(_StateIdT __u)
+       {
+         _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
+         if (!_M_exists[__u])
+           {
+             _M_exists[__u] = true;
+             _M_states.push_back(__u);
+           }
+       }
+
+       _StateIdT _M_pop()
+       {
+         auto __ret = _M_states.back();
+         _M_states.pop_back();
+         _M_exists[__ret] = false;
+         return __ret;
+       }
+
+       bool _M_empty() const
+       { return _M_states.empty(); }
+
+       void _M_clear()
+       {
+         _M_states.clear();
+         _M_exists.assign(_M_exists.size(), false);
+       }
+
+      private:
+       std::vector<_StateIdT>         _M_states;
+       std::vector<bool>              _M_exists;
+      };
+
     public:
       _BFSExecutor(_BiIter         __begin,
                   _BiIter         __end,
@@ -355,6 +380,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       : _BaseT(__begin, __end, __results, __re, __flags),
       _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
             (__re._M_automaton)),
+      _M_match_stack(_M_nfa.size()),
+      _M_stack(_M_nfa.size()),
       _M_start_state(_M_nfa._M_start())
       { }
 
@@ -362,14 +389,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_init(_BiIter __cur)
       {
-       _GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id);
        this->_M_current = __cur;
        _M_covered.clear();
        _ResultsVec& __res(this->_M_results);
        _M_covered[this->_M_start_state] =
          _ResultsPtr(new _ResultsEntry(__res.size(),
                                        _M_nfa._M_quant_count));
-       _M_e_closure();
+       _M_stack._M_push(this->_M_start_state);
       }
 
       void
@@ -398,11 +424,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                                        this->_M_flags));
       }
 
+      const _NFAT&                     _M_nfa;
       std::map<_StateIdT, _ResultsPtr> _M_covered;
+      _TodoList                        _M_match_stack;
+      _TodoList                        _M_stack;
+      _StateIdT                        _M_start_state;
       // To record global optimal solution.
       _ResultsPtr                      _M_cur_results;
-      const _NFAT&                     _M_nfa;
-      _StateIdT                        _M_start_state;
     };
 
  //@} regex-detail
index 3b1fcbc9e38f489ac85f27b5e11f88e9a15fa7d5..59b082e2ac3be1f89c5dde02a558a149ed55bf53 100644 (file)
@@ -34,14 +34,31 @@ namespace __detail
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+  template<typename _BiIter, typename _Alloc,
+    typename _CharT, typename _TraitsT>
+    bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_search()
+    {
+      if (_M_flags & regex_constants::match_continuous)
+       return _M_search_from_first();
+      auto __cur = _M_begin;
+      do
+       {
+         _M_match_mode = false;
+         _M_init(__cur);
+         if (_M_main())
+           return true;
+       }
+      // Continue when __cur == _M_end
+      while (__cur++ != _M_end);
+      return false;
+    }
+
   template<typename _BiIter, typename _Alloc,
     typename _CharT, typename _TraitsT>
     bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
     _M_dfs(_StateIdT __i)
     {
-      if (__i == _S_invalid_state_id)
-       // This is not that certain. Need deeper investigate.
-       return false;
       auto& __current = this->_M_current;
       const auto& __state = _M_nfa[__i];
       bool __ret = false;
@@ -161,6 +178,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
     _M_main()
     {
+      _M_e_closure();
       bool __ret = false;
       if (!this->_M_match_mode
          && !(this->_M_flags & regex_constants::match_not_null))
@@ -169,6 +187,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        {
          _M_move();
          ++this->_M_current;
+         if (_M_stack._M_empty())
+           break;
          _M_e_closure();
          if (!this->_M_match_mode)
            // To keep regex_search greedy, no "return true" here.
@@ -178,6 +198,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        __ret = _M_includes_some();
       if (__ret)
        this->_M_set_results(_M_cur_results->_M_get());
+      _M_match_stack._M_clear();
+      _GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty());
       return __ret;
     }
 
@@ -186,42 +208,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
     _M_e_closure()
     {
-      std::queue<_StateIdT> __q;
-      std::vector<bool> __in_q(_M_nfa.size(), false);
       auto& __current = this->_M_current;
 
-      for (auto& __it : _M_covered)
-       {
-         __in_q[__it.first] = true;
-         __q.push(__it.first);
-       }
-      while (!__q.empty())
+      while (!_M_stack._M_empty())
        {
-         auto __u = __q.front();
-         __q.pop();
-         __in_q[__u] = false;
+         auto __u = _M_stack._M_pop();
+         _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
          const auto& __state = _M_nfa[__u];
 
          // Can be implemented using method, but there will be too many
          // arguments. I would use macro function before C++11, but lambda is
          // a better choice, since hopefully compiler can inline it.
-         auto __add_visited_state = [&](_StateIdT __v)
+         auto __add_visited_state = [=](_StateIdT __v)
          {
-           if (__v == _S_invalid_state_id)
-             return;
-           if (_M_covered.count(__u) != 0
-               && (_M_covered.count(__v) == 0
-                   || *_M_covered[__u] < *_M_covered[__v]))
+           if (_M_covered.count(__v) == 0)
              {
                _M_covered[__v] =
                  _ResultsPtr(new _ResultsEntry(*_M_covered[__u]));
+               _M_stack._M_push(__v);
+               return;
+             }
+           auto& __cu = _M_covered[__u];
+           auto& __cv = _M_covered[__v];
+           if (*__cu < *__cv)
+             {
+               __cv = _ResultsPtr(new _ResultsEntry(*__cu));
                // if a state is updated, it's outgoing neighbors should be
                // reconsidered too. Push them to the queue.
-               if (!__in_q[__v])
-                 {
-                   __in_q[__v] = true;
-                   __q.push(__v);
-                 }
+               _M_stack._M_push(__v);
              }
          };
 
@@ -233,13 +247,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
              case _S_opcode_alternative:
                {
                  __add_visited_state(__state._M_next);
-                 auto __back =
-                   _M_covered[__u]->_M_quant_keys[__state._M_quant_index];
-                 _M_covered[__u]->_M_inc(__state._M_quant_index,
-                                         __state._M_neg);
+                 auto& __cu = *_M_covered[__u];
+                 auto __back = __cu._M_quant_keys[__state._M_quant_index];
+                 __cu._M_inc(__state._M_quant_index, __state._M_neg);
                  __add_visited_state(__state._M_alt);
-                 _M_covered[__u]->_M_quant_keys[__state._M_quant_index]
-                   = __back;
+                 __cu._M_quant_keys[__state._M_quant_index] = __back;
                }
                break;
              case _S_opcode_subexpr_begin:
@@ -281,6 +293,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                  __add_visited_state(__state._M_next);
                break;
              case _S_opcode_match:
+               _M_match_stack._M_push(__u);
                break;
              case _S_opcode_accept:
                break;
@@ -296,15 +309,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_move()
     {
       decltype(_M_covered) __next;
-      for (auto& __it : _M_covered)
+      while (!_M_match_stack._M_empty())
        {
-         const auto& __state = _M_nfa[__it.first];
-         if (__state._M_opcode == _S_opcode_match
-             && __state._M_matches(*this->_M_current))
-           if (__state._M_next != _S_invalid_state_id)
-             if (__next.count(__state._M_next) == 0
-                 || *__it.second < *__next[__state._M_next])
-               __next[__state._M_next] = move(__it.second);
+         auto __u = _M_match_stack._M_pop();
+         const auto& __state = _M_nfa[__u];
+         auto& __cu = _M_covered[__u];
+         if (__state._M_matches(*this->_M_current)
+             && (__next.count(__state._M_next) == 0
+                 || *__cu < *__next[__state._M_next]))
+           {
+             __next[__state._M_next] = std::move(__cu);
+             _M_stack._M_push(__state._M_next);
+           }
        }
       _M_covered = move(__next);
     }
@@ -314,31 +330,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
     _M_includes_some()
     {
-      auto& __s = _M_nfa._M_final_states();
-      auto& __t = _M_covered;
       bool __succ = false;
-      if (__s.size() > 0 && __t.size() > 0)
-       {
-         auto __first = __s.begin();
-         auto __second = __t.begin();
-         while (__first != __s.end() && __second != __t.end())
-           {
-             if (*__first < __second->first)
-               ++__first;
-             else if (*__first > __second->first)
-               ++__second;
-             else
-               {
-                 if (_M_cur_results == nullptr
-                     || *__second->second < *_M_cur_results)
-                   _M_cur_results =
-                     _ResultsPtr(new _ResultsEntry(*__second->second));
-                 __succ = true;
-                 ++__first;
-                 ++__second;
-               }
-           }
-       }
+      for (auto __u : _M_nfa._M_final_states())
+       if (_M_covered.count(__u))
+         {
+           __succ = true;
+           auto& __cu = _M_covered[__u];
+           if (_M_cur_results == nullptr || *__cu < *_M_cur_results)
+             _M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu));
+         }
       return __succ;
     }
 
diff --git a/libstdc++-v3/testsuite/performance/28_regex/split.cc b/libstdc++-v3/testsuite/performance/28_regex/split.cc
new file mode 100644 (file)
index 0000000..5b972e4
--- /dev/null
@@ -0,0 +1,115 @@
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 2013-10-08  Tim Shen  <timshen91@gmail.com>
+
+#include <testsuite_performance.h>
+#include <regex>
+
+using namespace __gnu_test;
+using namespace std;
+
+void split(string s)
+{
+    regex re("\\s+");
+    for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);
+        it != sregex_token_iterator();
+        ++it)
+      {
+      }
+}
+
+int main()
+{
+  string source = "\
+// Copyright (C) 2013 Free Software Foundation, Inc.\n\
+//\n\
+// This file is part of the GNU ISO C++ Library.  This library is free\n\
+// software; you can redistribute it and/or modify it under the\n\
+// terms of the GNU General Public License as published by the\n\
+// Free Software Foundation; either version 3, or (at your option)\n\
+// any later version.\n\
+\n\
+// This library is distributed in the hope that it will be useful,\n\
+// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
+// GNU General Public License for more details.\n\
+\n\
+// You should have received a copy of the GNU General Public License along\n\
+// with this library; see the file COPYING3.  If not see\n\
+// <http://www.gnu.org/licenses/>.\n\
+\n\
+// 2013-10-08  Tim Shen  <timshen91@gmail.com>\n\
+\n\
+#include <testsuite_performance.h>\n\
+#include <regex>\n\
+\n\
+using namespace __gnu_test;\n\
+using namespace std;\n\
+\n\
+void split(string s)\n\
+{\n\
+    regex re(\"\\s+\");\n\
+    for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\
+        it != sregex_token_iterator();\n\
+        ++it)\n\
+      {\n\
+      }\n\
+}\n\
+\n\
+int main()\n\
+{\n\
+  string source = \"\";\n\
+  time_counter time;\n\
+  resource_counter resource;\n\
+\n\
+  source = source + source;\n\
+  source = source + source;\n\
+  source = source + source;\n\
+  source = source + source;\n\
+  source = source + source;\n\
+  source = source + source;\n\
+  source = source + source;\n\
+  source = source + source;\n\
+\n\
+  start_counters(time, resource);\n\
+  split(source);\n\
+  stop_counters(time, resource);\n\
+  report_performance(__FILE__, \"\", time, resource);\n\
+\n\
+  return 0;\n\
+}\n";
+
+  time_counter time;
+  resource_counter resource;
+
+  source = source + source;
+  source = source + source;
+  source = source + source;
+  source = source + source;
+  source = source + source;
+  source = source + source;
+  source = source + source;
+  source = source + source;
+
+  start_counters(time, resource);
+  split(source);
+  stop_counters(time, resource);
+  report_performance(__FILE__, "", time, resource);
+
+  return 0;
+}