]> git.ipfire.org Git - thirdparty/gcc.git/blobdiff - libstdc++-v3/include/bits/regex_executor.h
Update copyright years.
[thirdparty/gcc.git] / libstdc++-v3 / include / bits / regex_executor.h
index 462643779f2e650eee039b23d442a03dbd6cfbe7..610547428704cc2f4963553767f2a3396f6770ab 100644 (file)
@@ -1,6 +1,6 @@
 // class template regex -*- C++ -*-
 
-// Copyright (C) 2013 Free Software Foundation, Inc.
+// Copyright (C) 2013-2017 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
 
 // FIXME convert comments to doxygen format.
 
-// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
-// much more similar. Also, make grouping seperated. The
-// regex_constants::nosubs enables much more simpler execution.
-
 namespace std _GLIBCXX_VISIBILITY(default)
 {
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
-  template<typename, typename>
-    class basic_regex;
-
-  template<typename>
-    class sub_match;
-
-  template<typename, typename>
-    class match_results;
-_GLIBCXX_END_NAMESPACE_VERSION
-
 namespace __detail
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -56,15 +41,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * @{
    */
 
-  template<typename _BiIter, typename _Alloc,
-    typename _CharT, typename _TraitsT>
+  /**
+   * @brief Takes a regex and an input string and does the matching.
+   *
+   * The %_Executor class has two modes: DFS mode and BFS mode, controlled
+   * by the template parameter %__dfs_mode.
+   */
+  template<typename _BiIter, typename _Alloc, typename _TraitsT,
+          bool __dfs_mode>
     class _Executor
     {
+      using __search_mode = integral_constant<bool, __dfs_mode>;
+      using __dfs = true_type;
+      using __bfs = false_type;
+
+      enum class _Match_mode : unsigned char { _Exact, _Prefix };
+
     public:
-      typedef basic_regex<_CharT, _TraitsT>           _RegexT;
-      typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
-      typedef regex_constants::match_flag_type        _FlagT;
-      typedef typename _TraitsT::char_class_type      _ClassT;
+      typedef typename iterator_traits<_BiIter>::value_type _CharT;
+      typedef basic_regex<_CharT, _TraitsT>                 _RegexT;
+      typedef std::vector<sub_match<_BiIter>, _Alloc>       _ResultsVec;
+      typedef regex_constants::match_flag_type              _FlagT;
+      typedef typename _TraitsT::char_class_type            _ClassT;
+      typedef _NFA<_TraitsT>                                _NFAT;
 
     public:
       _Executor(_BiIter         __begin,
@@ -74,8 +73,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                _FlagT          __flags)
       : _M_begin(__begin),
       _M_end(__end),
-      _M_results(__results),
       _M_re(__re),
+      _M_nfa(*__re._M_automaton),
+      _M_results(__results),
+      _M_rep_count(_M_nfa.size()),
+      _M_states(_M_nfa._M_start(), _M_nfa.size()),
       _M_flags((__flags & regex_constants::match_prev_avail)
               ? (__flags
                  & ~regex_constants::match_not_bol
@@ -83,354 +85,169 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
               : __flags)
       { }
 
-      // Set matched when string exactly match the pattern.
+      // Set matched when string exactly matches the pattern.
       bool
       _M_match()
       {
-       _M_match_mode = true;
-       _M_init(_M_begin);
-       return _M_main();
+       _M_current = _M_begin;
+       return _M_main(_Match_mode::_Exact);
       }
 
       // Set matched when some prefix of the string matches the pattern.
       bool
       _M_search_from_first()
       {
-       _M_match_mode = false;
-       _M_init(_M_begin);
-       return _M_main();
+       _M_current = _M_begin;
+       return _M_main(_Match_mode::_Prefix);
       }
 
       bool
       _M_search();
 
-      bool
-      _M_is_word(_CharT __ch) const
-      {
-       static const _CharT __s = 'w';
-       return _M_re._M_traits.isctype
-         (__ch, _M_re._M_traits.lookup_classname(&__s, &__s+1));
-      }
+    private:
+      void
+      _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
 
-      bool
-      _M_at_begin() const
-      {
-       return _M_current == _M_begin
-         && !(_M_flags & (regex_constants::match_not_bol
-                          | regex_constants::match_prev_avail));
-      }
+      void
+      _M_handle_repeat(_Match_mode, _StateIdT);
 
-      bool
-      _M_at_end() const
-      {
-       return _M_current == _M_end
-         && !(_M_flags & regex_constants::match_not_eol);
-      }
+      void
+      _M_handle_subexpr_begin(_Match_mode, _StateIdT);
 
-      bool
-      _M_word_boundry(_State<_CharT, _TraitsT> __state) const;
+      void
+      _M_handle_subexpr_end(_Match_mode, _StateIdT);
 
-      virtual std::unique_ptr<_Executor>
-      _M_clone() const = 0;
+      void
+      _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
 
-      // Return whether now match the given sub-NFA.
-      bool
-      _M_lookahead(_State<_CharT, _TraitsT> __state) const
-      {
-       auto __sub = this->_M_clone();
-       __sub->_M_set_start(__state._M_alt);
-       return __sub->_M_search_from_first();
-      }
+      void
+      _M_handle_line_end_assertion(_Match_mode, _StateIdT);
 
       void
-      _M_set_results(_ResultsVec& __cur_results);
+      _M_handle_word_boundary(_Match_mode, _StateIdT);
 
-    public:
-      virtual void
-      _M_init(_BiIter __cur) = 0;
-
-      virtual void
-      _M_set_start(_StateIdT __start) = 0;
-
-      virtual bool
-      _M_main() = 0;
-
-      _BiIter         _M_current;
-      const _BiIter   _M_begin;
-      const _BiIter   _M_end;
-      const _RegexT&  _M_re;
-      _ResultsVec&    _M_results;
-      _FlagT          _M_flags;
-      bool            _M_match_mode;
-    };
+      void
+      _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
 
-  // A _DFSExecutor perform a DFS on given NFA and input string. At the very
-  // beginning the executor stands in the start state, then it try every
-  // possible state transition in current state recursively. Some state
-  // transitions consume input string, say, a single-char-matcher or a
-  // back-reference matcher; some not, like assertion or other anchor nodes.
-  // When the input is exhausted and the current state is an accepting state,
-  // the whole executor return true.
-  //
-  // TODO: This approach is exponentially slow for certain input.
-  //       Try to compile the NFA to a DFA.
-  //
-  // Time complexity: exponential
-  // Space complexity: O(__end - __begin)
-  template<typename _BiIter, typename _Alloc,
-    typename _CharT, typename _TraitsT>
-    class _DFSExecutor
-    : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
-    {
-    public:
-      typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
-      typedef _NFA<_CharT, _TraitsT>                       _NFAT;
-      typedef typename _BaseT::_RegexT                     _RegexT;
-      typedef typename _BaseT::_ResultsVec                 _ResultsVec;
-      typedef typename _BaseT::_FlagT                      _FlagT;
+      void
+      _M_handle_match(_Match_mode, _StateIdT);
 
-    public:
-      _DFSExecutor(_BiIter         __begin,
-                  _BiIter         __end,
-                  _ResultsVec&    __results,
-                  const _RegexT&  __re,
-                  _FlagT          __flags)
-      : _BaseT(__begin, __end, __results, __re, __flags),
-      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
-            (__re._M_automaton)),
-      _M_start_state(_M_nfa._M_start())
-      { }
+      void
+      _M_handle_backref(_Match_mode, _StateIdT);
 
-    private:
       void
-      _M_init(_BiIter __cur)
-      {
-       _M_cur_results.resize(_M_nfa._M_sub_count() + 2);
-       this->_M_current = __cur;
-      }
+      _M_handle_accept(_Match_mode, _StateIdT);
+
+      void
+      _M_handle_alternative(_Match_mode, _StateIdT);
 
       void
-      _M_set_start(_StateIdT __start)
-      { _M_start_state = __start; }
+      _M_dfs(_Match_mode __match_mode, _StateIdT __start);
 
       bool
-      _M_main()
-      { return _M_dfs(this->_M_start_state); }
+      _M_main(_Match_mode __match_mode)
+      { return _M_main_dispatch(__match_mode, __search_mode{}); }
 
       bool
-      _M_dfs(_StateIdT __start);
+      _M_main_dispatch(_Match_mode __match_mode, __dfs);
 
-      std::unique_ptr<_BaseT>
-      _M_clone() const
+      bool
+      _M_main_dispatch(_Match_mode __match_mode, __bfs);
+
+      bool
+      _M_is_word(_CharT __ch) const
       {
-       return std::unique_ptr<_BaseT>(new _DFSExecutor(this->_M_current,
-                                                       this->_M_end,
-                                                       this->_M_results,
-                                                       this->_M_re,
-                                                       this->_M_flags));
+       static const _CharT __s[2] = { 'w' };
+       return _M_re._M_automaton->_M_traits.isctype
+         (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
       }
 
-      // To record current solution.
-      _ResultsVec     _M_cur_results;
-      const _NFAT&    _M_nfa;
-      _StateIdT       _M_start_state;
-    };
-
-  // Like the DFS approach, it try every possible state transition; Unlike DFS,
-  // it uses a queue instead of a stack to store matching states. It's a BFS
-  // approach.
-  //
-  // Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html) explained
-  // this algorithm clearly.
-  //
-  // Every entry of _M_covered saves the solution(grouping status) for every
-  // matching head. When states transit, solutions will be compared and
-  // deduplicated(based on which greedy mode we have).
-  //
-  // Time complexity: O((__end - __begin) * _M_nfa.size())
-  // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
-  template<typename _BiIter, typename _Alloc,
-    typename _CharT, typename _TraitsT>
-    class _BFSExecutor
-    : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
-    {
-    public:
-      typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
-      typedef _NFA<_CharT, _TraitsT>                       _NFAT;
-      typedef typename _BaseT::_RegexT                     _RegexT;
-      typedef typename _BaseT::_ResultsVec                 _ResultsVec;
-      typedef typename _BaseT::_FlagT                      _FlagT;
-      // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
-      // carefully work out how to compare to conflict matching states.
-      //
-      // A matching state is a pair(where, when); `where` is a NFA node; `when`
-      // is a _BiIter, indicating which char is the next to be matched. Two
-      // matching states conflict if they have equivalent `where` and `when`.
-      //
-      // Now we need to drop one and keep another, because at most one of them
-      // could be the final optimal solution. This behavior is affected by
-      // greedy policy.
-      //
-      // The definition of `greedy`:
-      // For the sequence of quantifiers in NFA sorted by their start positions,
-      // now maintain a vector in every matching state, with length equal to
-      // quantifier seq, recording repeating times of every quantifier. Now to
-      // compare two matching states, we just lexically compare these two
-      // vectors. To win the compare(to survive), one matching state needs to
-      // make its greedy quantifier count larger, and ungreedy quantifiers
-      // count smaller.
-      //
-      // In the implementation, we recorded negtive counts for greedy
-      // quantifiers and positive counts of ungreedy ones. Now the implicit
-      // operator<() for lexicographical_compare will emit the answer.
-      //
-      // When two vectors equal, it means the `where`, `when` and quantifier
-      // counts are identical, and indicates the same solution; so
-      // _ResultsEntry::operator<() just return false.
-      struct _ResultsEntry
-      : private _ResultsVec
+      bool
+      _M_at_begin() const
       {
-      public:
-       _ResultsEntry(size_t __res_sz, size_t __sz)
-       : _ResultsVec(__res_sz), _M_quant_keys(__sz)
-       { }
+       return _M_current == _M_begin
+         && !(_M_flags & (regex_constants::match_not_bol
+                          | regex_constants::match_prev_avail));
+      }
 
-       void
-       resize(size_t __n)
-       { _ResultsVec::resize(__n); }
+      bool
+      _M_at_end() const
+      {
+       return _M_current == _M_end
+         && !(_M_flags & regex_constants::match_not_eol);
+      }
 
-       size_t
-       size()
-       { return _ResultsVec::size(); }
+      bool
+      _M_word_boundary() const;
 
-       sub_match<_BiIter>&
-       operator[](size_t __idx)
-       { return _ResultsVec::operator[](__idx); }
+      bool
+      _M_lookahead(_StateIdT __next);
 
-       bool
-       operator<(const _ResultsEntry& __rhs) const
-       {
-         _GLIBCXX_DEBUG_ASSERT(_M_quant_keys.size()
-                               == __rhs._M_quant_keys.size());
-         return lexicographical_compare(_M_quant_keys.begin(),
-                                        _M_quant_keys.end(),
-                                        __rhs._M_quant_keys.begin(),
-                                        __rhs._M_quant_keys.end());
-       }
-
-       void
-       _M_inc(size_t __idx, bool __neg)
-       { _M_quant_keys[__idx] += __neg ? 1 : -1; }
-
-       _ResultsVec&
-       _M_get()
-       { return *this; }
-
-      public:
-       std::vector<int> _M_quant_keys;
-      };
-      typedef std::unique_ptr<_ResultsEntry>               _ResultsPtr;
-
-      class _TodoList
-      {
-      public:
-       explicit
-       _TodoList(size_t __sz)
-       : _M_states(), _M_exists(__sz, false)
-       { }
+       // Holds additional information used in BFS-mode.
+      template<typename _SearchMode, typename _ResultsVec>
+       struct _State_info;
 
-       void _M_push(_StateIdT __u)
+      template<typename _ResultsVec>
+       struct _State_info<__bfs, _ResultsVec>
        {
-         _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
-         if (!_M_exists[__u])
-           {
-             _M_exists[__u] = true;
-             _M_states.push_back(__u);
-           }
-       }
-
-       _StateIdT _M_pop()
+         explicit
+         _State_info(_StateIdT __start, size_t __n)
+         : _M_visited_states(new bool[__n]()), _M_start(__start)
+         { }
+
+         bool _M_visited(_StateIdT __i)
+         {
+           if (_M_visited_states[__i])
+             return true;
+           _M_visited_states[__i] = true;
+           return false;
+         }
+
+         void _M_queue(_StateIdT __i, const _ResultsVec& __res)
+         { _M_match_queue.emplace_back(__i, __res); }
+
+         // Dummy implementations for BFS mode.
+         _BiIter* _M_get_sol_pos() { return nullptr; }
+
+         // Saves states that need to be considered for the next character.
+         vector<pair<_StateIdT, _ResultsVec>>  _M_match_queue;
+         // Indicates which states are already visited.
+         unique_ptr<bool[]>                    _M_visited_states;
+         // To record current solution.
+         _StateIdT _M_start;
+       };
+
+      template<typename _ResultsVec>
+       struct _State_info<__dfs, _ResultsVec>
        {
-         auto __ret = _M_states.back();
-         _M_states.pop_back();
-         _M_exists[__ret] = false;
-         return __ret;
-       }
+         explicit
+         _State_info(_StateIdT __start, size_t) : _M_start(__start)
+         { }
 
-       bool _M_empty() const
-       { return _M_states.empty(); }
+         // Dummy implementations for DFS mode.
+         bool _M_visited(_StateIdT) const { return false; }
+         void _M_queue(_StateIdT, const _ResultsVec&) { }
 
-       void _M_clear()
-       {
-         _M_states.clear();
-         _M_exists.assign(_M_exists.size(), false);
-       }
+         _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
 
-      private:
-       std::vector<_StateIdT>         _M_states;
-       std::vector<bool>              _M_exists;
-      };
+         // To record current solution.
+         _StateIdT _M_start;
+         _BiIter   _M_sol_pos;
+       };
 
     public:
-      _BFSExecutor(_BiIter         __begin,
-                  _BiIter         __end,
-                  _ResultsVec&    __results,
-                  const _RegexT&  __re,
-                  _FlagT          __flags)
-      : _BaseT(__begin, __end, __results, __re, __flags),
-      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
-            (__re._M_automaton)),
-      _M_match_stack(_M_nfa.size()),
-      _M_stack(_M_nfa.size()),
-      _M_start_state(_M_nfa._M_start())
-      { }
-
-    private:
-      void
-      _M_init(_BiIter __cur)
-      {
-       this->_M_current = __cur;
-       _M_covered.clear();
-       _ResultsVec& __res(this->_M_results);
-       _M_covered[this->_M_start_state] =
-         _ResultsPtr(new _ResultsEntry(__res.size(),
-                                       _M_nfa._M_quant_count));
-       _M_stack._M_push(this->_M_start_state);
-      }
-
-      void
-      _M_set_start(_StateIdT __start)
-      { _M_start_state = __start; }
-
-      bool
-      _M_main();
-
-      void
-      _M_e_closure();
-
-      void
-      _M_move();
-
-      bool
-      _M_includes_some();
-
-      std::unique_ptr<_BaseT>
-      _M_clone() const
-      {
-       return std::unique_ptr<_BaseT>(new _BFSExecutor(this->_M_current,
-                                                       this->_M_end,
-                                                       this->_M_results,
-                                                       this->_M_re,
-                                                       this->_M_flags));
-      }
-
-      const _NFAT&                     _M_nfa;
-      std::map<_StateIdT, _ResultsPtr> _M_covered;
-      _TodoList                        _M_match_stack;
-      _TodoList                        _M_stack;
-      _StateIdT                        _M_start_state;
-      // To record global optimal solution.
-      _ResultsPtr                      _M_cur_results;
+      _ResultsVec                                           _M_cur_results;
+      _BiIter                                               _M_current;
+      _BiIter                                               _M_begin;
+      const _BiIter                                         _M_end;
+      const _RegexT&                                        _M_re;
+      const _NFAT&                                          _M_nfa;
+      _ResultsVec&                                          _M_results;
+      vector<pair<_BiIter, int>>                            _M_rep_count;
+      _State_info<__search_mode, _ResultsVec>              _M_states;
+      _FlagT                                                _M_flags;
+      // Do we have a solution so far?
+      bool                                                  _M_has_sol;
     };
 
  //@} regex-detail