]> git.ipfire.org Git - thirdparty/gcc.git/blobdiff - libstdc++-v3/include/bits/regex_automaton.h
Update copyright years.
[thirdparty/gcc.git] / libstdc++-v3 / include / bits / regex_automaton.h
index 35cfc1be92f94149ca7749ec304ddbe804b9de5a..a5fab6356cc3404ca8cc7ab353eca8ec117a833f 100644 (file)
@@ -1,6 +1,6 @@
 // class template regex -*- C++ -*-
 
-// Copyright (C) 2013 Free Software Foundation, Inc.
+// Copyright (C) 2013-2017 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
  *  Do not attempt to use it directly. @headername{regex}
  */
 
+// This macro defines the maximum number of states an NFA can have.
+#ifndef _GLIBCXX_REGEX_STATE_LIMIT
+#define _GLIBCXX_REGEX_STATE_LIMIT 100000
+#endif
+
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 namespace __detail
@@ -41,7 +46,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    */
 
   typedef long _StateIdT;
-  typedef std::set<_StateIdT> _StateSet;
   static const _StateIdT _S_invalid_state_id  = -1;
 
   template<typename _CharT>
@@ -53,10 +57,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   {
       _S_opcode_unknown,
       _S_opcode_alternative,
+      _S_opcode_repeat,
       _S_opcode_backref,
       _S_opcode_line_begin_assertion,
       _S_opcode_line_end_assertion,
-      _S_opcode_word_boundry,
+      _S_opcode_word_boundary,
       _S_opcode_subexpr_lookahead,
       _S_opcode_subexpr_begin,
       _S_opcode_subexpr_end,
@@ -65,144 +70,225 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _S_opcode_accept,
   };
 
-  template<typename _CharT, typename _TraitsT>
-    class _State
-    {
-    public:
-      typedef _Matcher<_CharT>           _MatcherT;
+  struct _State_base
+  {
+  protected:
+    _Opcode      _M_opcode;           // type of outgoing transition
 
-      _Opcode      _M_opcode;           // type of outgoing transition
-      _StateIdT    _M_next;             // outgoing transition
-      union // Since they are mutually exclusive.
+  public:
+    _StateIdT    _M_next;             // outgoing transition
+    union // Since they are mutually exclusive.
+    {
+      size_t _M_subexpr;        // for _S_opcode_subexpr_*
+      size_t _M_backref_index;  // for _S_opcode_backref
+      struct
       {
-       size_t _M_subexpr;        // for _S_opcode_subexpr_*
-       size_t _M_backref_index;  // for _S_opcode_backref
-       struct
-       {
-         // for _S_opcode_alternative.
-         _StateIdT  _M_quant_index;
-         // for _S_opcode_alternative or _S_opcode_subexpr_lookahead
-         _StateIdT  _M_alt;
-         // for _S_opcode_word_boundry or _S_opcode_subexpr_lookahead or
-         // quantifiers(ungreedy if set true)
-         bool       _M_neg;
-       };
+       // for _S_opcode_alternative, _S_opcode_repeat and
+       // _S_opcode_subexpr_lookahead
+       _StateIdT  _M_alt;
+       // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
+       // quantifiers (ungreedy if set true)
+       bool       _M_neg;
       };
-      _MatcherT      _M_matches;        // for _S_opcode_match
+      // For _S_opcode_match
+      __gnu_cxx::__aligned_membuf<_Matcher<char>> _M_matcher_storage;
+    };
 
-      explicit _State(_Opcode  __opcode)
-      : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
-      { }
+  protected:
+    explicit _State_base(_Opcode __opcode)
+    : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
+    { }
+
+  public:
+    // Returns true if this state's opcode is one of those documented as
+    // using _M_alt (alternative, repeat or sub-expression lookahead).
+    // Const-qualified: it only inspects _M_opcode, so it must also be
+    // callable on states reached through a const NFA.
+    bool
+    _M_has_alt() const
+    {
+      return _M_opcode == _S_opcode_alternative
+       || _M_opcode == _S_opcode_repeat
+       || _M_opcode == _S_opcode_subexpr_lookahead;
+    }
 
 #ifdef _GLIBCXX_DEBUG
-      std::ostream&
-      _M_print(std::ostream& ostr) const;
+    std::ostream&
+    _M_print(std::ostream& ostr) const;
 
-      // Prints graphviz dot commands for state.
-      std::ostream&
-      _M_dot(std::ostream& __ostr, _StateIdT __id) const;
+    // Prints graphviz dot commands for state.
+    std::ostream&
+    _M_dot(std::ostream& __ostr, _StateIdT __id) const;
 #endif
-    };
+  };
 
-  /// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
-  template<typename _CharT, typename _TraitsT>
-    class _Automaton
+  template<typename _Char_type>
+    struct _State : _State_base
     {
-    public:
-      typedef size_t _SizeT;
+      typedef _Matcher<_Char_type> _MatcherT;
+      static_assert(sizeof(_MatcherT) == sizeof(_Matcher<char>),
+                   "std::function<bool(T)> has the same size as "
+                   "std::function<bool(char)>");
+      static_assert(alignof(_MatcherT) == alignof(_Matcher<char>),
+                   "std::function<bool(T)> has the same alignment as "
+                   "std::function<bool(char)>");
+
+      explicit
+      _State(_Opcode __opcode) : _State_base(__opcode)
+      {
+       if (_M_opcode() == _S_opcode_match)
+         new (this->_M_matcher_storage._M_addr()) _MatcherT();
+      }
 
-    public:
-      virtual
-      ~_Automaton()
-      { }
+      _State(const _State& __rhs) : _State_base(__rhs)
+      {
+       if (__rhs._M_opcode() == _S_opcode_match)
+         new (this->_M_matcher_storage._M_addr())
+           _MatcherT(__rhs._M_get_matcher());
+      }
 
-      virtual _SizeT
-      _M_sub_count() const = 0;
+      // Move constructor.  The base sub-object is copied as-is; when the
+      // source holds a matcher (_S_opcode_match) that matcher is
+      // move-constructed into this object's storage.
+      //
+      // Declared noexcept so that std::vector growth (triggered by
+      // push_back in _M_insert_state) moves states instead of falling back
+      // to the copy constructor, which would copy every matcher.
+      // NOTE(review): this assumes _MatcherT's move constructor does not
+      // throw (the static_assert text above says it is a std::function).
+      _State(_State&& __rhs) noexcept : _State_base(__rhs)
+      {
+       if (__rhs._M_opcode() == _S_opcode_match)
+         new (this->_M_matcher_storage._M_addr())
+           _MatcherT(std::move(__rhs._M_get_matcher()));
+      }
 
-#ifdef _GLIBCXX_DEBUG
-      virtual std::ostream&
-      _M_dot(std::ostream& __ostr) const = 0;
-#endif
-    };
+      _State&
+      operator=(const _State&) = delete;
 
-  template<typename _CharT, typename _TraitsT>
-    class _NFA
-    : public _Automaton<_CharT, _TraitsT>,
-      public std::vector<_State<_CharT, _TraitsT>>
-    {
-    public:
-      typedef _State<_CharT, _TraitsT>            _StateT;
-      typedef const _Matcher<_CharT>&             _MatcherT;
-      typedef size_t                              _SizeT;
-      typedef regex_constants::syntax_option_type _FlagT;
-
-      _NFA(_FlagT __f)
-      : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
-      _M_has_backref(false), _M_quant_count(0)
-      { }
+      ~_State()
+      {
+       if (_M_opcode() == _S_opcode_match)
+         _M_get_matcher().~_MatcherT();
+      }
 
-      _FlagT
-      _M_options() const
-      { return _M_flags; }
+      // Since correct ctor and dtor rely on _M_opcode, it's better not to
+      // change it over time.
+      _Opcode
+      _M_opcode() const
+      { return _State_base::_M_opcode; }
 
-      _StateIdT
-      _M_start() const
-      { return _M_start_state; }
+      bool
+      _M_matches(_Char_type __char) const
+      { return _M_get_matcher()(__char); }
+
+      _MatcherT&
+      _M_get_matcher()
+      { return *static_cast<_MatcherT*>(this->_M_matcher_storage._M_addr()); }
+
+      const _MatcherT&
+      _M_get_matcher() const
+      {
+       return *static_cast<const _MatcherT*>(
+           this->_M_matcher_storage._M_addr());
+      }
+    };
+
+  /// Non-template part of an NFA: the syntax flags, the start state, the
+  /// sub-expression counter, the back-reference flag and the stack of
+  /// currently open sub-expressions.  _NFA<_TraitsT> derives from it.
+  struct _NFA_base
+  {
+    typedef size_t                              _SizeT;
+    typedef regex_constants::syntax_option_type _FlagT;
+
+    // Stores the syntax flags; the automaton starts with start state 0,
+    // no sub-expressions and no back-references.
+    explicit
+    _NFA_base(_FlagT __f)
+    : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
+    _M_has_backref(false)
+    { }
+
+    _NFA_base(_NFA_base&&) = default;
+
+  protected:
+    // Protected and non-virtual: only derived classes can destroy the
+    // base sub-object.
+    ~_NFA_base() = default;
+
+  public:
+    // Syntax flags passed to the constructor.
+    _FlagT
+    _M_options() const
+    { return _M_flags; }
+
+    // Identifier of the start state.
+    _StateIdT
+    _M_start() const
+    { return _M_start_state; }
+
+    // Current value of the sub-expression counter.
+    _SizeT
+    _M_sub_count() const
+    { return _M_subexpr_count; }
+
+    // Indices pushed by _M_insert_subexpr_begin() and popped by
+    // _M_insert_subexpr_end(), i.e. the sub-expressions still open.
+    std::vector<size_t>       _M_paren_stack;
+    _FlagT                    _M_flags;
+    _StateIdT                 _M_start_state;
+    // Incremented once per _M_insert_subexpr_begin() call.
+    _SizeT                    _M_subexpr_count;
+    // Starts out false; presumably set when a back-reference state is
+    // inserted -- the code doing so is not visible here, TODO confirm.
+    bool                      _M_has_backref;
+  };
+
+  template<typename _TraitsT>
+    struct _NFA
+    : _NFA_base, std::vector<_State<typename _TraitsT::char_type>>
+    {
+      typedef typename _TraitsT::char_type     _Char_type;
+      typedef _State<_Char_type>               _StateT;
+      typedef _Matcher<_Char_type>             _MatcherT;
 
-      const _StateSet&
-      _M_final_states() const
-      { return _M_accepting_states; }
+      _NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags)
+      : _NFA_base(__flags)
+      { _M_traits.imbue(__loc); }
 
-      _SizeT
-      _M_sub_count() const
-      { return _M_subexpr_count; }
+      // for performance reasons _NFA objects should only be moved not copied
+      _NFA(const _NFA&) = delete;
+      _NFA(_NFA&&) = default;
 
       _StateIdT
       _M_insert_accept()
       {
        auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
-       _M_accepting_states.insert(__ret);
        return __ret;
       }
 
       _StateIdT
-      _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
+      _M_insert_alt(_StateIdT __next, _StateIdT __alt,
+                   bool __neg __attribute__((__unused__)))
       {
        _StateT __tmp(_S_opcode_alternative);
        // It labels every quantifier to make greedy comparison easier in BFS
        // approach.
-       __tmp._M_quant_index = _M_quant_count++;
+       __tmp._M_next = __next;
+       __tmp._M_alt = __alt;
+       return _M_insert_state(std::move(__tmp));
+      }
+
+      _StateIdT
+      _M_insert_repeat(_StateIdT __next, _StateIdT __alt, bool __neg)
+      {
+       _StateT __tmp(_S_opcode_repeat);
+       // No quantifier index is recorded any more; a repeat state carries
+       // only its two successors and the ungreedy flag (_M_neg).
        __tmp._M_next = __next;
        __tmp._M_alt = __alt;
        __tmp._M_neg = __neg;
-       return _M_insert_state(__tmp);
+       return _M_insert_state(std::move(__tmp));
       }
 
       _StateIdT
       _M_insert_matcher(_MatcherT __m)
       {
        _StateT __tmp(_S_opcode_match);
-       __tmp._M_matches = __m;
-       return _M_insert_state(__tmp);
+       __tmp._M_get_matcher() = std::move(__m);
+       return _M_insert_state(std::move(__tmp));
       }
 
       _StateIdT
       _M_insert_subexpr_begin()
       {
-       auto __id = _M_subexpr_count++;
-       _M_paren_stack.push_back(__id);
+       auto __id = this->_M_subexpr_count++;
+       this->_M_paren_stack.push_back(__id);
        _StateT __tmp(_S_opcode_subexpr_begin);
        __tmp._M_subexpr = __id;
-       return _M_insert_state(__tmp);
+       return _M_insert_state(std::move(__tmp));
       }
 
       _StateIdT
       _M_insert_subexpr_end()
       {
        _StateT __tmp(_S_opcode_subexpr_end);
-       __tmp._M_subexpr = _M_paren_stack.back();
-       _M_paren_stack.pop_back();
-       return _M_insert_state(__tmp);
+       __tmp._M_subexpr = this->_M_paren_stack.back();
+       this->_M_paren_stack.pop_back();
+       return _M_insert_state(std::move(__tmp));
       }
 
       _StateIdT
@@ -219,9 +305,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StateIdT
       _M_insert_word_bound(bool __neg)
       {
-       _StateT __tmp(_S_opcode_word_boundry);
+       _StateT __tmp(_S_opcode_word_boundary);
        __tmp._M_neg = __neg;
-       return _M_insert_state(__tmp);
+       return _M_insert_state(std::move(__tmp));
       }
 
       _StateIdT
@@ -230,7 +316,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _StateT __tmp(_S_opcode_subexpr_lookahead);
        __tmp._M_alt = __alt;
        __tmp._M_neg = __neg;
-       return _M_insert_state(__tmp);
+       return _M_insert_state(std::move(__tmp));
       }
 
       _StateIdT
@@ -240,7 +326,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StateIdT
       _M_insert_state(_StateT __s)
       {
-       this->push_back(__s);
+       this->push_back(std::move(__s));
+       if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT)
+         __throw_regex_error(
+           regex_constants::error_space,
+           "Number of NFA states exceeds limit. Please use shorter regex "
+           "string, or use smaller brace expression, or make "
+           "_GLIBCXX_REGEX_STATE_LIMIT larger.");
        return this->size()-1;
       }
 
@@ -252,28 +344,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       std::ostream&
       _M_dot(std::ostream& __ostr) const;
 #endif
-
-      std::vector<size_t>       _M_paren_stack;
-      _StateSet                 _M_accepting_states;
-      _FlagT                    _M_flags;
-      _StateIdT                 _M_start_state;
-      _SizeT                    _M_subexpr_count;
-      _SizeT                    _M_quant_count;
-      bool                      _M_has_backref;
+    public:
+      _TraitsT                  _M_traits;
     };
 
   /// Describes a sequence of one or more %_State, its current start
   /// and end(s).  This structure contains fragments of an NFA during
   /// construction.
-  template<typename _CharT, typename _TraitsT>
+  template<typename _TraitsT>
     class _StateSeq
     {
     public:
-      typedef _NFA<_CharT, _TraitsT> _RegexT;
+      typedef _NFA<_TraitsT> _RegexT;
 
     public:
       _StateSeq(_RegexT& __nfa, _StateIdT __s)
-      : _StateSeq(__nfa, __s, __s)
+      : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
       { }
 
       _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)