git.ipfire.org Git - thirdparty/gcc.git/blobdiff - libstdc++-v3/include/bits/regex_scanner.h
Update copyright years.
[thirdparty/gcc.git] / libstdc++-v3 / include / bits / regex_scanner.h
index 824d6ce1081513387cb8135985065a6d9ddac249..0fc3ed41078c137801c1dcebb14666f36c068c99 100644 (file)
@@ -1,6 +1,6 @@
 // class template regex -*- C++ -*-
 
-// Copyright (C) 2013 Free Software Foundation, Inc.
+// Copyright (C) 2013-2020 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
-namespace __detail
-{
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+namespace __detail
+{
   /**
    * @addtogroup regex-detail
    * @{
    */
 
+  struct _ScannerBase // Character-type-independent scanner state and lookup tables; base class of _Scanner<_CharT>.
+  {
+  public:
+    /// Token types returned from the scanner.
+    enum _TokenT : unsigned
+    {
+      _S_token_anychar,
+      _S_token_ord_char,
+      _S_token_oct_num,
+      _S_token_hex_num,
+      _S_token_backref,
+      _S_token_subexpr_begin,
+      _S_token_subexpr_no_group_begin,
+      _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
+      _S_token_subexpr_end,
+      _S_token_bracket_begin,
+      _S_token_bracket_neg_begin,
+      _S_token_bracket_end,
+      _S_token_interval_begin,
+      _S_token_interval_end,
+      _S_token_quoted_class,
+      _S_token_char_class_name,
+      _S_token_collsymbol,
+      _S_token_equiv_class_name,
+      _S_token_opt,
+      _S_token_or,
+      _S_token_closure0,
+      _S_token_closure1,
+      _S_token_line_begin,
+      _S_token_line_end,
+      _S_token_word_bound, // neg if _M_value[0] == 'n'
+      _S_token_comma,
+      _S_token_dup_count,
+      _S_token_eof,
+      _S_token_bracket_dash,
+      _S_token_unknown = -1u // all bits set: the largest value of the unsigned underlying type
+    };
+
+  protected:
+    typedef regex_constants::syntax_option_type _FlagT;
+
+    enum _StateT // scanning context; by name: normal, inside a brace interval, inside a bracket expression
+    {
+      _S_state_normal,
+      _S_state_in_brace,
+      _S_state_in_bracket,
+    };
+
+  protected:
+    _ScannerBase(_FlagT __flags) // selects the escape table and special-character set matching the grammar bits in __flags
+    : _M_state(_S_state_normal),
+    _M_flags(__flags),
+    _M_escape_tbl(_M_is_ecma() // ECMAScript gets its own escape table; every other grammar gets the awk table
+                 ? _M_ecma_escape_tbl
+                 : _M_awk_escape_tbl),
+    _M_spec_char(_M_is_ecma() // the first grammar flag that matches, in the order tested below, wins
+                ? _M_ecma_spec_char
+                : _M_flags & regex_constants::basic
+                ? _M_basic_spec_char
+                : _M_flags & regex_constants::extended
+                ? _M_extended_spec_char
+                : _M_flags & regex_constants::grep
+                ?  ".[\\*^$\n" // same characters as _M_basic_spec_char plus newline
+                : _M_flags & regex_constants::egrep
+                ? ".[\\()*+?{|^$\n" // same characters as _M_extended_spec_char plus newline
+                : _M_flags & regex_constants::awk
+                ? _M_extended_spec_char
+                : nullptr), // no recognised grammar flag set
+    _M_at_bracket_start(false)
+    { __glibcxx_assert(_M_spec_char); } // asserts that some grammar flag selected a special-character set
+
+  protected:
+    const char*
+    _M_find_escape(char __c) // returns a pointer to the replacement character for escape letter __c, or nullptr if the active table has none
+    {
+      auto __it = _M_escape_tbl;
+      for (; __it->first != '\0'; ++__it) // both escape tables end with a {'\0', '\0'} sentinel entry
+       if (__it->first == __c)
+         return &__it->second;
+      return nullptr;
+    }
+
+    bool
+    _M_is_ecma() const // true if the ECMAScript flag is set
+    { return _M_flags & regex_constants::ECMAScript; }
+
+    bool
+    _M_is_basic() const // true if either the basic or the grep flag is set
+    { return _M_flags & (regex_constants::basic | regex_constants::grep); }
+
+    bool
+    _M_is_extended() const // true if any of the extended, egrep or awk flags is set
+    {
+      return _M_flags & (regex_constants::extended
+                        | regex_constants::egrep
+                        | regex_constants::awk);
+    }
+
+    bool
+    _M_is_grep() const // true if either the grep or the egrep flag is set
+    { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
+
+    bool
+    _M_is_awk() const // true if the awk flag is set
+    { return _M_flags & regex_constants::awk; }
+
+  protected:
+    // TODO: Make them static in the next abi change.
+    const std::pair<char, _TokenT> _M_token_tbl[9] = // maps a single pattern character to the token type it denotes
+      {
+       {'^', _S_token_line_begin},
+       {'$', _S_token_line_end},
+       {'.', _S_token_anychar},
+       {'*', _S_token_closure0},
+       {'+', _S_token_closure1},
+       {'?', _S_token_opt},
+       {'|', _S_token_or},
+       {'\n', _S_token_or}, // grep and egrep
+       {'\0', _S_token_or}, // NOTE(review): presumably the end-of-table sentinel; the lookup code is not in this header
+      };
+    const std::pair<char, char> _M_ecma_escape_tbl[8] = // escape letter -> character it stands for; chosen when the ECMAScript flag is set
+      {
+       {'0', '\0'},
+       {'b', '\b'},
+       {'f', '\f'},
+       {'n', '\n'},
+       {'r', '\r'},
+       {'t', '\t'},
+       {'v', '\v'},
+       {'\0', '\0'}, // sentinel that stops _M_find_escape
+      };
+    const std::pair<char, char> _M_awk_escape_tbl[11] = // escape letter -> character it stands for; chosen for every non-ECMAScript grammar
+      {
+       {'"', '"'},
+       {'/', '/'},
+       {'\\', '\\'},
+       {'a', '\a'},
+       {'b', '\b'},
+       {'f', '\f'},
+       {'n', '\n'},
+       {'r', '\r'},
+       {'t', '\t'},
+       {'v', '\v'},
+       {'\0', '\0'}, // sentinel that stops _M_find_escape
+      };
+    const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|"; // special-character set used when the ECMAScript flag is set
+    const char* _M_basic_spec_char = ".[\\*^$"; // special-character set used for the basic flag
+    const char* _M_extended_spec_char = ".[\\()*+?{|^$"; // special-character set used for the extended and awk flags
+
+    _StateT                       _M_state; // starts as _S_state_normal
+    _FlagT                        _M_flags; // syntax options passed to the constructor
+    _TokenT                       _M_token; // NOTE(review): no default initializer and not in the constructor's init list
+    const std::pair<char, char>*  _M_escape_tbl; // points at _M_ecma_escape_tbl or _M_awk_escape_tbl
+    const char*                   _M_spec_char; // NUL-terminated special-character set chosen in the constructor
+    bool                          _M_at_bracket_start; // starts as false
+  };
+
   /**
-   * @brief struct _Scanner. Scans an input range for regex tokens.
+   * @brief Scans an input range for regex tokens.
    *
    * The %_Scanner class interprets the regular expression pattern in
    * the input range passed to its constructor as a sequence of parse
@@ -49,51 +206,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * constructor: different regular expression grammars will interpret
    * the same input pattern in syntactically different ways.
    */
-  template<typename _FwdIter>
+  template<typename _CharT>
     class _Scanner
+    : public _ScannerBase
     {
     public:
-      typedef typename std::iterator_traits<_FwdIter>::value_type _CharT;
+      typedef const _CharT*                                       _IterT;
       typedef std::basic_string<_CharT>                           _StringT;
       typedef regex_constants::syntax_option_type                 _FlagT;
       typedef const std::ctype<_CharT>                            _CtypeT;
 
-      /// Token types returned from the scanner.
-      enum _TokenT
-      {
-       _S_token_anychar,
-       _S_token_ord_char,
-       _S_token_oct_num,
-       _S_token_hex_num,
-       _S_token_backref,
-       _S_token_subexpr_begin,
-       _S_token_subexpr_no_group_begin,
-       _S_token_subexpr_lookahead_begin,
-       _S_token_subexpr_end,
-       _S_token_bracket_begin,
-       _S_token_bracket_neg_begin,
-       _S_token_bracket_end,
-       _S_token_interval_begin,
-       _S_token_interval_end,
-       _S_token_quoted_class,
-       _S_token_char_class_name,
-       _S_token_collsymbol,
-       _S_token_equiv_class_name,
-       _S_token_opt,
-       _S_token_or,
-       _S_token_closure0,
-       _S_token_closure1,
-       _S_token_ungreedy,
-       _S_token_line_begin,
-       _S_token_line_end,
-       _S_token_word_bound,
-       _S_token_comma,
-       _S_token_dup_count,
-       _S_token_eof,
-       _S_token_unknown
-      };
-
-      _Scanner(_FwdIter __begin, _FwdIter __end,
+      _Scanner(_IterT __begin, _IterT __end,
               _FlagT __flags, std::locale __loc);
 
       void
@@ -113,13 +236,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 
     private:
-      enum _StateT
-      {
-       _S_state_normal,
-       _S_state_in_brace,
-       _S_state_in_bracket,
-      };
-
       void
       _M_scan_normal();
 
@@ -141,55 +257,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_eat_class(char);
 
-      constexpr bool
-      _M_is_ecma()
-      { return _M_flags & regex_constants::ECMAScript; }
-
-      constexpr bool
-      _M_is_basic()
-      { return _M_flags & (regex_constants::basic | regex_constants::grep); }
-
-      constexpr bool
-      _M_is_extended()
-      {
-       return _M_flags & (regex_constants::extended
-                          | regex_constants::egrep
-                          | regex_constants::awk);
-      }
-
-      constexpr bool
-      _M_is_grep()
-      { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
-
-      constexpr bool
-      _M_is_awk()
-      { return _M_flags & regex_constants::awk; }
-
-      _StateT                       _M_state;
-      _FwdIter                      _M_current;
-      _FwdIter                      _M_end;
-      _FlagT                        _M_flags;
+      _IterT                        _M_current;
+      _IterT                        _M_end;
       _CtypeT&                      _M_ctype;
-      _TokenT                       _M_token;
       _StringT                      _M_value;
-      bool                          _M_at_bracket_start;
-    public:
-      // TODO: make them static when this file is stable.
-      const std::map<char, _TokenT> _M_token_map;
-      const std::map<char, char>    _M_ecma_escape_map;
-      const std::map<char, char>    _M_awk_escape_map;
-      const std::set<char>          _M_ecma_spec_char;
-      const std::set<char>          _M_basic_spec_char;
-      const std::set<char>          _M_extended_spec_char;
-
-      const std::map<char, char>&   _M_escape_map;
-      const std::set<char>&         _M_spec_char;
       void (_Scanner::* _M_eat_escape)();
     };
 
  //@} regex-detail
-_GLIBCXX_END_NAMESPACE_VERSION
 } // namespace __detail
+_GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std
 
 #include <bits/regex_scanner.tcc>