]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
regex_automaton.h: Rearrange _NFA's layout.
authorTim Shen <timshen91@gmail.com>
Mon, 2 Sep 2013 22:20:56 +0000 (22:20 +0000)
committerTim Shen <timshen@gcc.gnu.org>
Mon, 2 Sep 2013 22:20:56 +0000 (22:20 +0000)
2013-09-02  Tim Shen  <timshen91@gmail.com>

* include/bits/regex_automaton.h: Rearrange _NFA's layout.
* include/bits/regex_compiler.h: Add _AnyMatcher and _CharMatcher.
  Rearrange _BracketMatcher's layout.
  (_BracketMatcher<>::_M_add_char): Use set instead of vector for
  _M_char_set.
  (_BracketMatcher<>::_M_add_collating_element): Likewise.
  (_BracketMatcher<>::_M_make_range): Likewise.
* include/bits/regex_compiler.tcc (_Compiler<>::_M_atom): Use
  appropriate constructors of matchers above.
* testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc:
  New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc: New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc:
  New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc:
  New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc: New.
* testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc:
  New.
* testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc: New.

From-SVN: r202189

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex_automaton.h
libstdc++-v3/include/bits/regex_compiler.h
libstdc++-v3/include/bits/regex_compiler.tcc
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc [new file with mode: 0644]
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc [moved from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/string_backref.cc with 97% similarity]
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc [moved from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc with 96% similarity]
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc [moved from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_emptygroup.cc with 96% similarity]
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc [moved from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc with 92% similarity]
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc [new file with mode: 0644]
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc [new file with mode: 0644]

index 898f0319a20b82f04b24a5c8343ea95fe3063363..cd365692b5c9fe9b8fb29cf006a5defdff6c5d95 100644 (file)
@@ -1,3 +1,26 @@
+2013-09-02  Tim Shen  <timshen91@gmail.com>
+
+       * include/bits/regex_automaton.h: Rearrange _NFA's layout.
+       * include/bits/regex_compiler.h: Add _AnyMatcher and _CharMatcher.
+         Rearrange _BracketMatcher's layout.
+         (_BracketMatcher<>::_M_add_char): Use set instead of vector for
+         _M_char_set.
+         (_BracketMatcher<>::_M_add_collating_element): Likewise.
+         (_BracketMatcher<>::_M_make_range): Likewise.
+       * include/bits/regex_compiler.tcc (_Compiler<>::_M_atom): Use
+         appropriate constructors of matchers above.
+       * testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc:
+         New.
+       * testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc: New.
+       * testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc:
+         New.
+       * testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc:
+         New.
+       * testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc: New.
+       * testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc:
+         New.
+       * testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc: New.
+
 2013-08-30  François Dumont  <fdumont@gcc.gnu.org>
 
        PR libstdc++/58148
index f9e9630636b26a74ab188954fbe307fab335afd3..2c872aa94822e71ea854f733e433fda816232021 100644 (file)
@@ -206,12 +206,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_dot(std::ostream& __ostr) const;
 #endif
 
+      std::vector<unsigned int> _M_paren_stack;
+      _StateSet                 _M_accepting_states;
       _FlagT                    _M_flags;
       _StateIdT                 _M_start_state;
-      _StateSet                 _M_accepting_states;
       _SizeT                    _M_subexpr_count;
       bool                      _M_has_backref;
-      std::vector<unsigned int> _M_paren_stack;
     };
 
   /// Describes a sequence of one or more %_State, its current start
index a1107bb7eeb632756c06ccfbb42671a4fa0d9aaa..55ecdb92d41a8bfe99239b580450657c116d55b8 100644 (file)
@@ -125,12 +125,60 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       const _TraitsT& _M_traits;
       _ScannerT       _M_scanner;
-      _StringT        _M_value;
       _RegexT         _M_state_store;
+      _StringT        _M_value;
       _StackT         _M_stack;
       _FlagT          _M_flags;
     };
 
+  template<typename _CharT, typename _TraitsT>
+    struct _AnyMatcher
+    {
+      explicit
+      _AnyMatcher(const _TraitsT& __traits)
+      : _M_traits(__traits)
+      { }
+
+      bool
+      operator()(_CharT __ch) const
+      {
+       return _M_traits.translate(__ch) != '\n'
+         && _M_traits.translate(__ch) != '\r'
+         && _M_traits.translate(__ch) != u'\u2028'
+         && _M_traits.translate(__ch) != u'\u2029';
+      }
+
+      const _TraitsT& _M_traits;
+    };
+
+  template<typename _CharT, typename _TraitsT>
+    struct _CharMatcher
+    {
+      typedef regex_constants::syntax_option_type _FlagT;
+
+      explicit
+      _CharMatcher(_CharT __ch, const _TraitsT& __traits, _FlagT __flags)
+      : _M_ch(_M_translate(__ch)), _M_traits(__traits), _M_flags(__flags)
+      { }
+
+      bool
+      operator()(_CharT __ch) const
+      { return _M_ch == _M_translate(__ch); }
+
+      _CharT
+      _M_translate(_CharT __ch) const
+      {
+       if (_M_flags & regex_constants::icase)
+         return _M_traits.translate_nocase(__ch);
+       else
+         return _M_traits.translate(__ch);
+      }
+
+      const _TraitsT& _M_traits;
+      _FlagT          _M_flags;
+      _CharT          _M_ch;
+    };
+
   /// Matches a character range (bracket expression)
   template<typename _CharT, typename _TraitsT>
     struct _BracketMatcher
@@ -141,9 +189,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       explicit
       _BracketMatcher(bool __is_non_matching,
-                     const _TraitsT& __t,
+                     const _TraitsT& __traits,
                      _FlagT __flags)
-      : _M_is_non_matching(__is_non_matching), _M_traits(__t),
+      : _M_is_non_matching(__is_non_matching), _M_traits(__traits),
        _M_flags(__flags), _M_class_set(0)
       { }
 
@@ -152,7 +200,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       void
       _M_add_char(_CharT __c)
-      { _M_char_set.push_back(_M_translate(__c)); }
+      { _M_char_set.insert(_M_translate(__c)); }
 
       void
       _M_add_collating_element(const _StringT& __s)
@@ -162,7 +210,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        if (__st.empty())
          __throw_regex_error(regex_constants::error_collate);
        // TODO: digraph
-       _M_char_set.push_back(__st[0]);
+       _M_char_set.insert(_M_translate(__st[0]));
       }
 
       void
@@ -186,21 +234,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_make_range(_CharT __l, _CharT __r)
       {
-       _M_range_set.push_back(
-         make_pair(_M_get_str(_M_translate(__l)),
-                   _M_get_str(_M_translate(__r))));
+       if (_M_flags & regex_constants::collate)
+         _M_range_set.insert(
+           make_pair(_M_get_str(_M_translate(__l)),
+                     _M_get_str(_M_translate(__r))));
+       else
+         _M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r)));
       }
 
       _CharT
       _M_translate(_CharT __c) const
       {
-       if (_M_flags & regex_constants::collate)
-         if (_M_is_icase())
-           return _M_traits.translate_nocase(__c);
-         else
-           return _M_traits.translate(__c);
+       if (_M_is_icase())
+         return _M_traits.translate_nocase(__c);
        else
-         return __c;
+         return _M_traits.translate(__c);
       }
 
       bool
@@ -214,12 +262,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        return _M_traits.transform(__s.begin(), __s.end());
       }
 
-      const _TraitsT&                       _M_traits;
-      _FlagT                                _M_flags;
-      bool                                  _M_is_non_matching;
-      std::vector<_CharT>                   _M_char_set;
-      std::vector<pair<_StringT, _StringT>> _M_range_set;
-      _CharClassT                           _M_class_set;
+      std::set<_CharT>                   _M_char_set;
+      std::set<pair<_StringT, _StringT>> _M_range_set;
+      const _TraitsT&                    _M_traits;
+      _CharClassT                        _M_class_set;
+      _FlagT                             _M_flags;
+      bool                               _M_is_non_matching;
     };
 
  //@} regex-detail
index bed091a4486deb5bbfb2db51f6236c21ef7e7b70..e41b251c2578c94c030dccc727c35e678a8703ef 100644 (file)
@@ -204,32 +204,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
       if (_M_match_token(_ScannerT::_S_token_anychar))
        {
-         const static auto&
-         __any_matcher = [](_CharT __ch) -> bool
-         { return true; };
-
          _M_stack.push(_StateSeqT(_M_state_store,
                                  _M_state_store._M_insert_matcher
-                                 (__any_matcher)));
+                                 (_AnyMatcher<_CharT, _TraitsT>(_M_traits))));
          return true;
        }
       if (_M_try_char())
        {
-         _CharT __c = _M_value[0];
-         __detail::_Matcher<_CharT> f;
-         if (_M_flags & regex_constants::icase)
-           {
-             auto __traits = this->_M_traits;
-             __c = __traits.translate_nocase(__c);
-             f = [__traits, __c](_CharT __ch) -> bool
-             { return __traits.translate_nocase(__ch) == __c; };
-           }
-         else
-           f = [__c](_CharT __ch) -> bool
-           { return __ch == __c; };
-
          _M_stack.push(_StateSeqT(_M_state_store,
-                                  _M_state_store._M_insert_matcher(f)));
+                                  _M_state_store._M_insert_matcher
+                                  (_CharMatcher<_CharT, _TraitsT>(_M_value[0],
+                                                                  _M_traits,
+                                                                  _M_flags))));
          return true;
        }
       if (_M_match_token(_ScannerT::_S_token_backref))
@@ -374,26 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       bool __ret = false;
       if (_M_traits.isctype(__ch, _M_class_set))
        __ret = true;
+      else if (_M_char_set.count(_M_translate(__ch)))
+       __ret = true;
       else
        {
-         __ch = _M_translate(__ch);
-
-         for (auto __c : _M_char_set)
-           if (__c == __ch)
+         _StringT __s = _M_get_str(_M_flags & regex_constants::collate
+                                   ? _M_translate(__ch) : __ch);
+         for (auto& __it : _M_range_set)
+           if (__it.first <= __s && __s <= __it.second)
              {
                __ret = true;
                break;
              }
-         if (!__ret)
-           {
-             _StringT __s = _M_get_str(__ch);
-             for (auto& __it : _M_range_set)
-               if (__it.first <= __s && __s <= __it.second)
-                 {
-                   __ret = true;
-                   break;
-                 }
-           }
        }
       if (_M_is_non_matching)
        return !__ret;
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc
new file mode 100644 (file)
index 0000000..6e6095b
--- /dev/null
@@ -0,0 +1,52 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-09-02  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests ECMAScript "." against a std::string.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+#define TEST(res, s) \
+  {\
+    regex re(res);\
+    string st(s);\
+    VERIFY(!regex_match(st, re));\
+  }
+  TEST(".", "\0");
+  TEST(".", "\n");
+  TEST(".", "\r");
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
similarity index 97%
rename from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/string_backref.cc
rename to libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc
index a828fea93c677eae54c830cb771b1177ea6700b1..321ce35a0389a90b44c65584f71f280674c72e78 100644 (file)
@@ -1,7 +1,7 @@
 // { dg-options "-std=gnu++11" }
 
 //
-// 2013-08-10  Tim Shen <timshen91@gmail.com>
+// 2013-09-02  Tim Shen <timshen91@gmail.com>
 //
 // Copyright (C) 2013 Free Software Foundation, Inc.
 //
similarity index 96%
rename from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc
rename to libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc
index 93bca45bf9d50d12b026cb634adc1700541aaf7a..3c48d3521a5e2cc804a4cfbbc1ddcf8de8ffc95d 100644 (file)
@@ -1,7 +1,7 @@
 // { dg-options "-std=gnu++11" }
 
 //
-// 2013-08-26  Tim Shen <timshen91@gmail.com>
+// 2013-09-02  Tim Shen <timshen91@gmail.com>
 //
 // Copyright (C) 2013 Free Software Foundation, Inc.
 //
similarity index 96%
rename from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_emptygroup.cc
rename to libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc
index e112db55e4ad54e2b70a3657c067d21415090163..1dc8f63f7891c0d60fb16a5fbc0c82fdfd99bb51 100644 (file)
@@ -1,7 +1,7 @@
 // { dg-options "-std=gnu++11" }
 
 //
-// 2013-08-22  Tim Shen <timshen91@gmail.com>
+// 2013-09-02  Tim Shen <timshen91@gmail.com>
 //
 // Copyright (C) 2013 Free Software Foundation, Inc.
 //
similarity index 92%
rename from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc
rename to libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc
index a7ef0fb36cc358925ab7e3d9adf7a4235c997842..a73b742a5e43dfdafe61a8a98910f2b9a350383a 100644 (file)
@@ -1,7 +1,7 @@
 // { dg-options "-std=gnu++11" }
 
 //
-// 2013-08-26  Tim Shen <timshen91@gmail.com>
+// 2013-09-02  Tim Shen <timshen91@gmail.com>
 //
 // Copyright (C) 2013 Free Software Foundation, Inc.
 //
@@ -34,7 +34,6 @@ test01()
   bool test __attribute__((unused)) = true;
 
   VERIFY(regex_match(":", regex("\\x3a")));
-  VERIFY(regex_match(L"\u1234", wregex(L"\\u1234")));
   try
     {
       regex("\\u400x");
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc
new file mode 100644 (file)
index 0000000..c574908
--- /dev/null
@@ -0,0 +1,51 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-09-02  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests ECMAScript "." against a std::wstring.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+#define TESTL(res, s) \
+  {\
+    wregex re(res);\
+    wstring st(s);\
+    VERIFY(!regex_match(st, re));\
+  }
+  TESTL(L".", L"\u2028");
+  TESTL(L".", L"\u2029");
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc
new file mode 100644 (file)
index 0000000..f9561be
--- /dev/null
@@ -0,0 +1,44 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-09-02  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests ECMAScript \x and \u.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  VERIFY(regex_match(L"\u1234", wregex(L"\\u1234")));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}