git.ipfire.org Git - thirdparty/gcc.git/commitdiff
re PR libstdc++/66359 (Regex Fails to match)
author Tim Shen <timshen@google.com>
Fri, 5 Jun 2015 04:58:26 +0000 (04:58 +0000)
committer Tim Shen <timshen@gcc.gnu.org>
Fri, 5 Jun 2015 04:58:26 +0000 (04:58 +0000)
PR libstdc++/66359
Backport from mainline
2014-11-13  Tim Shen  <timshen@google.com>

PR libstdc++/63775
* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
_BracketMatcher<>::_M_make_range): Throw regex_error on invalid range
like [z-a]. Change _M_expression_term interface.
* include/bits/regex_compiler.tcc (
_Compiler<>::_M_insert_bracket_matcher,
_Compiler<>::_M_expression_term): Rewrite bracket expression parsing.
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
Add testcases and move file out of extended.

From-SVN: r224144

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex_compiler.h
libstdc++-v3/include/bits/regex_compiler.tcc
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc [moved from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc with 67% similarity]

index ed9cd1654946d5a2f7557c1161ce3df9272c780d..a607facab6689624c16274cf1fdec17eb1dacc6f 100644 (file)
@@ -1,3 +1,19 @@
+2015-06-05  Tim Shen  <timshen@google.com>
+
+       PR libstdc++/66359
+       Backport from mainline
+       2014-11-13  Tim Shen  <timshen@google.com>
+
+       PR libstdc++/63775
+       * include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
+       _BracketMatcher<>::_M_make_range): Throw regex_error on invalid range
+       like [z-a]. Change _M_expression_term interface.
+       * include/bits/regex_compiler.tcc (
+       _Compiler<>::_M_insert_bracket_matcher,
+       _Compiler<>::_M_expression_term): Rewrite bracket expression parsing.
+       * testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
+       Add testcases and move file out of extended.
+
 2015-06-04  Renlin Li  <renlin.li@arm.com>
 
        Backported from mainline
index af76f55054acbdaabf37fc97a4242a02cb6ff159..bcbe1179987751a1db6663f83d5f88f760b02ada 100644 (file)
@@ -118,7 +118,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       template<bool __icase, bool __collate>
        void
-       _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
+       _M_expression_term(pair<bool, _CharT>& __last_char,
+                          _BracketMatcher<_TraitsT, __icase, __collate>&
                           __matcher);
 
       int
@@ -390,6 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       _M_make_range(_CharT __l, _CharT __r)
       {
+       if (__l > __r)
+         __throw_regex_error(regex_constants::error_range);
        _M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
                                         _M_translator._M_transform(__r)));
 #ifdef _GLIBCXX_DEBUG
index ffe01705fe6160bcf021f79702c2dac00c52b52a..8551e0d29a7c8a72de150c4988a95e4e06b01210 100644 (file)
@@ -410,18 +410,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_insert_bracket_matcher(bool __neg)
     {
       _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
+      pair<bool, _CharT> __last_char; // Optional<_CharT>
+      __last_char.first = false;
+      if (!(_M_flags & regex_constants::ECMAScript))
+       if (_M_try_char())
+         {
+           __matcher._M_add_char(_M_value[0]);
+           __last_char.first = true;
+           __last_char.second = _M_value[0];
+         }
       while (!_M_match_token(_ScannerT::_S_token_bracket_end))
-       _M_expression_term(__matcher);
+       _M_expression_term(__last_char, __matcher);
       __matcher._M_ready();
-      _M_stack.push(_StateSeqT(_M_nfa,
-                              _M_nfa._M_insert_matcher(std::move(__matcher))));
+      _M_stack.push(_StateSeqT(
+                     _M_nfa,
+                     _M_nfa._M_insert_matcher(std::move(__matcher))));
     }
 
   template<typename _TraitsT>
   template<bool __icase, bool __collate>
     void
     _Compiler<_TraitsT>::
-    _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
+    _M_expression_term(pair<bool, _CharT>& __last_char,
+                      _BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
+
     {
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
        __matcher._M_add_collating_element(_M_value);
@@ -429,27 +441,50 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        __matcher._M_add_equivalence_class(_M_value);
       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
        __matcher._M_add_character_class(_M_value, false);
-      else if (_M_try_char()) // [a
+      // POSIX doesn't permit '-' as a start-range char (say [a-z--0]),
+      // except when the '-' is the first character in the bracket expression
+      // ([--0]). ECMAScript treats all '-' after a range as a normal character.
+      // Also see above, where _M_expression_term gets called.
+      //
+      // As a result, POSIX rejects [-----], but ECMAScript doesn't.
+      // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
+      // Clang (3.5) always uses ECMAScript style even in its POSIX syntax.
+      //
+      // It turns out that no one reads BNFs ;)
+      else if (_M_try_char())
        {
-         auto __ch = _M_value[0];
-         if (_M_try_char())
+         if (!__last_char.first)
+           {
+             if (_M_value[0] == '-'
+                 && !(_M_flags & regex_constants::ECMAScript))
+               __throw_regex_error(regex_constants::error_range);
+             __matcher._M_add_char(_M_value[0]);
+             __last_char.first = true;
+             __last_char.second = _M_value[0];
+           }
+         else
            {
-             if (_M_value[0] == '-') // [a-
+             if (_M_value[0] == '-')
                {
-                 if (_M_try_char()) // [a-z]
+                 if (_M_try_char())
+                   {
+                     __matcher._M_make_range(__last_char.second , _M_value[0]);
+                     __last_char.first = false;
+                   }
+                 else
                    {
-                     __matcher._M_make_range(__ch, _M_value[0]);
-                     return;
+                     if (_M_scanner._M_get_token()
+                         != _ScannerT::_S_token_bracket_end)
+                       __throw_regex_error(regex_constants::error_range);
+                     __matcher._M_add_char(_M_value[0]);
                    }
-                 // If the dash is the last character in the bracket
-                 // expression, it is not special.
-                 if (_M_scanner._M_get_token()
-                     != _ScannerT::_S_token_bracket_end)
-                   __throw_regex_error(regex_constants::error_range);
                }
-             __matcher._M_add_char(_M_value[0]);
+             else
+               {
+                 __matcher._M_add_char(_M_value[0]);
+                 __last_char.second = _M_value[0];
+               }
            }
-         __matcher._M_add_char(__ch);
        }
       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
        __matcher._M_add_character_class(_M_value,
similarity index 67%
rename from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc
rename to libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc
index ca2a5f556fcc14c9a7d14a482613292316b233c8..f7653c6dc9d67afae4d12191624e40b45c6c1ce2 100644 (file)
@@ -3,7 +3,7 @@
 //
 // 2013-08-01  Tim Shen <timshen91@gmail.com>
 //
-// Copyright (C) 2013-2014 Free Software Foundation, Inc.
+// Copyright (C) 2013-2015 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
@@ -67,9 +67,60 @@ test01()
   }
 }
 
+void
+test02()
+{
+  bool test __attribute__((unused)) = true;
+
+  try
+  {
+    std::regex re("[-----]", std::regex::extended);
+    VERIFY(false);
+  }
+  catch (const std::regex_error& e)
+  {
+    VERIFY(e.code() == std::regex_constants::error_range);
+  }
+  std::regex re("[-----]", std::regex::ECMAScript);
+}
+
+void
+test03()
+{
+  bool test __attribute__((unused)) = true;
+
+  try
+  {
+    std::regex re("[z-a]", std::regex::extended);
+    VERIFY(false);
+  }
+  catch (const std::regex_error& e)
+  {
+    VERIFY(e.code() == std::regex_constants::error_range);
+  }
+}
+
+void
+test04()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::regex re("[-0-9a-z]");
+  VERIFY(regex_match_debug("-", re));
+  VERIFY(regex_match_debug("1", re));
+  VERIFY(regex_match_debug("w", re));
+  re.assign("[-0-9a-z]", regex_constants::basic);
+  VERIFY(regex_match_debug("-", re));
+  VERIFY(regex_match_debug("1", re));
+  VERIFY(regex_match_debug("w", re));
+}
+
 int
 main()
 {
   test01();
+  test02();
+  test03();
+  test04();
   return 0;
 }