regex.h: Add friend classes.

author Tim Shen <timshen91@gmail.com>

Wed, 18 Sep 2013 15:56:20 +0000 (15:56 +0000)

committer Tim Shen <timshen@gcc.gnu.org>

Wed, 18 Sep 2013 15:56:20 +0000 (15:56 +0000)
author Tim Shen <timshen91@gmail.com>
Wed, 18 Sep 2013 15:56:20 +0000 (15:56 +0000)
committer Tim Shen <timshen@gcc.gnu.org>
Wed, 18 Sep 2013 15:56:20 +0000 (15:56 +0000)
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog

index 5758e22acb7a8a229127a269b4fd3ad4f6a18b7d..cf411e6c96349758db3d2f181aea9fbf7203e2d8 100644 (file)
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,28 @@
+2013-09-18  Tim Shen  <timshen91@gmail.com>
+
+       * include/bits/regex.h: Add friend classes.
+       (match_results<>::position, regex_iterator<>::operator++):
+       Implement position specification in regex_iterator.
+       (regex_match<>, regex_search<>):
+       Move match_results initializations to these function. Remove `todo`.
+       * include/bits/regex_compiler.tcc:
+       (_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
+       * include/bits/regex_constants.h:
+       Fix indentation. Change match_flag_type to enum type.
+       * include/bits/regex_executor.h:
+       Merge identical code to the base class _Executor.
+       Support flags in regex_constants.
+       * include/bits/regex_executor.tcc: Likewise.
+       * include/bits/regex_scanner.h: Add comments.
+       * include/bits/regex_scanner.tcc: Same.
+       * testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
+       Add a testcase.
+       * testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
+       * testsuite/28_regex/iterators/regex_iterator/char/
+       string_position_01.cc: Remove `xfail`.
+       * testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
+       Remove `xfail` and make the case really work.
+
  2013-09-18  Paolo Carlini  <paolo.carlini@oracle.com>
  
         * testsuite/performance/25_algorithms/search_n.cc: Fix typo.
diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h

index 659bee13120819e28ffc0142347149eb592baa7b..9d1438aab2391b6883492336d4e5c10fdb99ce58 100644 (file)
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -1004,6 +1004,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                      const basic_regex<_Cp, _Rp>&,
                      regex_constants::match_flag_type);
  
+      template<typename, typename, typename, typename>
+       friend class __detail::_Executor;
+
+      template<typename, typename, typename, typename>
+       friend class __detail::_DFSExecutor;
+
+      template<typename, typename, typename, typename>
+       friend class __detail::_BFSExecutor;
+
        flag_type     _M_flags;
        _Rx_traits    _M_traits;
        _AutomatonPtr _M_automaton;
@@ -1783,21 +1792,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         */
        explicit
        match_results(const _Alloc& __a = _Alloc())
-      : _Base_type(__a)
+      : _Base_type(__a), _M_in_iterator(false)
        { }
  
        /**
         * @brief Copy constructs a %match_results.
         */
        match_results(const match_results& __rhs)
-      : _Base_type(__rhs)
+      : _Base_type(__rhs), _M_in_iterator(false)
        { }
  
        /**
         * @brief Move constructs a %match_results.
         */
        match_results(match_results&& __rhs) noexcept
-      : _Base_type(std::move(__rhs))
+      : _Base_type(std::move(__rhs)), _M_in_iterator(false)
        { }
  
        /**
@@ -1905,8 +1914,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        difference_type
        position(size_type __sub = 0) const
        {
-       return __sub < size() ? std::distance(this->prefix().first,
-                                             (*this)[__sub].first) : -1;
+       // [28.12.1.4.5]
+       if (_M_in_iterator)
+         return __sub < size() ? std::distance(_M_begin,
+                                               (*this)[__sub].first) : -1;
+       else
+         return __sub < size() ? std::distance(this->prefix().first,
+                                               (*this)[__sub].first) : -1;
        }
  
        /**
@@ -2106,6 +2120,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        template<typename, typename, typename, typename>
         friend class __detail::_BFSExecutor;
  
+      template<typename, typename, typename>
+       friend class regex_iterator;
+
        template<typename _Bp, typename _Ap,
         typename _Ch_type, typename _Rx_traits>
         friend bool
@@ -2121,6 +2138,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                      const basic_regex<_Ch_type,
                      _Rx_traits>&,
                      regex_constants::match_flag_type);
+
+      _Bi_iter _M_begin;
+      bool     _M_in_iterator;
      };
  
    typedef match_results<const char*>             cmatch;
@@ -2200,8 +2220,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     * @retval false Otherwise.
     *
     * @throws an exception of type regex_error.
-   *
-   * @todo Implement this function.
     */
    template<typename _Bi_iter, typename _Alloc,
            typename _Ch_type, typename _Rx_traits>
@@ -2215,6 +2233,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      {
        if (__re._M_automaton == nullptr)
         return false;
+
+      auto __size = __re._M_automaton->_M_sub_count();
+      __size += 2;
+      __m.resize(__size);
+      for (decltype(__size) __i = 0; __i < __size; ++__i)
+       __m.at(__i).matched = false;
+
        if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
         {
           for (auto __it : __m)
@@ -2360,8 +2385,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     *               undefined.
     *
     * @throws an exception of type regex_error.
-   *
-   * @todo Implement this function.
     */
    template<typename _Bi_iter, typename _Alloc,
            typename _Ch_type, typename _Rx_traits>
@@ -2374,6 +2397,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      {
        if (__re._M_automaton == nullptr)
         return false;
+
+      auto __size = __re._M_automaton->_M_sub_count();
+      __size += 2;
+      __m.resize(__size);
+      for (decltype(__size) __i = 0; __i < __size; ++__i)
+       __m.at(__i).matched = false;
+
        if (__detail::__get_executor(__first, __last, __m, __re, __flags)
           ->_M_search())
         {
@@ -2677,7 +2707,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
      operator++()
      {
-      // FIXME: In all cases in which the call to regex_search returns true,
+      // In all cases in which the call to regex_search returns true,
        // match.prefix().first shall be equal to the previous value of
        // match[0].second, and for each index i in the half-open range
        // [0, match.size()) for which match[i].matched is true,
@@ -2697,12 +2727,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags
                                  | regex_constants::match_not_null
                                  | regex_constants::match_continuous))
-                 return *this;
+                 {
+                   _M_match._M_in_iterator = true;
+                   _M_match._M_begin = _M_begin;
+                   return *this;
+                 }
                 else
                   ++__start;
               }
           _M_flags |= regex_constants::match_prev_avail;
-         if (!regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
+         if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
+           {
+             _M_match._M_in_iterator = true;
+             _M_match._M_begin = _M_begin;
+           }
+         else
             _M_match = value_type();
         }
        return *this;
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc

index 8dc779b68e176b51912b8ea89248412ba52c7e35..7f9a19af2d96aa0491a466d4e01cda93ab39f677 100644 (file)
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -28,7 +28,7 @@
   *  Do not attempt to use it directly. @headername{regex}
   */
  
-// TODO make comments doxygen format.
+// FIXME make comments doxygen format.
  
  // This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
  // (http://swtch.com/~rsc/regexp/regexp1.html"),
@@ -223,16 +223,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                 if (__n < 0)
                   __throw_regex_error(regex_constants::error_badbrace);
                 auto __end = _M_nfa._M_insert_dummy();
+               // _M_alt is the "match more" branch, and _M_next is the
+               // "match less" one. Switch _M_alt and _M_next of all created
+               // nodes. This is a hacking but IMO works well.
+               std::stack<_StateIdT> __stack;
                 for (int __i = 0; __i < __n; ++__i)
                   {
                     auto __tmp = __r._M_clone();
-                   __e._M_append
-                     (_StateSeqT(_M_nfa,
-                                 _M_nfa._M_insert_alt(__tmp._M_start,
-                                                      __end, __neg),
-                                 __tmp._M_end));
+                   auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
+                                                     __end, __neg);
+                   __stack.push(__alt);
+                   __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
                   }
                 __e._M_append(__end);
+               while (!__stack.empty())
+                 {
+                   auto& __tmp = _M_nfa[__stack.top()];
+                   __stack.pop();
+                   swap(__tmp._M_next, __tmp._M_alt);
+                 }
               }
             else // {3,}
               {
diff --git a/libstdc++-v3/include/bits/regex_constants.h b/libstdc++-v3/include/bits/regex_constants.h

index 10b962ad21a2f655817236b42752b4a2e25765c5..94c25e531b32360d8c4b24b214303a901f62bad7 100644 (file)
--- a/libstdc++-v3/include/bits/regex_constants.h
+++ b/libstdc++-v3/include/bits/regex_constants.h
@@ -52,19 +52,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     */
    //@{
    enum __syntax_option
-    {
-      _S_icase,
-      _S_nosubs,
-      _S_optimize,
-      _S_collate,
-      _S_ECMAScript,
-      _S_basic,
-      _S_extended,
-      _S_awk,
-      _S_grep,
-      _S_egrep,
-      _S_syntax_last
-    };
+  {
+    _S_icase,
+    _S_nosubs,
+    _S_optimize,
+    _S_collate,
+    _S_ECMAScript,
+    _S_basic,
+    _S_extended,
+    _S_awk,
+    _S_grep,
+    _S_egrep,
+    _S_syntax_last
+  };
  
    /**
     * @brief This is a bitmask type indicating how to interpret the regex.
@@ -211,20 +211,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    //@{
  
    enum __match_flag
-    {
-      _S_not_bol,
-      _S_not_eol,
-      _S_not_bow,
-      _S_not_eow,
-      _S_any,
-      _S_not_null,
-      _S_continuous,
-      _S_prev_avail,
-      _S_sed,
-      _S_no_copy,
-      _S_first_only,
-      _S_match_flag_last
-    };
+  {
+    _S_not_bol,
+    _S_not_eol,
+    _S_not_bow,
+    _S_not_eow,
+    _S_any,
+    _S_not_null,
+    _S_continuous,
+    _S_prev_avail,
+    _S_sed,
+    _S_no_copy,
+    _S_first_only,
+    _S_match_flag_last
+  };
  
    /**
     * @brief This is a bitmask type indicating regex matching rules.
@@ -233,110 +233,148 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     * perform bitwise operations on these values and expect the right thing to
     * happen.
     */
-  typedef std::bitset<_S_match_flag_last> match_flag_type;
+  enum match_flag_type : unsigned int
+  {
+    /**
+     * The default matching rules.
+     */
+    match_default     = 0,
  
-  /**
-   * The default matching rules.
-   */
-  constexpr match_flag_type match_default     = 0;
+    /**
+     * The first character in the sequence [first, last) is treated as though it
+     * is not at the beginning of a line, so the character (^) in the regular
+     * expression shall not match [first, first).
+     */
+    match_not_bol     = 1 << _S_not_bol,
  
-  /**
-   * The first character in the sequence [first, last) is treated as though it
-   * is not at the beginning of a line, so the character (^) in the regular
-   * expression shall not match [first, first).
-   */
-  constexpr match_flag_type match_not_bol     = 1 << _S_not_bol;
+    /**
+     * The last character in the sequence [first, last) is treated as though it
+     * is not at the end of a line, so the character ($) in the regular
+     * expression shall not match [last, last).
+     */
+    match_not_eol     = 1 << _S_not_eol,
  
-  /**
-   * The last character in the sequence [first, last) is treated as though it
-   * is not at the end of a line, so the character ($) in the regular
-   * expression shall not match [last, last).
-   */
-  constexpr match_flag_type match_not_eol     = 1 << _S_not_eol;
+    /**
+     * The expression \\b is not matched against the sub-sequence
+     * [first,first).
+     */
+    match_not_bow     = 1 << _S_not_bow,
  
-  /**
-   * The expression \\b is not matched against the sub-sequence
-   * [first,first).
-   */
-  constexpr match_flag_type match_not_bow     = 1 << _S_not_bow;
+    /**
+     * The expression \\b should not be matched against the sub-sequence
+     * [last,last).
+     */
+    match_not_eow     = 1 << _S_not_eow,
  
-  /**
-   * The expression \\b should not be matched against the sub-sequence
-   * [last,last).
-   */
-  constexpr match_flag_type match_not_eow     = 1 << _S_not_eow;
+    /**
+     * If more than one match is possible then any match is an acceptable
+     * result.
+     */
+    match_any         = 1 << _S_any,
  
-  /**
-   * If more than one match is possible then any match is an acceptable
-   * result.
-   */
-  constexpr match_flag_type match_any         = 1 << _S_any;
+    /**
+     * The expression does not match an empty sequence.
+     */
+    match_not_null    = 1 << _S_not_null,
  
-  /**
-   * The expression does not match an empty sequence.
-   */
-  constexpr match_flag_type match_not_null    = 1 << _S_not_null;
+    /**
+     * The expression only matches a sub-sequence that begins at first .
+     */
+    match_continuous  = 1 << _S_continuous,
  
-  /**
-   * The expression only matches a sub-sequence that begins at first .
-   */
-  constexpr match_flag_type match_continuous  = 1 << _S_continuous;
+    /**
+     * --first is a valid iterator position.  When this flag is set then the
+     * flags match_not_bol and match_not_bow are ignored by the regular
+     * expression algorithms 28.11 and iterators 28.12.
+     */
+    match_prev_avail  = 1 << _S_prev_avail,
  
-  /**
-   * --first is a valid iterator position.  When this flag is set then the
-   * flags match_not_bol and match_not_bow are ignored by the regular
-   * expression algorithms 28.11 and iterators 28.12.
-   */
-  constexpr match_flag_type match_prev_avail  = 1 << _S_prev_avail;
+    /**
+     * When a regular expression match is to be replaced by a new string, the
+     * new string is constructed using the rules used by the ECMAScript replace
+     * function in ECMA- 262 [Ecma International, ECMAScript Language
+     * Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
+     * String.prototype.replace. In addition, during search and replace
+     * operations all non-overlapping occurrences of the regular expression
+     * are located and replaced, and sections of the input that did not match
+     * the expression are copied unchanged to the output string.
+     *
+     * Format strings (from ECMA-262 [15.5.4.11]):
+     * @li $$  The dollar-sign itself ($)
+     * @li $&  The matched substring.
+     * @li $`  The portion of @a string that precedes the matched substring.
+     *         This would be match_results::prefix().
+     * @li $'  The portion of @a string that follows the matched substring.
+     *         This would be match_results::suffix().
+     * @li $n  The nth capture, where n is in [1,9] and $n is not followed by a
+     *         decimal digit.  If n <= match_results::size() and the nth capture
+     *         is undefined, use the empty string instead.  If n >
+     *         match_results::size(), the result is implementation-defined.
+     * @li $nn The nnth capture, where nn is a two-digit decimal number on
+     *         [01, 99].  If nn <= match_results::size() and the nth capture is
+     *         undefined, use the empty string instead. If
+     *         nn > match_results::size(), the result is implementation-defined.
+     */
+    format_default    = 0,
  
-  /**
-   * When a regular expression match is to be replaced by a new string, the
-   * new string is constructed using the rules used by the ECMAScript replace
-   * function in ECMA- 262 [Ecma International, ECMAScript Language
-   * Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
-   * String.prototype.replace. In addition, during search and replace
-   * operations all non-overlapping occurrences of the regular expression
-   * are located and replaced, and sections of the input that did not match
-   * the expression are copied unchanged to the output string.
-   *
-   * Format strings (from ECMA-262 [15.5.4.11]):
-   * @li $$  The dollar-sign itself ($)
-   * @li $&  The matched substring.
-   * @li $`  The portion of @a string that precedes the matched substring.
-   *         This would be match_results::prefix().
-   * @li $'  The portion of @a string that follows the matched substring.
-   *         This would be match_results::suffix().
-   * @li $n  The nth capture, where n is in [1,9] and $n is not followed by a
-   *         decimal digit.  If n <= match_results::size() and the nth capture
-   *         is undefined, use the empty string instead.  If n >
-   *         match_results::size(), the result is implementation-defined.
-   * @li $nn The nnth capture, where nn is a two-digit decimal number on
-   *         [01, 99].  If nn <= match_results::size() and the nth capture is
-   *         undefined, use the empty string instead. If
-   *         nn > match_results::size(), the result is implementation-defined.
-   */
-  constexpr match_flag_type format_default    = 0;
+    /**
+     * When a regular expression match is to be replaced by a new string, the
+     * new string is constructed using the rules used by the POSIX sed utility
+     * in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable
+     * Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
+     */
+    format_sed        = 1 << _S_sed,
  
-  /**
-   * When a regular expression match is to be replaced by a new string, the
-   * new string is constructed using the rules used by the POSIX sed utility
-   * in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable
-   * Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
-   */
-  constexpr match_flag_type format_sed        = 1 << _S_sed;
+    /**
+     * During a search and replace operation, sections of the character
+     * container sequence being searched that do not match the regular
+     * expression shall not be copied to the output string.
+     */
+    format_no_copy    = 1 << _S_no_copy,
  
-  /**
-   * During a search and replace operation, sections of the character
-   * container sequence being searched that do not match the regular
-   * expression shall not be copied to the output string.
-   */
-  constexpr match_flag_type format_no_copy    = 1 << _S_no_copy;
+    /**
+     * When specified during a search and replace operation, only the first
+     * occurrence of the regular expression shall be replaced.
+     */
+    format_first_only = 1 << _S_first_only,
+  };
  
-  /**
-   * When specified during a search and replace operation, only the first
-   * occurrence of the regular expression shall be replaced.
-   */
-  constexpr match_flag_type format_first_only = 1 << _S_first_only;
+  constexpr inline match_flag_type
+  operator&(match_flag_type __a, match_flag_type __b)
+  {
+    return (match_flag_type)(static_cast<unsigned int>(__a)
+                               & static_cast<unsigned int>(__b));
+  }
+
+  constexpr inline match_flag_type
+  operator|(match_flag_type __a, match_flag_type __b)
+  {
+    return (match_flag_type)(static_cast<unsigned int>(__a)
+                               | static_cast<unsigned int>(__b));
+  }
+
+  constexpr inline match_flag_type
+  operator^(match_flag_type __a, match_flag_type __b)
+  {
+    return (match_flag_type)(static_cast<unsigned int>(__a)
+                               ^ static_cast<unsigned int>(__b));
+  }
+
+  constexpr inline match_flag_type
+  operator~(match_flag_type __a)
+  { return (match_flag_type)(~static_cast<unsigned int>(__a)); }
+
+  inline match_flag_type&
+  operator&=(match_flag_type& __a, match_flag_type __b)
+  { return __a = __a & __b; }
+
+  inline match_flag_type&
+  operator|=(match_flag_type& __a, match_flag_type __b)
+  { return __a = __a | __b; }
+
+  inline match_flag_type&
+  operator^=(match_flag_type& __a, match_flag_type __b)
+  { return __a = __a ^ __b; }
  
    //@}
  
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h

index 3df33e030245ef33e2daf022d724937bea19b2d3..b8e9266f910255db59078be4bc1b7592feb97d3b 100644 (file)
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -28,7 +28,11 @@
   *  Do not attempt to use it directly. @headername{regex}
   */
  
-// TODO: convert comments to doxygen format.
+// FIXME convert comments to doxygen format.
+
+// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
+// much more similar. Also, make grouping seperated. The
+// regex_constants::nosubs enables much more simpler execution.
  
  namespace std _GLIBCXX_VISIBILITY(default)
  {
@@ -57,55 +61,107 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      class _Executor
      {
      public:
+      typedef basic_regex<_CharT, _TraitsT>           _RegexT;
        typedef match_results<_BiIter, _Alloc>          _ResultsT;
        typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
        typedef regex_constants::match_flag_type        _FlagT;
+      typedef typename _TraitsT::char_class_type      _ClassT;
  
-      virtual
-      ~_Executor()
+    public:
+      _Executor(_BiIter         __begin,
+               _BiIter         __end,
+               _ResultsT&      __results,
+               const _RegexT&  __re,
+               _FlagT          __flags)
+      : _M_begin(__begin),
+      _M_end(__end),
+      _M_results(__results),
+      _M_re(__re),
+      _M_flags(__flags)
        { }
  
        // Set matched when string exactly match the pattern.
-      virtual bool
-      _M_match() = 0;
+      bool
+      _M_match()
+      {
+       _M_match_mode = true;
+       _M_init(_M_begin);
+       return _M_main();
+      }
  
        // Set matched when some prefix of the string matches the pattern.
-      virtual bool
-      _M_search() = 0;
-
-    protected:
-      typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
-      typedef typename _TraitsT::char_class_type      _ClassT;
+      bool
+      _M_search_from_first()
+      {
+       _M_match_mode = false;
+       _M_init(_M_begin);
+       return _M_main();
+      }
  
-      _Executor(_BiIter         __begin,
-               _BiIter         __end,
-               _ResultsT&      __results,
-               _FlagT          __flags,
-               _SizeT          __size,
-               const _TraitsT& __traits)
-      : _M_current(__begin), _M_begin(__begin), _M_end(__end),
-      _M_results(__results), _M_flags(__flags), _M_traits(__traits)
+      bool
+      _M_search()
        {
-       __size += 2;
-       _M_results.resize(__size);
-       for (_SizeT __i = 0; __i < __size; ++__i)
-         _M_results[__i].matched = false;
+       if (_M_flags & regex_constants::match_continuous)
+         return _M_search_from_first();
+       auto __cur = _M_begin;
+       do
+         {
+           _M_match_mode = false;
+           _M_init(__cur);
+           if (_M_main())
+             return true;
+         }
+       // Continue when __cur == _M_end
+       while (__cur++ != _M_end);
+       return false;
        }
  
        bool
-      _M_is_word(_CharT __ch)
+      _M_is_word(_CharT __ch) const
        {
         static const _CharT __s = 'w';
-       return _M_traits.isctype(__ch,
-                                _M_traits.lookup_classname(&__s, &__s+1));
+       return _M_re._M_traits.isctype
+         (__ch, _M_re._M_traits.lookup_classname(&__s, &__s+1));
+      }
+
+      bool
+      _M_at_begin() const
+      {
+       return _M_current == _M_begin
+         && !(_M_flags & (regex_constants::match_not_bol
+                          | regex_constants::match_prev_avail));
        }
  
+      bool
+      _M_at_end() const
+      {
+       return _M_current == _M_end
+         && !(_M_flags & regex_constants::match_not_eol);
+      }
+
+      bool
+      _M_word_boundry(_State<_CharT, _TraitsT> __state) const;
+
+      bool
+      _M_lookahead(_State<_CharT, _TraitsT> __state) const;
+
+    public:
+      virtual void
+      _M_init(_BiIter __cur) = 0;
+
+      virtual void
+      _M_set_start(_StateIdT __start) = 0;
+
+      virtual bool
+      _M_main() = 0;
+
        _BiIter         _M_current;
        const _BiIter   _M_begin;
        const _BiIter   _M_end;
-      _ResultsVec&    _M_results;
-      const _TraitsT& _M_traits;
-      _FlagT          _M_flags;
+      const _RegexT&  _M_re;
+      _ResultsT&      _M_results;
+      const _FlagT    _M_flags;
+      bool            _M_match_mode;
      };
  
    // A _DFSExecutor perform a DFS on given NFA and input string. At the very
@@ -128,61 +184,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      {
      public:
        typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
-      typedef _NFA<_CharT, _TraitsT>                       _RegexT;
+      typedef _NFA<_CharT, _TraitsT>                       _NFAT;
+      typedef typename _BaseT::_RegexT                     _RegexT;
        typedef typename _BaseT::_ResultsT                   _ResultsT;
        typedef typename _BaseT::_ResultsVec                 _ResultsVec;
-      typedef regex_constants::match_flag_type             _FlagT;
+      typedef typename _BaseT::_FlagT                      _FlagT;
  
+    public:
        _DFSExecutor(_BiIter         __begin,
                    _BiIter         __end,
                    _ResultsT&      __results,
-                  const _RegexT&  __nfa,
-                  const _TraitsT& __traits,
+                  const _RegexT&  __re,
                    _FlagT          __flags)
-      : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
-              __traits),
-       _M_traits(__traits), _M_nfa(__nfa), _M_cur_results(this->_M_results),
-       _M_start_state(__nfa._M_start())
+      : _BaseT(__begin, __end, __results, __re, __flags),
+      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
+            (__re._M_automaton)),
+      _M_start_state(_M_nfa._M_start())
        { }
  
-      bool
-      _M_match()
+    private:
+      void
+      _M_init(_BiIter __cur)
        {
-       this->_M_current = this->_M_begin;
-       return _M_dfs<true>(_M_start_state);
+       _M_cur_results.resize(_M_nfa._M_sub_count() + 2);
+       this->_M_current = __cur;
        }
  
-      bool
-      _M_search_from_first()
-      {
-       this->_M_current = this->_M_begin;
-       return _M_dfs<false>(_M_start_state);
-      }
+      void
+      _M_set_start(_StateIdT __start)
+      { _M_start_state = __start; }
  
        bool
-      _M_search()
-      {
-       auto __cur = this->_M_begin;
-       do
-         {
-           this->_M_current = __cur;
-           if (_M_dfs<false>(_M_start_state))
-             return true;
-         }
-       // Continue when __cur == _M_end
-       while (__cur++ != this->_M_end);
-       return false;
-      }
+      _M_main()
+      { return _M_dfs(this->_M_start_state); }
  
-    private:
-      template<bool __match_mode>
-       bool
-       _M_dfs(_StateIdT __i);
+      bool
+      _M_dfs(_StateIdT __start);
  
        // To record current solution.
        _ResultsVec     _M_cur_results;
-      const _TraitsT& _M_traits;
-      const _RegexT&  _M_nfa;
+      const _NFAT&    _M_nfa;
        _StateIdT       _M_start_state;
      };
  
@@ -206,47 +247,57 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      {
      public:
        typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
-      typedef _NFA<_CharT, _TraitsT>                       _RegexT;
+      typedef _NFA<_CharT, _TraitsT>                       _NFAT;
+      typedef typename _BaseT::_RegexT                     _RegexT;
        typedef typename _BaseT::_ResultsT                   _ResultsT;
+      typedef typename _BaseT::_ResultsVec                 _ResultsVec;
+      typedef typename _BaseT::_FlagT                      _FlagT;
        // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
        // carefully work out how to compare to conflict matching states.
        //
        // A matching state is a pair(where, when); `where` is a NFA node; `when`
-      // is a _BiIter, indicating which char is the next to be mathed one.  Two
-      // matching states conflict means that they have equivalent `where` and
-      // `when`.
+      // is a _BiIter, indicating which char is the next to be matched. Two
+      // matching states conflict if they have equivalent `where` and `when`.
        //
-      // Now since we need to drop one and keep another, because at most one of
-      // them could be the final optimal solution. This behavior is affected by
+      // Now we need to drop one and keep another, because at most one of them
+      // could be the final optimal solution. This behavior is affected by
        // greedy policy.
        //
        // The definition of `greedy`:
        // For the sequence of quantifiers in NFA sorted by there start position,
-      // now maintain a vector in a matching state, with equal length to
+      // now maintain a vector in every matching state, with equal length to
        // quantifier seq, recording repeating times of every quantifier. Now to
        // compare two matching states, we just lexically compare these two
        // vectors. To win the compare(to survive), one matching state needs to
        // make its greedy quantifier count larger, and ungreedy quantifiers
        // count smaller.
        //
-      // In the implementation, we recorded negtive numbers for greedy
-      // quantifiers and positive numbers of ungreedy ones. Now a simple
+      // In the implementation, we recorded negtive counts for greedy
+      // quantifiers and positive counts of ungreedy ones. Now the implicit
        // operator<() for lexicographical_compare will emit the answer.
        //
        // When two vectors equal, it means the `where`, `when` and quantifier
-      // counts are identical, it indicates the same answer, so just return
+      // counts are identical, and indicates the same solution; so just return
        // false.
        struct _ResultsEntry
-      : private _BaseT::_ResultsVec
+      : private _ResultsVec
        {
        public:
         _ResultsEntry(unsigned int __res_sz, unsigned int __sz)
-       : _BaseT::_ResultsVec(__res_sz), _M_quant_keys(__sz)
+       : _ResultsVec(__res_sz), _M_quant_keys(__sz)
         { }
  
+       void
+       resize(unsigned int __n)
+       { _ResultsVec::resize(__n); }
+
+       unsigned int
+       size()
+       { return _ResultsVec::size(); }
+
         sub_match<_BiIter>&
         operator[](unsigned int __idx)
-       { return this->_BaseT::_ResultsVec::operator[](__idx); }
+       { return _ResultsVec::operator[](__idx); }
  
         bool
         operator<(const _ResultsEntry& __rhs) const
@@ -263,75 +314,47 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         _M_inc(unsigned int __idx, bool __neg)
         { _M_quant_keys[__idx] += __neg ? 1 : -1; }
  
-       typename _BaseT::_ResultsVec
+       _ResultsVec
         _M_get()
         { return *this; }
  
        public:
         std::vector<int> _M_quant_keys;
        };
-
        typedef std::unique_ptr<_ResultsEntry>               _ResultsPtr;
-      typedef regex_constants::match_flag_type             _FlagT;
  
+    public:
        _BFSExecutor(_BiIter         __begin,
                    _BiIter         __end,
                    _ResultsT&      __results,
-                  const _RegexT&  __nfa,
-                  const _TraitsT& __traits,
+                  const _RegexT&  __re,
                    _FlagT          __flags)
-      : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
-              __traits),
-       _M_nfa(__nfa),
-       _M_cur_results(nullptr),
-       _M_start_state(__nfa._M_start())
+      : _BaseT(__begin, __end, __results, __re, __flags),
+      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
+            (__re._M_automaton)),
+      _M_start_state(_M_nfa._M_start())
        { }
  
-      bool
-      _M_match()
-      {
-       _M_init(this->_M_begin);
-       return _M_main_loop<true>();
-      }
-
-      bool
-      _M_search_from_first()
-      {
-       _M_init(this->_M_begin);
-       return _M_main_loop<false>();
-      }
-
-      bool
-      _M_search()
-      {
-       auto __cur = this->_M_begin;
-       do
-         {
-           _M_init(__cur);
-           if (_M_main_loop<false>())
-             return true;
-         }
-       // Continue when __cur == _M_end
-       while (__cur++ != this->_M_end);
-       return false;
-      }
-
      private:
        void
        _M_init(_BiIter __cur)
        {
-       _GLIBCXX_DEBUG_ASSERT(_M_start_state != _S_invalid_state_id);
+       _GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id);
         this->_M_current = __cur;
         _M_covered.clear();
-       _M_covered[_M_start_state] =
-         _ResultsPtr(new _ResultsEntry(this->_M_results.size(),
+       _ResultsVec& __res(this->_M_results);
+       _M_covered[this->_M_start_state] =
+         _ResultsPtr(new _ResultsEntry(__res.size(),
                                         _M_nfa._M_quant_count));
         _M_e_closure();
        }
  
-      template<bool __match_mode>
-       bool
-       _M_main_loop();
+      void
+      _M_set_start(_StateIdT __start)
+      { _M_start_state = __start; }
+
+      bool
+      _M_main();
  
        void
        _M_e_closure();
@@ -345,10 +368,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        std::map<_StateIdT, _ResultsPtr> _M_covered;
        // To record global optimal solution.
        _ResultsPtr                      _M_cur_results;
-      const _RegexT&                   _M_nfa;
+      const _NFAT&                     _M_nfa;
        _StateIdT                        _M_start_state;
      };
  
+  template<typename _BiIter, typename _Alloc,
+    typename _CharT, typename _TraitsT>
+    std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
+    __get_executor(_BiIter __b,
+                  _BiIter __e,
+                  match_results<_BiIter, _Alloc>& __m,
+                  const basic_regex<_CharT, _TraitsT>& __re,
+                  regex_constants::match_flag_type __flags);
+
   //@} regex-detail
  _GLIBCXX_END_NAMESPACE_VERSION
  } // namespace __detail
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc

index b110c5dc2f0ae5d35d1ada6c66e1a486cb7f1434..af2455b8a4e366605fd3b89894d586982e550357 100644 (file)
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -36,7 +36,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  
    template<typename _BiIter, typename _Alloc,
      typename _CharT, typename _TraitsT>
-  template<bool __match_mode>
      bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
      _M_dfs(_StateIdT __i)
      {
@@ -44,9 +43,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         // This is not that certain. Need deeper investigate.
         return false;
        auto& __current = this->_M_current;
-      auto& __begin = this->_M_begin;
-      auto& __end = this->_M_end;
-      auto& __results = _M_cur_results;
        const auto& __state = _M_nfa[__i];
        bool __ret = false;
        switch (__state._M_opcode)
@@ -54,129 +50,115 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         case _S_opcode_alternative:
           // Greedy or not, this is a question ;)
           if (!__state._M_neg)
-           __ret = _M_dfs<__match_mode>(__state._M_alt)
-             || _M_dfs<__match_mode>(__state._M_next);
+           __ret = _M_dfs(__state._M_alt)
+             || _M_dfs(__state._M_next);
           else
-           __ret = _M_dfs<__match_mode>(__state._M_next)
-             || _M_dfs<__match_mode>(__state._M_alt);
+           __ret = _M_dfs(__state._M_next)
+             || _M_dfs(__state._M_alt);
           break;
         case _S_opcode_subexpr_begin:
           // Here's the critical part: if there's nothing changed since last
           // visit, do NOT continue. This prevents the executor from get into
           // infinite loop when use "()*" to match "".
           //
-         // Every change on __results will be roll back after the recursion
-         // step finished.
-         if (!__results[__state._M_subexpr].matched
-             || __results[__state._M_subexpr].first != __current)
+         // Every change on _M_cur_results will be roll back after the
+         // recursion step finished.
+         if (!_M_cur_results[__state._M_subexpr].matched
+             || _M_cur_results[__state._M_subexpr].first != __current)
             {
               auto __back = __current;
-             __results[__state._M_subexpr].first = __current;
-             __ret = _M_dfs<__match_mode>(__state._M_next);
-             __results[__state._M_subexpr].first = __back;
+             _M_cur_results[__state._M_subexpr].first = __current;
+             __ret = _M_dfs(__state._M_next);
+             _M_cur_results[__state._M_subexpr].first = __back;
             }
           break;
         case _S_opcode_subexpr_end:
-         if (__results[__state._M_subexpr].second != __current
-             || __results[__state._M_subexpr].matched != true)
+         if (_M_cur_results[__state._M_subexpr].second != __current
+             || _M_cur_results[__state._M_subexpr].matched != true)
             {
-             auto __back = __results[__state._M_subexpr];
-             __results[__state._M_subexpr].second = __current;
-             __results[__state._M_subexpr].matched = true;
-             __ret = _M_dfs<__match_mode>(__state._M_next);
-             __results[__state._M_subexpr] = __back;
+             auto __back = _M_cur_results[__state._M_subexpr];
+             _M_cur_results[__state._M_subexpr].second = __current;
+             _M_cur_results[__state._M_subexpr].matched = true;
+             __ret = _M_dfs(__state._M_next);
+             _M_cur_results[__state._M_subexpr] = __back;
             }
           else
-           __ret = _M_dfs<__match_mode>(__state._M_next);
+           __ret = _M_dfs(__state._M_next);
           break;
         case _S_opcode_line_begin_assertion:
-         if (__current == __begin)
-           __ret = _M_dfs<__match_mode>(__state._M_next);
+         if (this->_M_at_begin())
+           __ret = _M_dfs(__state._M_next);
           break;
         case _S_opcode_line_end_assertion:
-         if (__current == __end)
-           __ret = _M_dfs<__match_mode>(__state._M_next);
+         if (this->_M_at_end())
+           __ret = _M_dfs(__state._M_next);
           break;
-         // By definition.
         case _S_opcode_word_boundry:
-           {
-             bool __ans = false;
-             if (__current == __begin && this->_M_is_word(*__current))
-               __ans = true;
-             else if (__current == __end && this->_M_is_word(*__current))
-               __ans = true;
-             else
-               {
-                 auto __pre = __current;
-                 --__pre;
-                 if (this->_M_is_word(*__current)
-                     != this->_M_is_word(*__pre))
-                   __ans = true;
-               }
-             if (__ans == !__state._M_neg)
-               __ret = _M_dfs<__match_mode>(__state._M_next);
-           }
+         if (this->_M_word_boundry(__state) == !__state._M_neg)
+           __ret = _M_dfs(__state._M_next);
           break;
           // Here __state._M_alt offers a single start node for a sub-NFA.
           // We recursivly invoke our algorithm to match the sub-NFA.
         case _S_opcode_subexpr_lookahead:
-           {
-             _ResultsT __m;
-             // FIXME Here's not necessarily a DFSExecutor. But we need to
-             // refactor the whole NFA to a recursive tree structure first.
-             _DFSExecutor __sub(this->_M_current,
-                                this->_M_end,
-                                __m,
-                                this->_M_nfa,
-                                this->_M_traits,
-                                this->_M_flags);
-             __sub._M_start_state = __state._M_alt;
-             if (__sub._M_search_from_first() == !__state._M_neg)
-               __ret = _M_dfs<__match_mode>(__state._M_next);
-           }
+         if (this->_M_lookahead(__state) == !__state._M_neg)
+           __ret = _M_dfs(__state._M_next);
           break;
         case _S_opcode_match:
-         if (__current != __end && __state._M_matches(*__current))
+         if (__current != this->_M_end && __state._M_matches(*__current))
             {
               ++__current;
-             __ret = _M_dfs<__match_mode>(__state._M_next);
+             __ret = _M_dfs(__state._M_next);
               --__current;
             }
           break;
-       // First fetch the matched result from __results as __submatch;
+       // First fetch the matched result from _M_cur_results as __submatch;
         // then compare it with
         // (__current, __current + (__submatch.second - __submatch.first))
         // If matched, keep going; else just return to try another state.
         case _S_opcode_backref:
           {
-           auto& __submatch = __results[__state._M_backref_index];
+           auto& __submatch = _M_cur_results[__state._M_backref_index];
             if (!__submatch.matched)
               break;
             auto __last = __current;
             for (auto __tmp = __submatch.first;
-                __last != __end && __tmp != __submatch.second;
+                __last != this->_M_end && __tmp != __submatch.second;
                  ++__tmp)
               ++__last;
-           if (_M_traits.transform(__submatch.first, __submatch.second)
-               == _M_traits.transform(__current, __last))
+           if (this->_M_re._M_traits.transform(__submatch.first,
+                                               __submatch.second)
+               == this->_M_re._M_traits.transform(__current, __last))
               if (__last != __current)
                 {
                   auto __backup = __current;
                   __current = __last;
-                 __ret = _M_dfs<__match_mode>(__state._M_next);
+                 __ret = _M_dfs(__state._M_next);
                   __current = __backup;
                 }
               else
-               __ret = _M_dfs<__match_mode>(__state._M_next);
+               __ret = _M_dfs(__state._M_next);
           }
           break;
         case _S_opcode_accept:
-         if (__match_mode)
-           __ret = __current == __end;
+         if (this->_M_match_mode)
+           __ret = __current == this->_M_end;
           else
             __ret = true;
+         if (__current == this->_M_begin
+             && (this->_M_flags & regex_constants::match_not_null))
+           __ret = false;
           if (__ret)
-           this->_M_results = __results;
+           {
+             _ResultsVec& __res(this->_M_results);
+             if (this->_M_re.flags() & regex_constants::nosubs)
+               {
+                 _M_cur_results.resize(3); // truncate
+                 __res.resize(3);
+               }
+             for (unsigned int __i = 0; __i < _M_cur_results.size(); ++__i)
+               if (_M_cur_results[__i].matched)
+                 __res[__i] = _M_cur_results[__i];
+           }
           break;
         default:
           _GLIBCXX_DEBUG_ASSERT(false);
@@ -186,23 +168,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  
    template<typename _BiIter, typename _Alloc,
      typename _CharT, typename _TraitsT>
-  template<bool __match_mode>
      bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
-    _M_main_loop()
+    _M_main()
      {
        bool __ret = false;
+      if (!this->_M_match_mode
+         && !(this->_M_flags & regex_constants::match_not_null))
+       __ret = _M_includes_some() || __ret;
        while (this->_M_current != this->_M_end)
         {
-         if (!__match_mode)
-           // To keep regex_search greedy, no "return true" here.
-           __ret = _M_includes_some() || __ret;
           _M_move();
           ++this->_M_current;
           _M_e_closure();
+         if (!this->_M_match_mode)
+           // To keep regex_search greedy, no "return true" here.
+           __ret = _M_includes_some() || __ret;
         }
-      __ret = _M_includes_some() || __ret;
+      if (this->_M_match_mode)
+       __ret = _M_includes_some();
        if (__ret)
-       this->_M_results = _M_cur_results->_M_get();
+       {
+         _ResultsVec& __res(this->_M_results);
+         if (this->_M_re.flags() & regex_constants::nosubs)
+           {
+             // truncate
+             _M_cur_results->resize(3);
+             __res.resize(3);
+           }
+         for (unsigned int __i = 0; __i < _M_cur_results->size(); ++__i)
+           if ((*_M_cur_results)[__i].matched)
+             __res[__i] = (*_M_cur_results)[__i];
+       }
        return __ret;
      }
  
@@ -211,11 +207,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
      _M_e_closure()
      {
-      auto& __current = this->_M_current;
        std::queue<_StateIdT> __q;
        std::vector<bool> __in_q(_M_nfa.size(), false);
-      auto& __begin = this->_M_begin;
-      auto& __end = this->_M_end;
+      auto& __current = this->_M_current;
  
        for (auto& __it : _M_covered)
         {
@@ -292,46 +286,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                 }
                 break;
               case _S_opcode_line_begin_assertion:
-               if (__current == __begin)
+               if (this->_M_at_begin())
                   __add_visited_state(__state._M_next);
                 break;
               case _S_opcode_line_end_assertion:
-               if (__current == __end)
+               if (this->_M_at_end())
                   __add_visited_state(__state._M_next);
                 break;
               case _S_opcode_word_boundry:
-                 {
-                   bool __ans = false;
-                   if (__current == __begin && this->_M_is_word(*__current))
-                     __ans = true;
-                   else if (__current == __end && this->_M_is_word(*__current))
-                     __ans = true;
-                   else
-                     {
-                       auto __pre = __current;
-                       --__pre;
-                       if (this->_M_is_word(*__current)
-                           != this->_M_is_word(*__pre))
-                         __ans = true;
-                     }
-                   if (__ans == !__state._M_neg)
-                     __add_visited_state(__state._M_next);
-                 }
+               if (this->_M_word_boundry(__state) == !__state._M_neg)
+                 __add_visited_state(__state._M_next);
                 break;
               case _S_opcode_subexpr_lookahead:
-                 {
-                   _ResultsT __m;
-                   // Same comment as in DFS.
-                   _BFSExecutor __sub(this->_M_current,
-                                      this->_M_end,
-                                      __m,
-                                      this->_M_nfa,
-                                      this->_M_traits,
-                                      this->_M_flags);
-                   __sub._M_start_state = __state._M_alt;
-                   if (__sub._M_search_from_first() == !__state._M_neg)
-                     __add_visited_state(__state._M_next);
-                 }
+               if (this->_M_lookahead(__state) == !__state._M_neg)
+                 __add_visited_state(__state._M_next);
                 break;
               case _S_opcode_match:
                 break;
@@ -395,6 +363,44 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        return __succ;
      }
  
+  // Return whether now is at some word boundry.
+  template<typename _BiIter, typename _Alloc,
+    typename _CharT, typename _TraitsT>
+    bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_word_boundry(_State<_CharT, _TraitsT> __state) const
+    {
+      // By definition.
+      bool __ans = false;
+      auto __pre = _M_current;
+      --__pre;
+      if (!(_M_at_begin() && _M_at_end()))
+       if (_M_at_begin())
+         __ans = _M_is_word(*_M_current)
+           && !(_M_flags & regex_constants::match_not_bow);
+       else if (_M_at_end())
+         __ans = _M_is_word(*__pre)
+           && !(_M_flags & regex_constants::match_not_eow);
+       else
+         __ans = _M_is_word(*_M_current)
+           != _M_is_word(*__pre);
+      return __ans;
+    }
+
+  // Return whether now match the given sub-NFA.
+  template<typename _BiIter, typename _Alloc,
+    typename _CharT, typename _TraitsT>
+    bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_lookahead(_State<_CharT, _TraitsT> __state) const
+    {
+      auto __sub = __get_executor(this->_M_current,
+                                 this->_M_end,
+                                 this->_M_results,
+                                 this->_M_re,
+                                 this->_M_flags);
+      __sub->_M_set_start(__state._M_alt);
+      return __sub->_M_search_from_first();
+    }
+
    template<typename _BiIter, typename _Alloc,
      typename _CharT, typename _TraitsT>
      std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
@@ -411,10 +417,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
         (__re._M_automaton);
        if (__p->_M_has_backref)
-       return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p,
-                                             __re._M_traits, __flags));
-      return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p,
-                                           __re._M_traits, __flags));
+       return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
+      return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
      }
  
  _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h

index 824d6ce1081513387cb8135985065a6d9ddac249..09a18f634a09a7b3adebaa389b87431375243fac 100644 (file)
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -68,7 +68,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         _S_token_backref,
         _S_token_subexpr_begin,
         _S_token_subexpr_no_group_begin,
-       _S_token_subexpr_lookahead_begin,
+       _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
         _S_token_subexpr_end,
         _S_token_bracket_begin,
         _S_token_bracket_neg_begin,
@@ -86,7 +86,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         _S_token_ungreedy,
         _S_token_line_begin,
         _S_token_line_end,
-       _S_token_word_bound,
+       _S_token_word_bound, // neg if _M_value[0] == 'n'
         _S_token_comma,
         _S_token_dup_count,
         _S_token_eof,
@@ -174,7 +174,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _StringT                      _M_value;
        bool                          _M_at_bracket_start;
      public:
-      // TODO: make them static when this file is stable.
+      // FIXME: make them static when this file is stable.
        const std::map<char, _TokenT> _M_token_map;
        const std::map<char, char>    _M_ecma_escape_map;
        const std::map<char, char>    _M_awk_escape_map;
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc

index 4b66157278b1691f4027838c056aa870580ed0cb..abdbcc64f1f3b4d9d41b728268f3e2b581a75b0f 100644 (file)
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -28,7 +28,7 @@
   *  Do not attempt to use it directly. @headername{regex}
   */
  
-// TODO make comments doxygen format.
+// FIXME make comments doxygen format.
  
  // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
  // and awk
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc

index 82e99058743cd2aee8ebbd62a8a438448ff4a9d5..3064b3b26e4bbe68e34b148629422aa5792f383c 100644 (file)
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc
@@ -1,5 +1,4 @@
  // { dg-options "-std=gnu++11" }
-// { dg-do run { xfail *-*-* } }
  
  //
  // 2013-09-14  Tim Shen <timshen91@gmail.com>
@@ -54,22 +53,37 @@ test01()
    string sol[] =
      {
        "This",
+      "",
        "is",
+      "",
        "a",
+      "",
        "regular",
+      "",
        "expression",
+      "",
      };
  
    regex re("\\b\\w*\\b");
    int i = 0;
    for (auto it = sregex_iterator(s.begin(), s.end(), re);
-       it != sregex_iterator() && i < 5;
+       it != sregex_iterator();
         ++it)
      {
        string s((*it)[0].first, (*it)[0].second);
        VERIFY(s == sol[i++]);
      }
-  VERIFY(i == 5);
+  VERIFY(i == 10);
+
+  {
+    cmatch m;
+    regex re("(?=(as)df)as(df)");
+    regex_search("asdf", m, re);
+    VERIFY(m.size() == 3);
+    VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
+    VERIFY(m[1].matched && string(m[1].first, m[1].second) == "as");
+    VERIFY(m[2].matched && string(m[2].first, m[2].second) == "df");
+  }
  }
  
  int
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/flags.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/flags.cc

new file mode 100644 (file)

index 0000000..4be406c
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/flags.cc
@@ -0,0 +1,71 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-09-18  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.3 regex_search
+// Tests ECMAScript flags.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  cmatch m;
+  regex re("((as)(df))", regex_constants::ECMAScript | regex_constants::nosubs);
+  VERIFY(regex_search("asdf", m, re));
+  VERIFY(m.size() == 1);
+  VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
+
+  VERIFY( regex_search("a", regex("^a")));
+  VERIFY(!regex_search("a", regex("^a"), regex_constants::match_not_bol));
+  VERIFY( regex_search("a", regex("a$")));
+  VERIFY(!regex_search("a", regex("a$"), regex_constants::match_not_eol));
+  VERIFY( regex_search("a", regex("\\ba")));
+  VERIFY(!regex_search("a", regex("\\ba"), regex_constants::match_not_bow));
+  VERIFY( regex_search("a", regex("a\\b")));
+  VERIFY(!regex_search("a", regex("a\\b"), regex_constants::match_not_eow));
+  VERIFY( regex_search("", regex("")));
+  VERIFY(!regex_search("", regex(""), regex_constants::match_not_null));
+  VERIFY( regex_search("", regex("^$")));
+  VERIFY(!regex_search("", regex("^$"), regex_constants::match_not_null));
+  VERIFY( regex_search("aaa", m, regex("a*?"),
+                      regex_constants::match_not_null));
+  VERIFY(m[0].matched && string(m[0].first, m[0].second) == "a");
+  VERIFY( regex_search("asdf", regex("sdf")));
+  VERIFY(!regex_search("asdf", regex("sdf"),
+                      regex_constants::match_continuous));
+  VERIFY( regex_search(" a"+1, regex("\\ba"),
+                      regex_constants::match_prev_avail));
+  VERIFY( regex_search("ba"+1, regex("\\Ba"),
+                      regex_constants::match_prev_avail));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc b/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc

index 75ef0584b8ab8a24e5bbb80ff4bb6d5dd3c69140..978565021ccb94ec30fca6def9a263d587fca93a 100644 (file)
--- a/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc
+++ b/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc
@@ -1,5 +1,4 @@
  // { dg-options "-std=gnu++11" }
-// { dg-do run { xfail *-*-* } }
  
  //
  // 2013-07-25  Tim Shen <timshen91@gmail.com>
diff --git a/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc b/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc

index cd2c68e33eeb4a6d6c6f9da12c81d99b84a2ad7b..9cb96f7a162976ab70c63971fbcba18ca1204a68 100644 (file)
--- a/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc
+++ b/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc
@@ -1,6 +1,5 @@
  // { dg-options "-std=gnu++11" }
  // { dg-require-namedlocale "en_US.UTF-8" }
-// { dg-do run { xfail *-*-* } }
  
  //
  // 2013-09-05  Tim Shen <timshen91@gmail.com>
@@ -42,13 +41,19 @@ test01()
  
    re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})");
  
-  std::wsregex_iterator p(str2.begin(), str2.end(), re2);
-  auto a = p;
-  ++p;
-  VERIFY(a != p);
-  //for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
-  //    p != std::wsregex_iterator{}; ++p)
-  //  std::wcout << (*p)[1] << std::endl;
+  std::wstring sol[] =
+    {
+      L"ä\u2009Ä",
+      L"\u2009",
+      L"ö\u2009Ö",
+      L"\u2009",
+      L"ü\u2009Ü",
+      L"",
+    };
+  int i = 0;
+  for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
+      p != std::wsregex_iterator{}; ++p)
+    VERIFY(std::wstring((*p)[1].first, (*p)[1].second) == sol[i++]);
  }
  
  int
author	Tim Shen <timshen91@gmail.com>
	Wed, 18 Sep 2013 15:56:20 +0000 (15:56 +0000)
committer	Tim Shen <timshen@gcc.gnu.org>
	Wed, 18 Sep 2013 15:56:20 +0000 (15:56 +0000)
libstdc++-v3/ChangeLog		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex.h		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_compiler.tcc		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_constants.h		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_executor.h		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_executor.tcc		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_scanner.h		patch \| blob \| blame \| history
libstdc++-v3/include/bits/regex_scanner.tcc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/flags.cc	[new file with mode: 0644]	patch \| blob
libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc		patch \| blob \| blame \| history
libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc		patch \| blob \| blame \| history