]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
regex.h: Add friend classes.
authorTim Shen <timshen91@gmail.com>
Wed, 18 Sep 2013 15:56:20 +0000 (15:56 +0000)
committerTim Shen <timshen@gcc.gnu.org>
Wed, 18 Sep 2013 15:56:20 +0000 (15:56 +0000)
2013-09-18  Tim Shen  <timshen91@gmail.com>

* include/bits/regex.h: Add friend classes.
(match_results<>::position, regex_iterator<>::operator++):
Implement position specification in regex_iterator.
(regex_match<>, regex_search<>):
Move match_results initializations to these function. Remove `todo`.
* include/bits/regex_compiler.tcc:
(_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
* include/bits/regex_constants.h:
Fix indentation. Change match_flag_type to enum type.
* include/bits/regex_executor.h:
Merge identical code to the base class _Executor.
Support flags in regex_constants.
* include/bits/regex_executor.tcc: Likewise.
* include/bits/regex_scanner.h: Add comments.
* include/bits/regex_scanner.tcc: Same.
* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
Add a testcase.
* testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
* testsuite/28_regex/iterators/regex_iterator/char/
string_position_01.cc: Remove `xfail`.
* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
Remove `xfail` and make the case really work.

From-SVN: r202706

12 files changed:
libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex.h
libstdc++-v3/include/bits/regex_compiler.tcc
libstdc++-v3/include/bits/regex_constants.h
libstdc++-v3/include/bits/regex_executor.h
libstdc++-v3/include/bits/regex_executor.tcc
libstdc++-v3/include/bits/regex_scanner.h
libstdc++-v3/include/bits/regex_scanner.tcc
libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc
libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/flags.cc [new file with mode: 0644]
libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc
libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc

index 5758e22acb7a8a229127a269b4fd3ad4f6a18b7d..cf411e6c96349758db3d2f181aea9fbf7203e2d8 100644 (file)
@@ -1,3 +1,28 @@
+2013-09-18  Tim Shen  <timshen91@gmail.com>
+
+       * include/bits/regex.h: Add friend classes.
+       (match_results<>::position, regex_iterator<>::operator++):
+       Implement position specification in regex_iterator.
+       (regex_match<>, regex_search<>):
+       Move match_results initializations to these function. Remove `todo`.
+       * include/bits/regex_compiler.tcc:
+       (_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
+       * include/bits/regex_constants.h:
+       Fix indentation. Change match_flag_type to enum type.
+       * include/bits/regex_executor.h:
+       Merge identical code to the base class _Executor.
+       Support flags in regex_constants.
+       * include/bits/regex_executor.tcc: Likewise.
+       * include/bits/regex_scanner.h: Add comments.
+       * include/bits/regex_scanner.tcc: Same.
+       * testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
+       Add a testcase.
+       * testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
+       * testsuite/28_regex/iterators/regex_iterator/char/
+       string_position_01.cc: Remove `xfail`.
+       * testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
+       Remove `xfail` and make the case really work.
+
 2013-09-18  Paolo Carlini  <paolo.carlini@oracle.com>
 
        * testsuite/performance/25_algorithms/search_n.cc: Fix typo.
index 659bee13120819e28ffc0142347149eb592baa7b..9d1438aab2391b6883492336d4e5c10fdb99ce58 100644 (file)
@@ -1004,6 +1004,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                     const basic_regex<_Cp, _Rp>&,
                     regex_constants::match_flag_type);
 
+      template<typename, typename, typename, typename>
+       friend class __detail::_Executor;
+
+      template<typename, typename, typename, typename>
+       friend class __detail::_DFSExecutor;
+
+      template<typename, typename, typename, typename>
+       friend class __detail::_BFSExecutor;
+
       flag_type     _M_flags;
       _Rx_traits    _M_traits;
       _AutomatonPtr _M_automaton;
@@ -1783,21 +1792,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        */
       explicit
       match_results(const _Alloc& __a = _Alloc())
-      : _Base_type(__a)
+      : _Base_type(__a), _M_in_iterator(false)
       { }
 
       /**
        * @brief Copy constructs a %match_results.
        */
       match_results(const match_results& __rhs)
-      : _Base_type(__rhs)
+      : _Base_type(__rhs), _M_in_iterator(false)
       { }
 
       /**
        * @brief Move constructs a %match_results.
        */
       match_results(match_results&& __rhs) noexcept
-      : _Base_type(std::move(__rhs))
+      : _Base_type(std::move(__rhs)), _M_in_iterator(false)
       { }
 
       /**
@@ -1905,8 +1914,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       difference_type
       position(size_type __sub = 0) const
       {
-       return __sub < size() ? std::distance(this->prefix().first,
-                                             (*this)[__sub].first) : -1;
+       // [28.12.1.4.5]
+       if (_M_in_iterator)
+         return __sub < size() ? std::distance(_M_begin,
+                                               (*this)[__sub].first) : -1;
+       else
+         return __sub < size() ? std::distance(this->prefix().first,
+                                               (*this)[__sub].first) : -1;
       }
 
       /**
@@ -2106,6 +2120,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       template<typename, typename, typename, typename>
        friend class __detail::_BFSExecutor;
 
+      template<typename, typename, typename>
+       friend class regex_iterator;
+
       template<typename _Bp, typename _Ap,
        typename _Ch_type, typename _Rx_traits>
        friend bool
@@ -2121,6 +2138,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                     const basic_regex<_Ch_type,
                     _Rx_traits>&,
                     regex_constants::match_flag_type);
+
+      _Bi_iter _M_begin;
+      bool     _M_in_iterator;
     };
 
   typedef match_results<const char*>             cmatch;
@@ -2200,8 +2220,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * @retval false Otherwise.
    *
    * @throws an exception of type regex_error.
-   *
-   * @todo Implement this function.
    */
   template<typename _Bi_iter, typename _Alloc,
           typename _Ch_type, typename _Rx_traits>
@@ -2215,6 +2233,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
       if (__re._M_automaton == nullptr)
        return false;
+
+      auto __size = __re._M_automaton->_M_sub_count();
+      __size += 2;
+      __m.resize(__size);
+      for (decltype(__size) __i = 0; __i < __size; ++__i)
+       __m.at(__i).matched = false;
+
       if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
        {
          for (auto __it : __m)
@@ -2360,8 +2385,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    *               undefined.
    *
    * @throws an exception of type regex_error.
-   *
-   * @todo Implement this function.
    */
   template<typename _Bi_iter, typename _Alloc,
           typename _Ch_type, typename _Rx_traits>
@@ -2374,6 +2397,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
       if (__re._M_automaton == nullptr)
        return false;
+
+      auto __size = __re._M_automaton->_M_sub_count();
+      __size += 2;
+      __m.resize(__size);
+      for (decltype(__size) __i = 0; __i < __size; ++__i)
+       __m.at(__i).matched = false;
+
       if (__detail::__get_executor(__first, __last, __m, __re, __flags)
          ->_M_search())
        {
@@ -2677,7 +2707,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
     operator++()
     {
-      // FIXME: In all cases in which the call to regex_search returns true,
+      // In all cases in which the call to regex_search returns true,
       // match.prefix().first shall be equal to the previous value of
       // match[0].second, and for each index i in the half-open range
       // [0, match.size()) for which match[i].matched is true,
@@ -2697,12 +2727,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags
                                 | regex_constants::match_not_null
                                 | regex_constants::match_continuous))
-                 return *this;
+                 {
+                   _M_match._M_in_iterator = true;
+                   _M_match._M_begin = _M_begin;
+                   return *this;
+                 }
                else
                  ++__start;
              }
          _M_flags |= regex_constants::match_prev_avail;
-         if (!regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
+         if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
+           {
+             _M_match._M_in_iterator = true;
+             _M_match._M_begin = _M_begin;
+           }
+         else
            _M_match = value_type();
        }
       return *this;
index 8dc779b68e176b51912b8ea89248412ba52c7e35..7f9a19af2d96aa0491a466d4e01cda93ab39f677 100644 (file)
@@ -28,7 +28,7 @@
  *  Do not attempt to use it directly. @headername{regex}
  */
 
-// TODO make comments doxygen format.
+// FIXME make comments doxygen format.
 
 // This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
 // (http://swtch.com/~rsc/regexp/regexp1.html"),
@@ -223,16 +223,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                if (__n < 0)
                  __throw_regex_error(regex_constants::error_badbrace);
                auto __end = _M_nfa._M_insert_dummy();
+               // _M_alt is the "match more" branch, and _M_next is the
+               // "match less" one. Switch _M_alt and _M_next of all created
+               // nodes. This is a hacking but IMO works well.
+               std::stack<_StateIdT> __stack;
                for (int __i = 0; __i < __n; ++__i)
                  {
                    auto __tmp = __r._M_clone();
-                   __e._M_append
-                     (_StateSeqT(_M_nfa,
-                                 _M_nfa._M_insert_alt(__tmp._M_start,
-                                                      __end, __neg),
-                                 __tmp._M_end));
+                   auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
+                                                     __end, __neg);
+                   __stack.push(__alt);
+                   __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
                  }
                __e._M_append(__end);
+               while (!__stack.empty())
+                 {
+                   auto& __tmp = _M_nfa[__stack.top()];
+                   __stack.pop();
+                   swap(__tmp._M_next, __tmp._M_alt);
+                 }
              }
            else // {3,}
              {
index 10b962ad21a2f655817236b42752b4a2e25765c5..94c25e531b32360d8c4b24b214303a901f62bad7 100644 (file)
@@ -52,19 +52,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    */
   //@{
   enum __syntax_option
-    {
-      _S_icase,
-      _S_nosubs,
-      _S_optimize,
-      _S_collate,
-      _S_ECMAScript,
-      _S_basic,
-      _S_extended,
-      _S_awk,
-      _S_grep,
-      _S_egrep,
-      _S_syntax_last
-    };
+  {
+    _S_icase,
+    _S_nosubs,
+    _S_optimize,
+    _S_collate,
+    _S_ECMAScript,
+    _S_basic,
+    _S_extended,
+    _S_awk,
+    _S_grep,
+    _S_egrep,
+    _S_syntax_last
+  };
 
   /**
    * @brief This is a bitmask type indicating how to interpret the regex.
@@ -211,20 +211,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   //@{
 
   enum __match_flag
-    {
-      _S_not_bol,
-      _S_not_eol,
-      _S_not_bow,
-      _S_not_eow,
-      _S_any,
-      _S_not_null,
-      _S_continuous,
-      _S_prev_avail,
-      _S_sed,
-      _S_no_copy,
-      _S_first_only,
-      _S_match_flag_last
-    };
+  {
+    _S_not_bol,
+    _S_not_eol,
+    _S_not_bow,
+    _S_not_eow,
+    _S_any,
+    _S_not_null,
+    _S_continuous,
+    _S_prev_avail,
+    _S_sed,
+    _S_no_copy,
+    _S_first_only,
+    _S_match_flag_last
+  };
 
   /**
    * @brief This is a bitmask type indicating regex matching rules.
@@ -233,110 +233,148 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * perform bitwise operations on these values and expect the right thing to
    * happen.
    */
-  typedef std::bitset<_S_match_flag_last> match_flag_type;
+  enum match_flag_type : unsigned int
+  {
+    /**
+     * The default matching rules.
+     */
+    match_default     = 0,
 
-  /**
-   * The default matching rules.
-   */
-  constexpr match_flag_type match_default     = 0;
+    /**
+     * The first character in the sequence [first, last) is treated as though it
+     * is not at the beginning of a line, so the character (^) in the regular
+     * expression shall not match [first, first).
+     */
+    match_not_bol     = 1 << _S_not_bol,
 
-  /**
-   * The first character in the sequence [first, last) is treated as though it
-   * is not at the beginning of a line, so the character (^) in the regular
-   * expression shall not match [first, first).
-   */
-  constexpr match_flag_type match_not_bol     = 1 << _S_not_bol;
+    /**
+     * The last character in the sequence [first, last) is treated as though it
+     * is not at the end of a line, so the character ($) in the regular
+     * expression shall not match [last, last).
+     */
+    match_not_eol     = 1 << _S_not_eol,
 
-  /**
-   * The last character in the sequence [first, last) is treated as though it
-   * is not at the end of a line, so the character ($) in the regular
-   * expression shall not match [last, last).
-   */
-  constexpr match_flag_type match_not_eol     = 1 << _S_not_eol;
+    /**
+     * The expression \\b is not matched against the sub-sequence
+     * [first,first).
+     */
+    match_not_bow     = 1 << _S_not_bow,
 
-  /**
-   * The expression \\b is not matched against the sub-sequence
-   * [first,first).
-   */
-  constexpr match_flag_type match_not_bow     = 1 << _S_not_bow;
+    /**
+     * The expression \\b should not be matched against the sub-sequence
+     * [last,last).
+     */
+    match_not_eow     = 1 << _S_not_eow,
 
-  /**
-   * The expression \\b should not be matched against the sub-sequence
-   * [last,last).
-   */
-  constexpr match_flag_type match_not_eow     = 1 << _S_not_eow;
+    /**
+     * If more than one match is possible then any match is an acceptable
+     * result.
+     */
+    match_any         = 1 << _S_any,
 
-  /**
-   * If more than one match is possible then any match is an acceptable
-   * result.
-   */
-  constexpr match_flag_type match_any         = 1 << _S_any;
+    /**
+     * The expression does not match an empty sequence.
+     */
+    match_not_null    = 1 << _S_not_null,
 
-  /**
-   * The expression does not match an empty sequence.
-   */
-  constexpr match_flag_type match_not_null    = 1 << _S_not_null;
+    /**
+     * The expression only matches a sub-sequence that begins at first .
+     */
+    match_continuous  = 1 << _S_continuous,
 
-  /**
-   * The expression only matches a sub-sequence that begins at first .
-   */
-  constexpr match_flag_type match_continuous  = 1 << _S_continuous;
+    /**
+     * --first is a valid iterator position.  When this flag is set then the
+     * flags match_not_bol and match_not_bow are ignored by the regular
+     * expression algorithms 28.11 and iterators 28.12.
+     */
+    match_prev_avail  = 1 << _S_prev_avail,
 
-  /**
-   * --first is a valid iterator position.  When this flag is set then the
-   * flags match_not_bol and match_not_bow are ignored by the regular
-   * expression algorithms 28.11 and iterators 28.12.
-   */
-  constexpr match_flag_type match_prev_avail  = 1 << _S_prev_avail;
+    /**
+     * When a regular expression match is to be replaced by a new string, the
+     * new string is constructed using the rules used by the ECMAScript replace
+     * function in ECMA- 262 [Ecma International, ECMAScript Language
+     * Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
+     * String.prototype.replace. In addition, during search and replace
+     * operations all non-overlapping occurrences of the regular expression
+     * are located and replaced, and sections of the input that did not match
+     * the expression are copied unchanged to the output string.
+     *
+     * Format strings (from ECMA-262 [15.5.4.11]):
+     * @li $$  The dollar-sign itself ($)
+     * @li $&  The matched substring.
+     * @li $`  The portion of @a string that precedes the matched substring.
+     *         This would be match_results::prefix().
+     * @li $'  The portion of @a string that follows the matched substring.
+     *         This would be match_results::suffix().
+     * @li $n  The nth capture, where n is in [1,9] and $n is not followed by a
+     *         decimal digit.  If n <= match_results::size() and the nth capture
+     *         is undefined, use the empty string instead.  If n >
+     *         match_results::size(), the result is implementation-defined.
+     * @li $nn The nnth capture, where nn is a two-digit decimal number on
+     *         [01, 99].  If nn <= match_results::size() and the nth capture is
+     *         undefined, use the empty string instead. If
+     *         nn > match_results::size(), the result is implementation-defined.
+     */
+    format_default    = 0,
 
-  /**
-   * When a regular expression match is to be replaced by a new string, the
-   * new string is constructed using the rules used by the ECMAScript replace
-   * function in ECMA- 262 [Ecma International, ECMAScript Language
-   * Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
-   * String.prototype.replace. In addition, during search and replace
-   * operations all non-overlapping occurrences of the regular expression
-   * are located and replaced, and sections of the input that did not match
-   * the expression are copied unchanged to the output string.
-   *
-   * Format strings (from ECMA-262 [15.5.4.11]):
-   * @li $$  The dollar-sign itself ($)
-   * @li $&  The matched substring.
-   * @li $`  The portion of @a string that precedes the matched substring.
-   *         This would be match_results::prefix().
-   * @li $'  The portion of @a string that follows the matched substring.
-   *         This would be match_results::suffix().
-   * @li $n  The nth capture, where n is in [1,9] and $n is not followed by a
-   *         decimal digit.  If n <= match_results::size() and the nth capture
-   *         is undefined, use the empty string instead.  If n >
-   *         match_results::size(), the result is implementation-defined.
-   * @li $nn The nnth capture, where nn is a two-digit decimal number on
-   *         [01, 99].  If nn <= match_results::size() and the nth capture is
-   *         undefined, use the empty string instead. If
-   *         nn > match_results::size(), the result is implementation-defined.
-   */
-  constexpr match_flag_type format_default    = 0;
+    /**
+     * When a regular expression match is to be replaced by a new string, the
+     * new string is constructed using the rules used by the POSIX sed utility
+     * in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable
+     * Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
+     */
+    format_sed        = 1 << _S_sed,
 
-  /**
-   * When a regular expression match is to be replaced by a new string, the
-   * new string is constructed using the rules used by the POSIX sed utility
-   * in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable
-   * Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
-   */
-  constexpr match_flag_type format_sed        = 1 << _S_sed;
+    /**
+     * During a search and replace operation, sections of the character
+     * container sequence being searched that do not match the regular
+     * expression shall not be copied to the output string.
+     */
+    format_no_copy    = 1 << _S_no_copy,
 
-  /**
-   * During a search and replace operation, sections of the character
-   * container sequence being searched that do not match the regular
-   * expression shall not be copied to the output string.
-   */
-  constexpr match_flag_type format_no_copy    = 1 << _S_no_copy;
+    /**
+     * When specified during a search and replace operation, only the first
+     * occurrence of the regular expression shall be replaced.
+     */
+    format_first_only = 1 << _S_first_only,
+  };
 
-  /**
-   * When specified during a search and replace operation, only the first
-   * occurrence of the regular expression shall be replaced.
-   */
-  constexpr match_flag_type format_first_only = 1 << _S_first_only;
+  constexpr inline match_flag_type
+  operator&(match_flag_type __a, match_flag_type __b)
+  {
+    return (match_flag_type)(static_cast<unsigned int>(__a)
+                               & static_cast<unsigned int>(__b));
+  }
+
+  constexpr inline match_flag_type
+  operator|(match_flag_type __a, match_flag_type __b)
+  {
+    return (match_flag_type)(static_cast<unsigned int>(__a)
+                               | static_cast<unsigned int>(__b));
+  }
+
+  constexpr inline match_flag_type
+  operator^(match_flag_type __a, match_flag_type __b)
+  {
+    return (match_flag_type)(static_cast<unsigned int>(__a)
+                               ^ static_cast<unsigned int>(__b));
+  }
+
+  constexpr inline match_flag_type
+  operator~(match_flag_type __a)
+  { return (match_flag_type)(~static_cast<unsigned int>(__a)); }
+
+  inline match_flag_type&
+  operator&=(match_flag_type& __a, match_flag_type __b)
+  { return __a = __a & __b; }
+
+  inline match_flag_type&
+  operator|=(match_flag_type& __a, match_flag_type __b)
+  { return __a = __a | __b; }
+
+  inline match_flag_type&
+  operator^=(match_flag_type& __a, match_flag_type __b)
+  { return __a = __a ^ __b; }
 
   //@}
 
index 3df33e030245ef33e2daf022d724937bea19b2d3..b8e9266f910255db59078be4bc1b7592feb97d3b 100644 (file)
  *  Do not attempt to use it directly. @headername{regex}
  */
 
-// TODO: convert comments to doxygen format.
+// FIXME convert comments to doxygen format.
+
+// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
+// much more similar. Also, make grouping seperated. The
+// regex_constants::nosubs enables much more simpler execution.
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
@@ -57,55 +61,107 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     class _Executor
     {
     public:
+      typedef basic_regex<_CharT, _TraitsT>           _RegexT;
       typedef match_results<_BiIter, _Alloc>          _ResultsT;
       typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
       typedef regex_constants::match_flag_type        _FlagT;
+      typedef typename _TraitsT::char_class_type      _ClassT;
 
-      virtual
-      ~_Executor()
+    public:
+      _Executor(_BiIter         __begin,
+               _BiIter         __end,
+               _ResultsT&      __results,
+               const _RegexT&  __re,
+               _FlagT          __flags)
+      : _M_begin(__begin),
+      _M_end(__end),
+      _M_results(__results),
+      _M_re(__re),
+      _M_flags(__flags)
       { }
 
       // Set matched when string exactly match the pattern.
-      virtual bool
-      _M_match() = 0;
+      bool
+      _M_match()
+      {
+       _M_match_mode = true;
+       _M_init(_M_begin);
+       return _M_main();
+      }
 
       // Set matched when some prefix of the string matches the pattern.
-      virtual bool
-      _M_search() = 0;
-
-    protected:
-      typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
-      typedef typename _TraitsT::char_class_type      _ClassT;
+      bool
+      _M_search_from_first()
+      {
+       _M_match_mode = false;
+       _M_init(_M_begin);
+       return _M_main();
+      }
 
-      _Executor(_BiIter         __begin,
-               _BiIter         __end,
-               _ResultsT&      __results,
-               _FlagT          __flags,
-               _SizeT          __size,
-               const _TraitsT& __traits)
-      : _M_current(__begin), _M_begin(__begin), _M_end(__end),
-      _M_results(__results), _M_flags(__flags), _M_traits(__traits)
+      bool
+      _M_search()
       {
-       __size += 2;
-       _M_results.resize(__size);
-       for (_SizeT __i = 0; __i < __size; ++__i)
-         _M_results[__i].matched = false;
+       if (_M_flags & regex_constants::match_continuous)
+         return _M_search_from_first();
+       auto __cur = _M_begin;
+       do
+         {
+           _M_match_mode = false;
+           _M_init(__cur);
+           if (_M_main())
+             return true;
+         }
+       // Continue when __cur == _M_end
+       while (__cur++ != _M_end);
+       return false;
       }
 
       bool
-      _M_is_word(_CharT __ch)
+      _M_is_word(_CharT __ch) const
       {
        static const _CharT __s = 'w';
-       return _M_traits.isctype(__ch,
-                                _M_traits.lookup_classname(&__s, &__s+1));
+       return _M_re._M_traits.isctype
+         (__ch, _M_re._M_traits.lookup_classname(&__s, &__s+1));
+      }
+
+      bool
+      _M_at_begin() const
+      {
+       return _M_current == _M_begin
+         && !(_M_flags & (regex_constants::match_not_bol
+                          | regex_constants::match_prev_avail));
       }
 
+      bool
+      _M_at_end() const
+      {
+       return _M_current == _M_end
+         && !(_M_flags & regex_constants::match_not_eol);
+      }
+
+      bool
+      _M_word_boundry(_State<_CharT, _TraitsT> __state) const;
+
+      bool
+      _M_lookahead(_State<_CharT, _TraitsT> __state) const;
+
+    public:
+      virtual void
+      _M_init(_BiIter __cur) = 0;
+
+      virtual void
+      _M_set_start(_StateIdT __start) = 0;
+
+      virtual bool
+      _M_main() = 0;
+
       _BiIter         _M_current;
       const _BiIter   _M_begin;
       const _BiIter   _M_end;
-      _ResultsVec&    _M_results;
-      const _TraitsT& _M_traits;
-      _FlagT          _M_flags;
+      const _RegexT&  _M_re;
+      _ResultsT&      _M_results;
+      const _FlagT    _M_flags;
+      bool            _M_match_mode;
     };
 
   // A _DFSExecutor perform a DFS on given NFA and input string. At the very
@@ -128,61 +184,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
     public:
       typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
-      typedef _NFA<_CharT, _TraitsT>                       _RegexT;
+      typedef _NFA<_CharT, _TraitsT>                       _NFAT;
+      typedef typename _BaseT::_RegexT                     _RegexT;
       typedef typename _BaseT::_ResultsT                   _ResultsT;
       typedef typename _BaseT::_ResultsVec                 _ResultsVec;
-      typedef regex_constants::match_flag_type             _FlagT;
+      typedef typename _BaseT::_FlagT                      _FlagT;
 
+    public:
       _DFSExecutor(_BiIter         __begin,
                   _BiIter         __end,
                   _ResultsT&      __results,
-                  const _RegexT&  __nfa,
-                  const _TraitsT& __traits,
+                  const _RegexT&  __re,
                   _FlagT          __flags)
-      : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
-              __traits),
-       _M_traits(__traits), _M_nfa(__nfa), _M_cur_results(this->_M_results),
-       _M_start_state(__nfa._M_start())
+      : _BaseT(__begin, __end, __results, __re, __flags),
+      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
+            (__re._M_automaton)),
+      _M_start_state(_M_nfa._M_start())
       { }
 
-      bool
-      _M_match()
+    private:
+      void
+      _M_init(_BiIter __cur)
       {
-       this->_M_current = this->_M_begin;
-       return _M_dfs<true>(_M_start_state);
+       _M_cur_results.resize(_M_nfa._M_sub_count() + 2);
+       this->_M_current = __cur;
       }
 
-      bool
-      _M_search_from_first()
-      {
-       this->_M_current = this->_M_begin;
-       return _M_dfs<false>(_M_start_state);
-      }
+      void
+      _M_set_start(_StateIdT __start)
+      { _M_start_state = __start; }
 
       bool
-      _M_search()
-      {
-       auto __cur = this->_M_begin;
-       do
-         {
-           this->_M_current = __cur;
-           if (_M_dfs<false>(_M_start_state))
-             return true;
-         }
-       // Continue when __cur == _M_end
-       while (__cur++ != this->_M_end);
-       return false;
-      }
+      _M_main()
+      { return _M_dfs(this->_M_start_state); }
 
-    private:
-      template<bool __match_mode>
-       bool
-       _M_dfs(_StateIdT __i);
+      bool
+      _M_dfs(_StateIdT __start);
 
       // To record current solution.
       _ResultsVec     _M_cur_results;
-      const _TraitsT& _M_traits;
-      const _RegexT&  _M_nfa;
+      const _NFAT&    _M_nfa;
       _StateIdT       _M_start_state;
     };
 
@@ -206,47 +247,57 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     {
     public:
       typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
-      typedef _NFA<_CharT, _TraitsT>                       _RegexT;
+      typedef _NFA<_CharT, _TraitsT>                       _NFAT;
+      typedef typename _BaseT::_RegexT                     _RegexT;
       typedef typename _BaseT::_ResultsT                   _ResultsT;
+      typedef typename _BaseT::_ResultsVec                 _ResultsVec;
+      typedef typename _BaseT::_FlagT                      _FlagT;
       // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
       // carefully work out how to compare to conflict matching states.
       //
       // A matching state is a pair(where, when); `where` is a NFA node; `when`
-      // is a _BiIter, indicating which char is the next to be mathed one.  Two
-      // matching states conflict means that they have equivalent `where` and
-      // `when`.
+      // is a _BiIter, indicating which char is the next to be matched. Two
+      // matching states conflict if they have equivalent `where` and `when`.
       //
-      // Now since we need to drop one and keep another, because at most one of
-      // them could be the final optimal solution. This behavior is affected by
+      // Now we need to drop one and keep another, because at most one of them
+      // could be the final optimal solution. This behavior is affected by
       // greedy policy.
       //
       // The definition of `greedy`:
       // For the sequence of quantifiers in NFA sorted by there start position,
-      // now maintain a vector in a matching state, with equal length to
+      // now maintain a vector in every matching state, with equal length to
       // quantifier seq, recording repeating times of every quantifier. Now to
       // compare two matching states, we just lexically compare these two
       // vectors. To win the compare(to survive), one matching state needs to
       // make its greedy quantifier count larger, and ungreedy quantifiers
       // count smaller.
       //
-      // In the implementation, we recorded negtive numbers for greedy
-      // quantifiers and positive numbers of ungreedy ones. Now a simple
+      // In the implementation, we recorded negtive counts for greedy
+      // quantifiers and positive counts of ungreedy ones. Now the implicit
       // operator<() for lexicographical_compare will emit the answer.
       //
       // When two vectors equal, it means the `where`, `when` and quantifier
-      // counts are identical, it indicates the same answer, so just return
+      // counts are identical, and indicates the same solution; so just return
       // false.
       struct _ResultsEntry
-      : private _BaseT::_ResultsVec
+      : private _ResultsVec
       {
       public:
        _ResultsEntry(unsigned int __res_sz, unsigned int __sz)
-       : _BaseT::_ResultsVec(__res_sz), _M_quant_keys(__sz)
+       : _ResultsVec(__res_sz), _M_quant_keys(__sz)
        { }
 
+       void
+       resize(unsigned int __n)
+       { _ResultsVec::resize(__n); }
+
+       unsigned int
+       size()
+       { return _ResultsVec::size(); }
+
        sub_match<_BiIter>&
        operator[](unsigned int __idx)
-       { return this->_BaseT::_ResultsVec::operator[](__idx); }
+       { return _ResultsVec::operator[](__idx); }
 
        bool
        operator<(const _ResultsEntry& __rhs) const
@@ -263,75 +314,47 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _M_inc(unsigned int __idx, bool __neg)
        { _M_quant_keys[__idx] += __neg ? 1 : -1; }
 
-       typename _BaseT::_ResultsVec
+       _ResultsVec
        _M_get()
        { return *this; }
 
       public:
        std::vector<int> _M_quant_keys;
       };
-
       typedef std::unique_ptr<_ResultsEntry>               _ResultsPtr;
-      typedef regex_constants::match_flag_type             _FlagT;
 
+    public:
       _BFSExecutor(_BiIter         __begin,
                   _BiIter         __end,
                   _ResultsT&      __results,
-                  const _RegexT&  __nfa,
-                  const _TraitsT& __traits,
+                  const _RegexT&  __re,
                   _FlagT          __flags)
-      : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
-              __traits),
-       _M_nfa(__nfa),
-       _M_cur_results(nullptr),
-       _M_start_state(__nfa._M_start())
+      : _BaseT(__begin, __end, __results, __re, __flags),
+      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
+            (__re._M_automaton)),
+      _M_start_state(_M_nfa._M_start())
       { }
 
-      bool
-      _M_match()
-      {
-       _M_init(this->_M_begin);
-       return _M_main_loop<true>();
-      }
-
-      bool
-      _M_search_from_first()
-      {
-       _M_init(this->_M_begin);
-       return _M_main_loop<false>();
-      }
-
-      bool
-      _M_search()
-      {
-       auto __cur = this->_M_begin;
-       do
-         {
-           _M_init(__cur);
-           if (_M_main_loop<false>())
-             return true;
-         }
-       // Continue when __cur == _M_end
-       while (__cur++ != this->_M_end);
-       return false;
-      }
-
     private:
       void
       _M_init(_BiIter __cur)
       {
-       _GLIBCXX_DEBUG_ASSERT(_M_start_state != _S_invalid_state_id);
+       _GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id);
        this->_M_current = __cur;
        _M_covered.clear();
-       _M_covered[_M_start_state] =
-         _ResultsPtr(new _ResultsEntry(this->_M_results.size(),
+       _ResultsVec& __res(this->_M_results);
+       _M_covered[this->_M_start_state] =
+         _ResultsPtr(new _ResultsEntry(__res.size(),
                                        _M_nfa._M_quant_count));
        _M_e_closure();
       }
 
-      template<bool __match_mode>
-       bool
-       _M_main_loop();
+      void
+      _M_set_start(_StateIdT __start)
+      { _M_start_state = __start; }
+
+      bool
+      _M_main();
 
       void
       _M_e_closure();
@@ -345,10 +368,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       std::map<_StateIdT, _ResultsPtr> _M_covered;
       // To record global optimal solution.
       _ResultsPtr                      _M_cur_results;
-      const _RegexT&                   _M_nfa;
+      const _NFAT&                     _M_nfa;
       _StateIdT                        _M_start_state;
     };
 
+  template<typename _BiIter, typename _Alloc,
+    typename _CharT, typename _TraitsT>
+    std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
+    __get_executor(_BiIter __b,
+                  _BiIter __e,
+                  match_results<_BiIter, _Alloc>& __m,
+                  const basic_regex<_CharT, _TraitsT>& __re,
+                  regex_constants::match_flag_type __flags);
+
  //@} regex-detail
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace __detail
index b110c5dc2f0ae5d35d1ada6c66e1a486cb7f1434..af2455b8a4e366605fd3b89894d586982e550357 100644 (file)
@@ -36,7 +36,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template<typename _BiIter, typename _Alloc,
     typename _CharT, typename _TraitsT>
-  template<bool __match_mode>
     bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
     _M_dfs(_StateIdT __i)
     {
@@ -44,9 +43,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        // This is not that certain. Need deeper investigate.
        return false;
       auto& __current = this->_M_current;
-      auto& __begin = this->_M_begin;
-      auto& __end = this->_M_end;
-      auto& __results = _M_cur_results;
       const auto& __state = _M_nfa[__i];
       bool __ret = false;
       switch (__state._M_opcode)
@@ -54,129 +50,115 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        case _S_opcode_alternative:
          // Greedy or not, this is a question ;)
          if (!__state._M_neg)
-           __ret = _M_dfs<__match_mode>(__state._M_alt)
-             || _M_dfs<__match_mode>(__state._M_next);
+           __ret = _M_dfs(__state._M_alt)
+             || _M_dfs(__state._M_next);
          else
-           __ret = _M_dfs<__match_mode>(__state._M_next)
-             || _M_dfs<__match_mode>(__state._M_alt);
+           __ret = _M_dfs(__state._M_next)
+             || _M_dfs(__state._M_alt);
          break;
        case _S_opcode_subexpr_begin:
          // Here's the critical part: if there's nothing changed since last
          // visit, do NOT continue. This prevents the executor from get into
          // infinite loop when use "()*" to match "".
          //
-         // Every change on __results will be roll back after the recursion
-         // step finished.
-         if (!__results[__state._M_subexpr].matched
-             || __results[__state._M_subexpr].first != __current)
+         // Every change on _M_cur_results will be roll back after the
+         // recursion step finished.
+         if (!_M_cur_results[__state._M_subexpr].matched
+             || _M_cur_results[__state._M_subexpr].first != __current)
            {
              auto __back = __current;
-             __results[__state._M_subexpr].first = __current;
-             __ret = _M_dfs<__match_mode>(__state._M_next);
-             __results[__state._M_subexpr].first = __back;
+             _M_cur_results[__state._M_subexpr].first = __current;
+             __ret = _M_dfs(__state._M_next);
+             _M_cur_results[__state._M_subexpr].first = __back;
            }
          break;
        case _S_opcode_subexpr_end:
-         if (__results[__state._M_subexpr].second != __current
-             || __results[__state._M_subexpr].matched != true)
+         if (_M_cur_results[__state._M_subexpr].second != __current
+             || _M_cur_results[__state._M_subexpr].matched != true)
            {
-             auto __back = __results[__state._M_subexpr];
-             __results[__state._M_subexpr].second = __current;
-             __results[__state._M_subexpr].matched = true;
-             __ret = _M_dfs<__match_mode>(__state._M_next);
-             __results[__state._M_subexpr] = __back;
+             auto __back = _M_cur_results[__state._M_subexpr];
+             _M_cur_results[__state._M_subexpr].second = __current;
+             _M_cur_results[__state._M_subexpr].matched = true;
+             __ret = _M_dfs(__state._M_next);
+             _M_cur_results[__state._M_subexpr] = __back;
            }
          else
-           __ret = _M_dfs<__match_mode>(__state._M_next);
+           __ret = _M_dfs(__state._M_next);
          break;
        case _S_opcode_line_begin_assertion:
-         if (__current == __begin)
-           __ret = _M_dfs<__match_mode>(__state._M_next);
+         if (this->_M_at_begin())
+           __ret = _M_dfs(__state._M_next);
          break;
        case _S_opcode_line_end_assertion:
-         if (__current == __end)
-           __ret = _M_dfs<__match_mode>(__state._M_next);
+         if (this->_M_at_end())
+           __ret = _M_dfs(__state._M_next);
          break;
-         // By definition.
        case _S_opcode_word_boundry:
-           {
-             bool __ans = false;
-             if (__current == __begin && this->_M_is_word(*__current))
-               __ans = true;
-             else if (__current == __end && this->_M_is_word(*__current))
-               __ans = true;
-             else
-               {
-                 auto __pre = __current;
-                 --__pre;
-                 if (this->_M_is_word(*__current)
-                     != this->_M_is_word(*__pre))
-                   __ans = true;
-               }
-             if (__ans == !__state._M_neg)
-               __ret = _M_dfs<__match_mode>(__state._M_next);
-           }
+         if (this->_M_word_boundry(__state) == !__state._M_neg)
+           __ret = _M_dfs(__state._M_next);
          break;
          // Here __state._M_alt offers a single start node for a sub-NFA.
          // We recursivly invoke our algorithm to match the sub-NFA.
        case _S_opcode_subexpr_lookahead:
-           {
-             _ResultsT __m;
-             // FIXME Here's not necessarily a DFSExecutor. But we need to
-             // refactor the whole NFA to a recursive tree structure first.
-             _DFSExecutor __sub(this->_M_current,
-                                this->_M_end,
-                                __m,
-                                this->_M_nfa,
-                                this->_M_traits,
-                                this->_M_flags);
-             __sub._M_start_state = __state._M_alt;
-             if (__sub._M_search_from_first() == !__state._M_neg)
-               __ret = _M_dfs<__match_mode>(__state._M_next);
-           }
+         if (this->_M_lookahead(__state) == !__state._M_neg)
+           __ret = _M_dfs(__state._M_next);
          break;
        case _S_opcode_match:
-         if (__current != __end && __state._M_matches(*__current))
+         if (__current != this->_M_end && __state._M_matches(*__current))
            {
              ++__current;
-             __ret = _M_dfs<__match_mode>(__state._M_next);
+             __ret = _M_dfs(__state._M_next);
              --__current;
            }
          break;
-       // First fetch the matched result from __results as __submatch;
+       // First fetch the matched result from _M_cur_results as __submatch;
        // then compare it with
        // (__current, __current + (__submatch.second - __submatch.first))
        // If matched, keep going; else just return to try another state.
        case _S_opcode_backref:
          {
-           auto& __submatch = __results[__state._M_backref_index];
+           auto& __submatch = _M_cur_results[__state._M_backref_index];
            if (!__submatch.matched)
              break;
            auto __last = __current;
            for (auto __tmp = __submatch.first;
-                __last != __end && __tmp != __submatch.second;
+                __last != this->_M_end && __tmp != __submatch.second;
                 ++__tmp)
              ++__last;
-           if (_M_traits.transform(__submatch.first, __submatch.second)
-               == _M_traits.transform(__current, __last))
+           if (this->_M_re._M_traits.transform(__submatch.first,
+                                               __submatch.second)
+               == this->_M_re._M_traits.transform(__current, __last))
              if (__last != __current)
                {
                  auto __backup = __current;
                  __current = __last;
-                 __ret = _M_dfs<__match_mode>(__state._M_next);
+                 __ret = _M_dfs(__state._M_next);
                  __current = __backup;
                }
              else
-               __ret = _M_dfs<__match_mode>(__state._M_next);
+               __ret = _M_dfs(__state._M_next);
          }
          break;
        case _S_opcode_accept:
-         if (__match_mode)
-           __ret = __current == __end;
+         if (this->_M_match_mode)
+           __ret = __current == this->_M_end;
          else
            __ret = true;
+         if (__current == this->_M_begin
+             && (this->_M_flags & regex_constants::match_not_null))
+           __ret = false;
          if (__ret)
-           this->_M_results = __results;
+           {
+             _ResultsVec& __res(this->_M_results);
+             if (this->_M_re.flags() & regex_constants::nosubs)
+               {
+                 _M_cur_results.resize(3); // truncate
+                 __res.resize(3);
+               }
+             for (unsigned int __i = 0; __i < _M_cur_results.size(); ++__i)
+               if (_M_cur_results[__i].matched)
+                 __res[__i] = _M_cur_results[__i];
+           }
          break;
        default:
          _GLIBCXX_DEBUG_ASSERT(false);
@@ -186,23 +168,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template<typename _BiIter, typename _Alloc,
     typename _CharT, typename _TraitsT>
-  template<bool __match_mode>
     bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
-    _M_main_loop()
+    _M_main()
     {
       bool __ret = false;
+      if (!this->_M_match_mode
+         && !(this->_M_flags & regex_constants::match_not_null))
+       __ret = _M_includes_some() || __ret;
       while (this->_M_current != this->_M_end)
        {
-         if (!__match_mode)
-           // To keep regex_search greedy, no "return true" here.
-           __ret = _M_includes_some() || __ret;
          _M_move();
          ++this->_M_current;
          _M_e_closure();
+         if (!this->_M_match_mode)
+           // To keep regex_search greedy, no "return true" here.
+           __ret = _M_includes_some() || __ret;
        }
-      __ret = _M_includes_some() || __ret;
+      if (this->_M_match_mode)
+       __ret = _M_includes_some();
       if (__ret)
-       this->_M_results = _M_cur_results->_M_get();
+       {
+         _ResultsVec& __res(this->_M_results);
+         if (this->_M_re.flags() & regex_constants::nosubs)
+           {
+             // truncate
+             _M_cur_results->resize(3);
+             __res.resize(3);
+           }
+         for (unsigned int __i = 0; __i < _M_cur_results->size(); ++__i)
+           if ((*_M_cur_results)[__i].matched)
+             __res[__i] = (*_M_cur_results)[__i];
+       }
       return __ret;
     }
 
@@ -211,11 +207,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
     _M_e_closure()
     {
-      auto& __current = this->_M_current;
       std::queue<_StateIdT> __q;
       std::vector<bool> __in_q(_M_nfa.size(), false);
-      auto& __begin = this->_M_begin;
-      auto& __end = this->_M_end;
+      auto& __current = this->_M_current;
 
       for (auto& __it : _M_covered)
        {
@@ -292,46 +286,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                }
                break;
              case _S_opcode_line_begin_assertion:
-               if (__current == __begin)
+               if (this->_M_at_begin())
                  __add_visited_state(__state._M_next);
                break;
              case _S_opcode_line_end_assertion:
-               if (__current == __end)
+               if (this->_M_at_end())
                  __add_visited_state(__state._M_next);
                break;
              case _S_opcode_word_boundry:
-                 {
-                   bool __ans = false;
-                   if (__current == __begin && this->_M_is_word(*__current))
-                     __ans = true;
-                   else if (__current == __end && this->_M_is_word(*__current))
-                     __ans = true;
-                   else
-                     {
-                       auto __pre = __current;
-                       --__pre;
-                       if (this->_M_is_word(*__current)
-                           != this->_M_is_word(*__pre))
-                         __ans = true;
-                     }
-                   if (__ans == !__state._M_neg)
-                     __add_visited_state(__state._M_next);
-                 }
+               if (this->_M_word_boundry(__state) == !__state._M_neg)
+                 __add_visited_state(__state._M_next);
                break;
              case _S_opcode_subexpr_lookahead:
-                 {
-                   _ResultsT __m;
-                   // Same comment as in DFS.
-                   _BFSExecutor __sub(this->_M_current,
-                                      this->_M_end,
-                                      __m,
-                                      this->_M_nfa,
-                                      this->_M_traits,
-                                      this->_M_flags);
-                   __sub._M_start_state = __state._M_alt;
-                   if (__sub._M_search_from_first() == !__state._M_neg)
-                     __add_visited_state(__state._M_next);
-                 }
+               if (this->_M_lookahead(__state) == !__state._M_neg)
+                 __add_visited_state(__state._M_next);
                break;
              case _S_opcode_match:
                break;
@@ -395,6 +363,44 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return __succ;
     }
 
+  // Return whether now is at some word boundry.
+  template<typename _BiIter, typename _Alloc,
+    typename _CharT, typename _TraitsT>
+    bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_word_boundry(_State<_CharT, _TraitsT> __state) const
+    {
+      // By definition.
+      bool __ans = false;
+      auto __pre = _M_current;
+      --__pre;
+      if (!(_M_at_begin() && _M_at_end()))
+       if (_M_at_begin())
+         __ans = _M_is_word(*_M_current)
+           && !(_M_flags & regex_constants::match_not_bow);
+       else if (_M_at_end())
+         __ans = _M_is_word(*__pre)
+           && !(_M_flags & regex_constants::match_not_eow);
+       else
+         __ans = _M_is_word(*_M_current)
+           != _M_is_word(*__pre);
+      return __ans;
+    }
+
+  // Return whether now match the given sub-NFA.
+  template<typename _BiIter, typename _Alloc,
+    typename _CharT, typename _TraitsT>
+    bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_lookahead(_State<_CharT, _TraitsT> __state) const
+    {
+      auto __sub = __get_executor(this->_M_current,
+                                 this->_M_end,
+                                 this->_M_results,
+                                 this->_M_re,
+                                 this->_M_flags);
+      __sub->_M_set_start(__state._M_alt);
+      return __sub->_M_search_from_first();
+    }
+
   template<typename _BiIter, typename _Alloc,
     typename _CharT, typename _TraitsT>
     std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
@@ -411,10 +417,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
        (__re._M_automaton);
       if (__p->_M_has_backref)
-       return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p,
-                                             __re._M_traits, __flags));
-      return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p,
-                                           __re._M_traits, __flags));
+       return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
+      return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
     }
 
 _GLIBCXX_END_NAMESPACE_VERSION
index 824d6ce1081513387cb8135985065a6d9ddac249..09a18f634a09a7b3adebaa389b87431375243fac 100644 (file)
@@ -68,7 +68,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _S_token_backref,
        _S_token_subexpr_begin,
        _S_token_subexpr_no_group_begin,
-       _S_token_subexpr_lookahead_begin,
+       _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
        _S_token_subexpr_end,
        _S_token_bracket_begin,
        _S_token_bracket_neg_begin,
@@ -86,7 +86,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _S_token_ungreedy,
        _S_token_line_begin,
        _S_token_line_end,
-       _S_token_word_bound,
+       _S_token_word_bound, // neg if _M_value[0] == 'n'
        _S_token_comma,
        _S_token_dup_count,
        _S_token_eof,
@@ -174,7 +174,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _StringT                      _M_value;
       bool                          _M_at_bracket_start;
     public:
-      // TODO: make them static when this file is stable.
+      // FIXME: make them static when this file is stable.
       const std::map<char, _TokenT> _M_token_map;
       const std::map<char, char>    _M_ecma_escape_map;
       const std::map<char, char>    _M_awk_escape_map;
index 4b66157278b1691f4027838c056aa870580ed0cb..abdbcc64f1f3b4d9d41b728268f3e2b581a75b0f 100644 (file)
@@ -28,7 +28,7 @@
  *  Do not attempt to use it directly. @headername{regex}
  */
 
-// TODO make comments doxygen format.
+// FIXME make comments doxygen format.
 
 // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
 // and awk
index 82e99058743cd2aee8ebbd62a8a438448ff4a9d5..3064b3b26e4bbe68e34b148629422aa5792f383c 100644 (file)
@@ -1,5 +1,4 @@
 // { dg-options "-std=gnu++11" }
-// { dg-do run { xfail *-*-* } }
 
 //
 // 2013-09-14  Tim Shen <timshen91@gmail.com>
@@ -54,22 +53,37 @@ test01()
   string sol[] =
     {
       "This",
+      "",
       "is",
+      "",
       "a",
+      "",
       "regular",
+      "",
       "expression",
+      "",
     };
 
   regex re("\\b\\w*\\b");
   int i = 0;
   for (auto it = sregex_iterator(s.begin(), s.end(), re);
-       it != sregex_iterator() && i < 5;
+       it != sregex_iterator();
        ++it)
     {
       string s((*it)[0].first, (*it)[0].second);
       VERIFY(s == sol[i++]);
     }
-  VERIFY(i == 5);
+  VERIFY(i == 10);
+
+  {
+    cmatch m;
+    regex re("(?=(as)df)as(df)");
+    regex_search("asdf", m, re);
+    VERIFY(m.size() == 3);
+    VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
+    VERIFY(m[1].matched && string(m[1].first, m[1].second) == "as");
+    VERIFY(m[2].matched && string(m[2].first, m[2].second) == "df");
+  }
 }
 
 int
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/flags.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/flags.cc
new file mode 100644 (file)
index 0000000..4be406c
--- /dev/null
@@ -0,0 +1,71 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-09-18  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.3 regex_search
+// Tests ECMAScript flags.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  cmatch m;
+  regex re("((as)(df))", regex_constants::ECMAScript | regex_constants::nosubs);
+  VERIFY(regex_search("asdf", m, re));
+  VERIFY(m.size() == 1);
+  VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
+
+  VERIFY( regex_search("a", regex("^a")));
+  VERIFY(!regex_search("a", regex("^a"), regex_constants::match_not_bol));
+  VERIFY( regex_search("a", regex("a$")));
+  VERIFY(!regex_search("a", regex("a$"), regex_constants::match_not_eol));
+  VERIFY( regex_search("a", regex("\\ba")));
+  VERIFY(!regex_search("a", regex("\\ba"), regex_constants::match_not_bow));
+  VERIFY( regex_search("a", regex("a\\b")));
+  VERIFY(!regex_search("a", regex("a\\b"), regex_constants::match_not_eow));
+  VERIFY( regex_search("", regex("")));
+  VERIFY(!regex_search("", regex(""), regex_constants::match_not_null));
+  VERIFY( regex_search("", regex("^$")));
+  VERIFY(!regex_search("", regex("^$"), regex_constants::match_not_null));
+  VERIFY( regex_search("aaa", m, regex("a*?"),
+                      regex_constants::match_not_null));
+  VERIFY(m[0].matched && string(m[0].first, m[0].second) == "a");
+  VERIFY( regex_search("asdf", regex("sdf")));
+  VERIFY(!regex_search("asdf", regex("sdf"),
+                      regex_constants::match_continuous));
+  VERIFY( regex_search(" a"+1, regex("\\ba"),
+                      regex_constants::match_prev_avail));
+  VERIFY( regex_search("ba"+1, regex("\\Ba"),
+                      regex_constants::match_prev_avail));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
index 75ef0584b8ab8a24e5bbb80ff4bb6d5dd3c69140..978565021ccb94ec30fca6def9a263d587fca93a 100644 (file)
@@ -1,5 +1,4 @@
 // { dg-options "-std=gnu++11" }
-// { dg-do run { xfail *-*-* } }
 
 //
 // 2013-07-25  Tim Shen <timshen91@gmail.com>
index cd2c68e33eeb4a6d6c6f9da12c81d99b84a2ad7b..9cb96f7a162976ab70c63971fbcba18ca1204a68 100644 (file)
@@ -1,6 +1,5 @@
 // { dg-options "-std=gnu++11" }
 // { dg-require-namedlocale "en_US.UTF-8" }
-// { dg-do run { xfail *-*-* } }
 
 //
 // 2013-09-05  Tim Shen <timshen91@gmail.com>
@@ -42,13 +41,19 @@ test01()
 
   re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})");
 
-  std::wsregex_iterator p(str2.begin(), str2.end(), re2);
-  auto a = p;
-  ++p;
-  VERIFY(a != p);
-  //for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
-  //    p != std::wsregex_iterator{}; ++p)
-  //  std::wcout << (*p)[1] << std::endl;
+  std::wstring sol[] =
+    {
+      L"ä\u2009Ä",
+      L"\u2009",
+      L"ö\u2009Ö",
+      L"\u2009",
+      L"ü\u2009Ü",
+      L"",
+    };
+  int i = 0;
+  for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
+      p != std::wsregex_iterator{}; ++p)
+    VERIFY(std::wstring((*p)[1].first, (*p)[1].second) == sol[i++]);
 }
 
 int