1 // class template regex -*- C++ -*-
3 // Copyright (C) 2013-2016 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex_automaton.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 // This macro defines the maximal state number a NFA can have.
32 #ifndef _GLIBCXX_REGEX_STATE_LIMIT
33 #define _GLIBCXX_REGEX_STATE_LIMIT 100000
36 namespace std
_GLIBCXX_VISIBILITY(default)
40 _GLIBCXX_BEGIN_NAMESPACE_VERSION
43 * @defgroup regex-detail Base and Implementation Classes
48 typedef long _StateIdT
;
49 static const _StateIdT _S_invalid_state_id
= -1;
51 template<typename _CharT
>
52 using _Matcher
= std::function
<bool (_CharT
)>;
54 /// Operation codes that define the type of transitions within the base NFA
55 /// that represents the regular expression.
59 _S_opcode_alternative
,
62 _S_opcode_line_begin_assertion
,
63 _S_opcode_line_end_assertion
,
64 _S_opcode_word_boundary
,
65 _S_opcode_subexpr_lookahead
,
66 _S_opcode_subexpr_begin
,
67 _S_opcode_subexpr_end
,
76 _Opcode _M_opcode
; // type of outgoing transition
79 _StateIdT _M_next
; // outgoing transition
80 union // Since they are mutually exclusive.
82 size_t _M_subexpr
; // for _S_opcode_subexpr_*
83 size_t _M_backref_index
; // for _S_opcode_backref
86 // for _S_opcode_alternative, _S_opcode_repeat and
87 // _S_opcode_subexpr_lookahead
89 // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
90 // quantifiers (ungreedy if set true)
93 // For _S_opcode_match
94 __gnu_cxx::__aligned_membuf
<_Matcher
<char>> _M_matcher_storage
;
98 explicit _State_base(_Opcode __opcode
)
99 : _M_opcode(__opcode
), _M_next(_S_invalid_state_id
)
106 return _M_opcode
== _S_opcode_alternative
107 || _M_opcode
== _S_opcode_repeat
108 || _M_opcode
== _S_opcode_subexpr_lookahead
;
111 #ifdef _GLIBCXX_DEBUG
113 _M_print(std::ostream
& ostr
) const;
115 // Prints graphviz dot commands for state.
117 _M_dot(std::ostream
& __ostr
, _StateIdT __id
) const;
121 template<typename _Char_type
>
122 struct _State
: _State_base
124 typedef _Matcher
<_Char_type
> _MatcherT
;
125 static_assert(sizeof(_MatcherT
) == sizeof(_Matcher
<char>),
126 "std::function<bool(T)> has the same size as "
127 "std::function<bool(char)>");
128 static_assert(alignof(_MatcherT
) == alignof(_Matcher
<char>),
129 "std::function<bool(T)> has the same alignment as "
130 "std::function<bool(char)>");
133 _State(_Opcode __opcode
) : _State_base(__opcode
)
135 if (_M_opcode() == _S_opcode_match
)
136 new (this->_M_matcher_storage
._M_addr()) _MatcherT();
139 _State(const _State
& __rhs
) : _State_base(__rhs
)
141 if (__rhs
._M_opcode() == _S_opcode_match
)
142 new (this->_M_matcher_storage
._M_addr())
143 _MatcherT(__rhs
._M_get_matcher());
146 _State(_State
&& __rhs
) : _State_base(__rhs
)
148 if (__rhs
._M_opcode() == _S_opcode_match
)
149 new (this->_M_matcher_storage
._M_addr())
150 _MatcherT(std::move(__rhs
._M_get_matcher()));
154 operator=(const _State
&) = delete;
158 if (_M_opcode() == _S_opcode_match
)
159 _M_get_matcher().~_MatcherT();
162 // Since correct ctor and dtor rely on _M_opcode, it's better not to
163 // change it over time.
166 { return _State_base::_M_opcode
; }
169 _M_matches(_Char_type __char
) const
170 { return _M_get_matcher()(__char
); }
174 { return *static_cast<_MatcherT
*>(this->_M_matcher_storage
._M_addr()); }
177 _M_get_matcher() const
179 return *static_cast<const _MatcherT
*>(
180 this->_M_matcher_storage
._M_addr());
186 typedef size_t _SizeT
;
187 typedef regex_constants::syntax_option_type _FlagT
;
190 _NFA_base(_FlagT __f
)
191 : _M_flags(__f
), _M_start_state(0), _M_subexpr_count(0),
192 _M_has_backref(false)
195 _NFA_base(_NFA_base
&&) = default;
198 ~_NFA_base() = default;
207 { return _M_start_state
; }
211 { return _M_subexpr_count
; }
213 std::vector
<size_t> _M_paren_stack
;
215 _StateIdT _M_start_state
;
216 _SizeT _M_subexpr_count
;
220 template<typename _TraitsT
>
222 : _NFA_base
, std::vector
<_State
<typename
_TraitsT::char_type
>>
224 typedef typename
_TraitsT::char_type _Char_type
;
225 typedef _State
<_Char_type
> _StateT
;
226 typedef _Matcher
<_Char_type
> _MatcherT
;
228 _NFA(const typename
_TraitsT::locale_type
& __loc
, _FlagT __flags
)
230 { _M_traits
.imbue(__loc
); }
232 // for performance reasons _NFA objects should only be moved not copied
233 _NFA(const _NFA
&) = delete;
234 _NFA(_NFA
&&) = default;
239 auto __ret
= _M_insert_state(_StateT(_S_opcode_accept
));
244 _M_insert_alt(_StateIdT __next
, _StateIdT __alt
, bool __neg
)
246 _StateT
__tmp(_S_opcode_alternative
);
247 // It labels every quantifier to make greedy comparison easier in BFS
249 __tmp
._M_next
= __next
;
250 __tmp
._M_alt
= __alt
;
251 return _M_insert_state(std::move(__tmp
));
255 _M_insert_repeat(_StateIdT __next
, _StateIdT __alt
, bool __neg
)
257 _StateT
__tmp(_S_opcode_repeat
);
258 // It labels every quantifier to make greedy comparison easier in BFS
260 __tmp
._M_next
= __next
;
261 __tmp
._M_alt
= __alt
;
262 __tmp
._M_neg
= __neg
;
263 return _M_insert_state(std::move(__tmp
));
267 _M_insert_matcher(_MatcherT __m
)
269 _StateT
__tmp(_S_opcode_match
);
270 __tmp
._M_get_matcher() = std::move(__m
);
271 return _M_insert_state(std::move(__tmp
));
275 _M_insert_subexpr_begin()
277 auto __id
= this->_M_subexpr_count
++;
278 this->_M_paren_stack
.push_back(__id
);
279 _StateT
__tmp(_S_opcode_subexpr_begin
);
280 __tmp
._M_subexpr
= __id
;
281 return _M_insert_state(std::move(__tmp
));
285 _M_insert_subexpr_end()
287 _StateT
__tmp(_S_opcode_subexpr_end
);
288 __tmp
._M_subexpr
= this->_M_paren_stack
.back();
289 this->_M_paren_stack
.pop_back();
290 return _M_insert_state(std::move(__tmp
));
294 _M_insert_backref(size_t __index
);
297 _M_insert_line_begin()
298 { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion
)); }
302 { return _M_insert_state(_StateT(_S_opcode_line_end_assertion
)); }
305 _M_insert_word_bound(bool __neg
)
307 _StateT
__tmp(_S_opcode_word_boundary
);
308 __tmp
._M_neg
= __neg
;
309 return _M_insert_state(std::move(__tmp
));
313 _M_insert_lookahead(_StateIdT __alt
, bool __neg
)
315 _StateT
__tmp(_S_opcode_subexpr_lookahead
);
316 __tmp
._M_alt
= __alt
;
317 __tmp
._M_neg
= __neg
;
318 return _M_insert_state(std::move(__tmp
));
323 { return _M_insert_state(_StateT(_S_opcode_dummy
)); }
326 _M_insert_state(_StateT __s
)
328 this->push_back(std::move(__s
));
329 if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT
)
331 regex_constants::error_space
,
332 "Number of NFA states exceeds limit. Please use shorter regex "
333 "string, or use smaller brace expression, or make "
334 "_GLIBCXX_REGEX_STATE_LIMIT larger.");
335 return this->size()-1;
338 // Eliminate dummy node in this NFA to make it compact.
340 _M_eliminate_dummy();
342 #ifdef _GLIBCXX_DEBUG
344 _M_dot(std::ostream
& __ostr
) const;
350 /// Describes a sequence of one or more %_State, its current start
351 /// and end(s). This structure contains fragments of an NFA during
353 template<typename _TraitsT
>
357 typedef _NFA
<_TraitsT
> _RegexT
;
360 _StateSeq(_RegexT
& __nfa
, _StateIdT __s
)
361 : _M_nfa(__nfa
), _M_start(__s
), _M_end(__s
)
364 _StateSeq(_RegexT
& __nfa
, _StateIdT __s
, _StateIdT __end
)
365 : _M_nfa(__nfa
), _M_start(__s
), _M_end(__end
)
368 // Append a state on *this and change *this to the new sequence.
370 _M_append(_StateIdT __id
)
372 _M_nfa
[_M_end
]._M_next
= __id
;
376 // Append a sequence on *this and change *this to the new sequence.
378 _M_append(const _StateSeq
& __s
)
380 _M_nfa
[_M_end
]._M_next
= __s
._M_start
;
384 // Clones an entire sequence.
395 _GLIBCXX_END_NAMESPACE_VERSION
396 } // namespace __detail
399 #include <bits/regex_automaton.tcc>