1 // class template regex -*- C++ -*-
3 // Copyright (C) 2013 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex_automaton.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 namespace std
_GLIBCXX_VISIBILITY(default)
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
38 * @defgroup regex-detail Base and Implementation Classes
43 typedef int _StateIdT
;
44 typedef std::set
<_StateIdT
> _StateSet
;
45 static const _StateIdT _S_invalid_state_id
= -1;
47 template<typename _CharT
>
48 using _Matcher
= std::function
<bool (_CharT
)>;
50 /// Operation codes that define the type of transitions within the base NFA
51 /// that represents the regular expression.
54 _S_opcode_unknown
= 0,
55 _S_opcode_alternative
= 1,
56 _S_opcode_backref
= 2,
57 _S_opcode_subexpr_begin
= 4,
58 _S_opcode_subexpr_end
= 5,
59 _S_opcode_match
= 100,
60 _S_opcode_accept
= 255
63 template<typename _CharT
, typename _TraitsT
>
68 typedef _Matcher
<_CharT
> _MatcherT
;
70 _OpcodeT _M_opcode
; // type of outgoing transition
71 _StateIdT _M_next
; // outgoing transition
72 union // Since they are mutual exclusive.
74 _StateIdT _M_alt
; // for _S_opcode_alternative
75 unsigned int _M_subexpr
; // for _S_opcode_subexpr_*
76 unsigned int _M_backref_index
; // for _S_opcode_backref
78 _MatcherT _M_matches
; // for _S_opcode_match
80 explicit _State(_OpcodeT __opcode
)
81 : _M_opcode(__opcode
), _M_next(_S_invalid_state_id
)
84 _State(const _MatcherT
& __m
)
85 : _M_opcode(_S_opcode_match
), _M_next(_S_invalid_state_id
),
89 _State(_OpcodeT __opcode
, unsigned __index
)
90 : _M_opcode(__opcode
), _M_next(_S_invalid_state_id
)
92 if (__opcode
== _S_opcode_subexpr_begin
93 || __opcode
== _S_opcode_subexpr_end
)
95 else if (__opcode
== _S_opcode_backref
)
96 _M_backref_index
= __index
;
99 _State(_StateIdT __next
, _StateIdT __alt
)
100 : _M_opcode(_S_opcode_alternative
), _M_next(__next
), _M_alt(__alt
)
103 #ifdef _GLIBCXX_DEBUG
105 _M_print(std::ostream
& ostr
) const;
107 // Prints graphviz dot commands for state.
109 _M_dot(std::ostream
& __ostr
, _StateIdT __id
) const;
113 /// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
114 template<typename _CharT
, typename _TraitsT
>
118 typedef unsigned int _SizeT
;
122 _M_sub_count() const = 0;
124 #ifdef _GLIBCXX_DEBUG
125 virtual std::ostream
&
126 _M_dot(std::ostream
& __ostr
) const = 0;
130 template<typename _CharT
, typename _TraitsT
>
132 : public _Automaton
<_CharT
, _TraitsT
>,
133 public std::vector
<_State
<_CharT
, _TraitsT
>>
136 typedef _State
<_CharT
, _TraitsT
> _StateT
;
137 typedef const _Matcher
<_CharT
>& _MatcherT
;
138 typedef unsigned int _SizeT
;
139 typedef regex_constants::syntax_option_type _FlagT
;
142 : _M_flags(__f
), _M_start_state(0), _M_subexpr_count(0),
143 _M_has_backref(false)
152 { return _M_start_state
; }
155 _M_final_states() const
156 { return _M_accepting_states
; }
160 { return _M_subexpr_count
; }
165 this->push_back(_StateT(_S_opcode_accept
));
166 _M_accepting_states
.insert(this->size()-1);
167 return this->size()-1;
171 _M_insert_alt(_StateIdT __next
, _StateIdT __alt
)
173 this->push_back(_StateT(__next
, __alt
));
174 return this->size()-1;
178 _M_insert_matcher(_MatcherT __m
)
180 this->push_back(_StateT(__m
));
181 return this->size()-1;
185 _M_insert_subexpr_begin()
187 auto __id
= _M_subexpr_count
++;
188 _M_paren_stack
.push_back(__id
);
189 this->push_back(_StateT(_S_opcode_subexpr_begin
, __id
));
190 return this->size()-1;
194 _M_insert_subexpr_end()
196 this->push_back(_StateT(_S_opcode_subexpr_end
, _M_paren_stack
.back()));
197 _M_paren_stack
.pop_back();
198 return this->size()-1;
202 _M_insert_backref(unsigned int __index
);
204 #ifdef _GLIBCXX_DEBUG
206 _M_dot(std::ostream
& __ostr
) const;
210 _StateIdT _M_start_state
;
211 _StateSet _M_accepting_states
;
212 _SizeT _M_subexpr_count
;
214 std::vector
<unsigned int> _M_paren_stack
;
217 /// Describes a sequence of one or more %_State, its current start
218 /// and end(s). This structure contains fragments of an NFA during
220 template<typename _CharT
, typename _TraitsT
>
224 typedef _NFA
<_CharT
, _TraitsT
> _RegexT
;
226 // Constructs a single-node sequence
227 _StateSeq(_RegexT
& __ss
, _StateIdT __s
,
228 _StateIdT __e
= _S_invalid_state_id
)
229 : _M_nfa(__ss
), _M_start(__s
), _M_end1(__s
), _M_end2(__e
)
231 // Constructs a split sequence from two other sequencces
232 _StateSeq(const _StateSeq
& __e1
, const _StateSeq
& __e2
)
233 : _M_nfa(__e1
._M_nfa
),
234 _M_start(_M_nfa
._M_insert_alt(__e1
._M_start
, __e2
._M_start
)),
235 _M_end1(__e1
._M_end1
), _M_end2(__e2
._M_end1
)
238 // Constructs a split sequence from a single sequence
239 _StateSeq(const _StateSeq
& __e
, _StateIdT __id
)
240 : _M_nfa(__e
._M_nfa
),
241 _M_start(_M_nfa
._M_insert_alt(__id
, __e
._M_start
)),
242 _M_end1(__id
), _M_end2(__e
._M_end1
)
245 // Constructs a copy of a %_StateSeq
246 _StateSeq(const _StateSeq
& __rhs
)
247 : _M_nfa(__rhs
._M_nfa
), _M_start(__rhs
._M_start
),
248 _M_end1(__rhs
._M_end1
), _M_end2(__rhs
._M_end2
)
251 _StateSeq
& operator=(const _StateSeq
& __rhs
);
257 // Extends a sequence by one.
259 _M_push_back(_StateIdT __id
);
261 // Extends and maybe joins a sequence.
263 _M_append(_StateIdT __id
);
266 _M_append(_StateSeq
& __rhs
);
268 // Clones an entire sequence.
280 _GLIBCXX_END_NAMESPACE_VERSION
281 } // namespace __detail
284 #include <bits/regex_automaton.tcc>