]> git.ipfire.org Git - thirdparty/gcc.git/blob - libstdc++-v3/include/bits/regex_compiler.tcc
regex_automaton.h: Rearrange _NFA's layout.
[thirdparty/gcc.git] / libstdc++-v3 / include / bits / regex_compiler.tcc
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex_compiler.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37 template<typename _FwdIter, typename _CharT, typename _TraitsT>
38 _Compiler<_FwdIter, _CharT, _TraitsT>::
39 _Compiler(_FwdIter __b, _FwdIter __e,
40 const _TraitsT& __traits, _FlagT __flags)
41 : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
42 _M_state_store(__flags), _M_flags(__flags)
43 {
44 _StateSeqT __r(_M_state_store,
45 _M_state_store._M_insert_subexpr_begin());
46 _M_disjunction();
47 if (!_M_stack.empty())
48 {
49 __r._M_append(_M_stack.top());
50 _M_stack.pop();
51 }
52 __r._M_append(_M_state_store._M_insert_subexpr_end());
53 __r._M_append(_M_state_store._M_insert_accept());
54 }
55
56 template<typename _FwdIter, typename _CharT, typename _TraitsT>
57 bool
58 _Compiler<_FwdIter, _CharT, _TraitsT>::
59 _M_match_token(_TokenT token)
60 {
61 if (token == _M_scanner._M_get_token())
62 {
63 _M_value = _M_scanner._M_get_value();
64 _M_scanner._M_advance();
65 return true;
66 }
67 return false;
68 }
69
70 template<typename _FwdIter, typename _CharT, typename _TraitsT>
71 void
72 _Compiler<_FwdIter, _CharT, _TraitsT>::
73 _M_disjunction()
74 {
75 this->_M_alternative();
76 if (_M_match_token(_ScannerT::_S_token_or))
77 {
78 _StateSeqT __alt1 = _M_stack.top(); _M_stack.pop();
79 this->_M_disjunction();
80 _StateSeqT __alt2 = _M_stack.top(); _M_stack.pop();
81 _M_stack.push(_StateSeqT(__alt1, __alt2));
82 }
83 }
84
85 template<typename _FwdIter, typename _CharT, typename _TraitsT>
86 void
87 _Compiler<_FwdIter, _CharT, _TraitsT>::
88 _M_alternative()
89 {
90 if (this->_M_term())
91 {
92 _StateSeqT __re = _M_stack.top(); _M_stack.pop();
93 this->_M_alternative();
94 if (!_M_stack.empty())
95 {
96 __re._M_append(_M_stack.top());
97 _M_stack.pop();
98 }
99 _M_stack.push(__re);
100 }
101 }
102
103 template<typename _FwdIter, typename _CharT, typename _TraitsT>
104 bool
105 _Compiler<_FwdIter, _CharT, _TraitsT>::
106 _M_term()
107 {
108 if (this->_M_assertion())
109 return true;
110 if (this->_M_atom())
111 {
112 this->_M_quantifier();
113 return true;
114 }
115 return false;
116 }
117
118 // TODO Implement it.
119 template<typename _FwdIter, typename _CharT, typename _TraitsT>
120 bool
121 _Compiler<_FwdIter, _CharT, _TraitsT>::
122 _M_assertion()
123 {
124 return false;
125 }
126
127 template<typename _FwdIter, typename _CharT, typename _TraitsT>
128 void
129 _Compiler<_FwdIter, _CharT, _TraitsT>::
130 _M_quantifier()
131 {
132 if (_M_match_token(_ScannerT::_S_token_closure0))
133 {
134 if (_M_stack.empty())
135 __throw_regex_error(regex_constants::error_badrepeat);
136 _StateSeqT __r(_M_stack.top(), -1);
137 __r._M_append(__r._M_front());
138 _M_stack.pop();
139 _M_stack.push(__r);
140 return;
141 }
142 if (_M_match_token(_ScannerT::_S_token_closure1))
143 {
144 if (_M_stack.empty())
145 __throw_regex_error(regex_constants::error_badrepeat);
146 _StateSeqT __r(_M_state_store,
147 _M_state_store.
148 _M_insert_alt(_S_invalid_state_id,
149 _M_stack.top()._M_front()));
150 _M_stack.top()._M_append(__r);
151 return;
152 }
153 if (_M_match_token(_ScannerT::_S_token_opt))
154 {
155 if (_M_stack.empty())
156 __throw_regex_error(regex_constants::error_badrepeat);
157 _StateSeqT __r(_M_stack.top(), -1);
158 _M_stack.pop();
159 _M_stack.push(__r);
160 return;
161 }
162 if (_M_match_token(_ScannerT::_S_token_interval_begin))
163 {
164 if (_M_stack.empty())
165 __throw_regex_error(regex_constants::error_badrepeat);
166 if (!_M_match_token(_ScannerT::_S_token_dup_count))
167 __throw_regex_error(regex_constants::error_badbrace);
168 _StateSeqT __r(_M_stack.top());
169 int __min_rep = _M_cur_int_value(10);
170 for (int __i = 1; __i < __min_rep; ++__i)
171 _M_stack.top()._M_append(__r._M_clone());
172 if (_M_match_token(_ScannerT::_S_token_comma))
173 if (_M_match_token(_ScannerT::_S_token_dup_count))
174 {
175 int __n = _M_cur_int_value(10) - __min_rep;
176 if (__n < 0)
177 __throw_regex_error(regex_constants::error_badbrace);
178 for (int __i = 0; __i < __n; ++__i)
179 {
180 _StateSeqT __r(_M_state_store,
181 _M_state_store.
182 _M_insert_alt(_S_invalid_state_id,
183 _M_stack.top()._M_front()));
184 _M_stack.top()._M_append(__r);
185 }
186 }
187 else
188 {
189 _StateSeqT __r(_M_stack.top(), -1);
190 __r._M_push_back(__r._M_front());
191 _M_stack.pop();
192 _M_stack.push(__r);
193 }
194 if (!_M_match_token(_ScannerT::_S_token_interval_end))
195 __throw_regex_error(regex_constants::error_brace);
196 return;
197 }
198 }
199
200 template<typename _FwdIter, typename _CharT, typename _TraitsT>
201 bool
202 _Compiler<_FwdIter, _CharT, _TraitsT>::
203 _M_atom()
204 {
205 if (_M_match_token(_ScannerT::_S_token_anychar))
206 {
207 _M_stack.push(_StateSeqT(_M_state_store,
208 _M_state_store._M_insert_matcher
209 (_AnyMatcher<_CharT, _TraitsT>(_M_traits))));
210 return true;
211 }
212 if (_M_try_char())
213 {
214 _M_stack.push(_StateSeqT(_M_state_store,
215 _M_state_store._M_insert_matcher
216 (_CharMatcher<_CharT, _TraitsT>(_M_value[0],
217 _M_traits,
218 _M_flags))));
219 return true;
220 }
221 if (_M_match_token(_ScannerT::_S_token_backref))
222 {
223 _M_stack.push(_StateSeqT(_M_state_store, _M_state_store.
224 _M_insert_backref(_M_cur_int_value(10))));
225 return true;
226 }
227 if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
228 {
229 int __mark = _M_state_store._M_sub_count();
230 _StateSeqT __r(_M_state_store,
231 _M_state_store.
232 _M_insert_subexpr_begin());
233 this->_M_disjunction();
234 if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
235 __throw_regex_error(regex_constants::error_paren);
236 if (!_M_stack.empty())
237 {
238 __r._M_append(_M_stack.top());
239 _M_stack.pop();
240 }
241 __r._M_append(_M_state_store._M_insert_subexpr_end());
242 _M_stack.push(__r);
243 return true;
244 }
245 return _M_bracket_expression();
246 }
247
248 template<typename _FwdIter, typename _CharT, typename _TraitsT>
249 bool
250 _Compiler<_FwdIter, _CharT, _TraitsT>::
251 _M_bracket_expression()
252 {
253 bool __neg =
254 _M_match_token(_ScannerT::_S_token_bracket_neg_begin);
255 if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
256 return false;
257 _BMatcherT __matcher(__neg, _M_traits, _M_flags);
258 _M_bracket_list(__matcher);
259 _M_stack.push(_StateSeqT(_M_state_store,
260 _M_state_store._M_insert_matcher(__matcher)));
261 return true;
262 }
263
264 template<typename _FwdIter, typename _CharT, typename _TraitsT>
265 void
266 _Compiler<_FwdIter, _CharT, _TraitsT>::
267 _M_bracket_list(_BMatcherT& __matcher)
268 {
269 if (_M_match_token(_ScannerT::_S_token_bracket_end))
270 return;
271 _M_expression_term(__matcher);
272 _M_bracket_list(__matcher);
273 return;
274 }
275
276 template<typename _FwdIter, typename _CharT, typename _TraitsT>
277 void
278 _Compiler<_FwdIter, _CharT, _TraitsT>::
279 _M_expression_term(_BMatcherT& __matcher)
280 {
281 if (_M_match_token(_ScannerT::_S_token_collsymbol))
282 {
283 __matcher._M_add_collating_element(_M_value);
284 return;
285 }
286 if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
287 {
288 __matcher._M_add_equivalence_class(_M_value);
289 return;
290 }
291 if (_M_match_token(_ScannerT::_S_token_char_class_name))
292 {
293 __matcher._M_add_character_class(_M_value);
294 return;
295 }
296 if (_M_try_char()) // [a
297 {
298 auto __ch = _M_value[0];
299 if (_M_try_char())
300 {
301 if (_M_value[0] == std::use_facet<std::ctype<_CharT>>
302 (_M_traits.getloc()).widen('-')) // [a-
303 {
304 if (_M_try_char()) // [a-z]
305 {
306 __matcher._M_make_range(__ch, _M_value[0]);
307 return;
308 }
309 // If the dash is the last character in the bracket
310 // expression, it is not special.
311 if (_M_scanner._M_get_token()
312 != _ScannerT::_S_token_bracket_end)
313 __throw_regex_error(regex_constants::error_range);
314 }
315 __matcher._M_add_char(_M_value[0]);
316 }
317 __matcher._M_add_char(__ch);
318 return;
319 }
320 __throw_regex_error(regex_constants::error_brack);
321 }
322
323 template<typename _FwdIter, typename _CharT, typename _TraitsT>
324 bool
325 _Compiler<_FwdIter, _CharT, _TraitsT>::
326 _M_try_char()
327 {
328 bool __is_char = false;
329 if (_M_match_token(_ScannerT::_S_token_oct_num))
330 {
331 __is_char = true;
332 _M_value.assign(1, _M_cur_int_value(8));
333 }
334 else if (_M_match_token(_ScannerT::_S_token_hex_num))
335 {
336 __is_char = true;
337 _M_value.assign(1, _M_cur_int_value(16));
338 }
339 else if (_M_match_token(_ScannerT::_S_token_ord_char))
340 __is_char = true;
341 return __is_char;
342 }
343
344 template<typename _FwdIter, typename _CharT, typename _TraitsT>
345 int
346 _Compiler<_FwdIter, _CharT, _TraitsT>::
347 _M_cur_int_value(int __radix)
348 {
349 int __v = 0;
350 for (typename _StringT::size_type __i = 0;
351 __i < _M_value.length(); ++__i)
352 __v =__v * __radix + _M_traits.value(_M_value[__i], __radix);
353 return __v;
354 }
355
356 template<typename _CharT, typename _TraitsT>
357 bool _BracketMatcher<_CharT, _TraitsT>::
358 operator()(_CharT __ch) const
359 {
360 bool __ret = false;
361 if (_M_traits.isctype(__ch, _M_class_set))
362 __ret = true;
363 else if (_M_char_set.count(_M_translate(__ch)))
364 __ret = true;
365 else
366 {
367 _StringT __s = _M_get_str(_M_flags & regex_constants::collate
368 ? _M_translate(__ch) : __ch);
369 for (auto& __it : _M_range_set)
370 if (__it.first <= __s && __s <= __it.second)
371 {
372 __ret = true;
373 break;
374 }
375 }
376 if (_M_is_non_matching)
377 return !__ret;
378 else
379 return __ret;
380 }
381
382 _GLIBCXX_END_NAMESPACE_VERSION
383 } // namespace __detail
384 } // namespace