]>
Commit | Line | Data |
---|---|---|
6cb784b6 TS |
1 | // class template regex -*- C++ -*- |
2 | ||
3 | // Copyright (C) 2013 Free Software Foundation, Inc. | |
4 | // | |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | /** | |
26 | * @file bits/regex_executor.tcc | |
27 | * This is an internal header file, included by other library headers. | |
28 | * Do not attempt to use it directly. @headername{regex} | |
29 | */ | |
30 | ||
31 | namespace std _GLIBCXX_VISIBILITY(default) | |
32 | { | |
33 | namespace __detail | |
34 | { | |
35 | _GLIBCXX_BEGIN_NAMESPACE_VERSION | |
36 | ||
18971f1f TS |
37 | template<typename _BiIter, typename _Alloc, |
38 | typename _CharT, typename _TraitsT> | |
39 | bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
40 | _M_search() | |
41 | { | |
42 | if (_M_flags & regex_constants::match_continuous) | |
43 | return _M_search_from_first(); | |
44 | auto __cur = _M_begin; | |
45 | do | |
46 | { | |
47 | _M_match_mode = false; | |
48 | _M_init(__cur); | |
49 | if (_M_main()) | |
50 | return true; | |
51 | } | |
52 | // Continue when __cur == _M_end | |
53 | while (__cur++ != _M_end); | |
54 | return false; | |
55 | } | |
56 | ||
6cb784b6 TS |
57 | template<typename _BiIter, typename _Alloc, |
58 | typename _CharT, typename _TraitsT> | |
6cb784b6 TS |
59 | bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: |
60 | _M_dfs(_StateIdT __i) | |
61 | { | |
1b488e33 | 62 | auto& __current = this->_M_current; |
6cb784b6 TS |
63 | const auto& __state = _M_nfa[__i]; |
64 | bool __ret = false; | |
65 | switch (__state._M_opcode) | |
e280b6ff TS |
66 | { |
67 | case _S_opcode_alternative: | |
7b86458e TS |
68 | // Greedy or not, this is a question ;) |
69 | if (!__state._M_neg) | |
b21abcee TS |
70 | __ret = _M_dfs(__state._M_alt) |
71 | || _M_dfs(__state._M_next); | |
7b86458e | 72 | else |
b21abcee TS |
73 | __ret = _M_dfs(__state._M_next) |
74 | || _M_dfs(__state._M_alt); | |
e280b6ff TS |
75 | break; |
76 | case _S_opcode_subexpr_begin: | |
77 | // Here's the critical part: if there's nothing changed since last | |
78 | // visit, do NOT continue. This prevents the executor from get into | |
79 | // infinite loop when use "()*" to match "". | |
80 | // | |
b21abcee TS |
81 | // Every change on _M_cur_results will be roll back after the |
82 | // recursion step finished. | |
83 | if (!_M_cur_results[__state._M_subexpr].matched | |
84 | || _M_cur_results[__state._M_subexpr].first != __current) | |
e280b6ff | 85 | { |
468146e0 | 86 | auto __back = _M_cur_results[__state._M_subexpr].first; |
b21abcee TS |
87 | _M_cur_results[__state._M_subexpr].first = __current; |
88 | __ret = _M_dfs(__state._M_next); | |
89 | _M_cur_results[__state._M_subexpr].first = __back; | |
e280b6ff TS |
90 | } |
91 | break; | |
92 | case _S_opcode_subexpr_end: | |
b21abcee TS |
93 | if (_M_cur_results[__state._M_subexpr].second != __current |
94 | || _M_cur_results[__state._M_subexpr].matched != true) | |
e280b6ff | 95 | { |
b21abcee TS |
96 | auto __back = _M_cur_results[__state._M_subexpr]; |
97 | _M_cur_results[__state._M_subexpr].second = __current; | |
98 | _M_cur_results[__state._M_subexpr].matched = true; | |
99 | __ret = _M_dfs(__state._M_next); | |
100 | _M_cur_results[__state._M_subexpr] = __back; | |
e280b6ff TS |
101 | } |
102 | else | |
b21abcee | 103 | __ret = _M_dfs(__state._M_next); |
e280b6ff | 104 | break; |
7b86458e | 105 | case _S_opcode_line_begin_assertion: |
b21abcee TS |
106 | if (this->_M_at_begin()) |
107 | __ret = _M_dfs(__state._M_next); | |
7b86458e TS |
108 | break; |
109 | case _S_opcode_line_end_assertion: | |
b21abcee TS |
110 | if (this->_M_at_end()) |
111 | __ret = _M_dfs(__state._M_next); | |
7b86458e | 112 | break; |
7b86458e | 113 | case _S_opcode_word_boundry: |
b21abcee TS |
114 | if (this->_M_word_boundry(__state) == !__state._M_neg) |
115 | __ret = _M_dfs(__state._M_next); | |
7b86458e TS |
116 | break; |
117 | // Here __state._M_alt offers a single start node for a sub-NFA. | |
118 | // We recursivly invoke our algorithm to match the sub-NFA. | |
119 | case _S_opcode_subexpr_lookahead: | |
b21abcee TS |
120 | if (this->_M_lookahead(__state) == !__state._M_neg) |
121 | __ret = _M_dfs(__state._M_next); | |
7b86458e | 122 | break; |
e280b6ff | 123 | case _S_opcode_match: |
b21abcee | 124 | if (__current != this->_M_end && __state._M_matches(*__current)) |
e280b6ff TS |
125 | { |
126 | ++__current; | |
b21abcee | 127 | __ret = _M_dfs(__state._M_next); |
e280b6ff TS |
128 | --__current; |
129 | } | |
130 | break; | |
b21abcee | 131 | // First fetch the matched result from _M_cur_results as __submatch; |
e280b6ff TS |
132 | // then compare it with |
133 | // (__current, __current + (__submatch.second - __submatch.first)) | |
134 | // If matched, keep going; else just return to try another state. | |
135 | case _S_opcode_backref: | |
136 | { | |
b21abcee | 137 | auto& __submatch = _M_cur_results[__state._M_backref_index]; |
e280b6ff TS |
138 | if (!__submatch.matched) |
139 | break; | |
140 | auto __last = __current; | |
141 | for (auto __tmp = __submatch.first; | |
b21abcee | 142 | __last != this->_M_end && __tmp != __submatch.second; |
e280b6ff TS |
143 | ++__tmp) |
144 | ++__last; | |
b21abcee TS |
145 | if (this->_M_re._M_traits.transform(__submatch.first, |
146 | __submatch.second) | |
147 | == this->_M_re._M_traits.transform(__current, __last)) | |
e280b6ff TS |
148 | if (__last != __current) |
149 | { | |
150 | auto __backup = __current; | |
151 | __current = __last; | |
b21abcee | 152 | __ret = _M_dfs(__state._M_next); |
e280b6ff TS |
153 | __current = __backup; |
154 | } | |
155 | else | |
b21abcee | 156 | __ret = _M_dfs(__state._M_next); |
e280b6ff TS |
157 | } |
158 | break; | |
159 | case _S_opcode_accept: | |
b21abcee TS |
160 | if (this->_M_match_mode) |
161 | __ret = __current == this->_M_end; | |
e280b6ff TS |
162 | else |
163 | __ret = true; | |
b21abcee TS |
164 | if (__current == this->_M_begin |
165 | && (this->_M_flags & regex_constants::match_not_null)) | |
166 | __ret = false; | |
e280b6ff | 167 | if (__ret) |
c2669da9 | 168 | this->_M_set_results(_M_cur_results); |
e280b6ff TS |
169 | break; |
170 | default: | |
171 | _GLIBCXX_DEBUG_ASSERT(false); | |
172 | } | |
6cb784b6 TS |
173 | return __ret; |
174 | } | |
175 | ||
176 | template<typename _BiIter, typename _Alloc, | |
177 | typename _CharT, typename _TraitsT> | |
7b86458e | 178 | bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: |
b21abcee | 179 | _M_main() |
6cb784b6 | 180 | { |
18971f1f | 181 | _M_e_closure(); |
7b86458e | 182 | bool __ret = false; |
b21abcee TS |
183 | if (!this->_M_match_mode |
184 | && !(this->_M_flags & regex_constants::match_not_null)) | |
185 | __ret = _M_includes_some() || __ret; | |
6cb784b6 | 186 | while (this->_M_current != this->_M_end) |
e280b6ff | 187 | { |
e280b6ff TS |
188 | _M_move(); |
189 | ++this->_M_current; | |
18971f1f TS |
190 | if (_M_stack._M_empty()) |
191 | break; | |
e280b6ff | 192 | _M_e_closure(); |
b21abcee TS |
193 | if (!this->_M_match_mode) |
194 | // To keep regex_search greedy, no "return true" here. | |
195 | __ret = _M_includes_some() || __ret; | |
e280b6ff | 196 | } |
b21abcee TS |
197 | if (this->_M_match_mode) |
198 | __ret = _M_includes_some(); | |
7b86458e | 199 | if (__ret) |
c2669da9 | 200 | this->_M_set_results(_M_cur_results->_M_get()); |
18971f1f TS |
201 | _M_match_stack._M_clear(); |
202 | _GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty()); | |
7b86458e | 203 | return __ret; |
6cb784b6 TS |
204 | } |
205 | ||
6cb784b6 TS |
206 | template<typename _BiIter, typename _Alloc, |
207 | typename _CharT, typename _TraitsT> | |
208 | void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
209 | _M_e_closure() | |
210 | { | |
b21abcee | 211 | auto& __current = this->_M_current; |
7b86458e | 212 | |
18971f1f | 213 | while (!_M_stack._M_empty()) |
e280b6ff | 214 | { |
18971f1f TS |
215 | auto __u = _M_stack._M_pop(); |
216 | _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u)); | |
e280b6ff | 217 | const auto& __state = _M_nfa[__u]; |
6cb784b6 | 218 | |
7b86458e TS |
219 | // Can be implemented using method, but there will be too many |
220 | // arguments. I would use macro function before C++11, but lambda is | |
221 | // a better choice, since hopefully compiler can inline it. | |
18971f1f | 222 | auto __add_visited_state = [=](_StateIdT __v) |
e280b6ff | 223 | { |
18971f1f | 224 | if (_M_covered.count(__v) == 0) |
e280b6ff | 225 | { |
7b86458e TS |
226 | _M_covered[__v] = |
227 | _ResultsPtr(new _ResultsEntry(*_M_covered[__u])); | |
18971f1f TS |
228 | _M_stack._M_push(__v); |
229 | return; | |
230 | } | |
231 | auto& __cu = _M_covered[__u]; | |
232 | auto& __cv = _M_covered[__v]; | |
233 | if (*__cu < *__cv) | |
234 | { | |
235 | __cv = _ResultsPtr(new _ResultsEntry(*__cu)); | |
e280b6ff TS |
236 | // if a state is updated, it's outgoing neighbors should be |
237 | // reconsidered too. Push them to the queue. | |
18971f1f | 238 | _M_stack._M_push(__v); |
e280b6ff TS |
239 | } |
240 | }; | |
6cb784b6 | 241 | |
7b86458e | 242 | // Identical to DFS's switch part. |
e280b6ff TS |
243 | switch (__state._M_opcode) |
244 | { | |
7b86458e TS |
245 | // Needs to maintain quantifier count vector here. A quantifier |
246 | // must be concerned with a alt node. | |
e280b6ff | 247 | case _S_opcode_alternative: |
7b86458e TS |
248 | { |
249 | __add_visited_state(__state._M_next); | |
18971f1f TS |
250 | auto& __cu = *_M_covered[__u]; |
251 | auto __back = __cu._M_quant_keys[__state._M_quant_index]; | |
252 | __cu._M_inc(__state._M_quant_index, __state._M_neg); | |
7b86458e | 253 | __add_visited_state(__state._M_alt); |
18971f1f | 254 | __cu._M_quant_keys[__state._M_quant_index] = __back; |
7b86458e | 255 | } |
e280b6ff TS |
256 | break; |
257 | case _S_opcode_subexpr_begin: | |
258 | { | |
7b86458e TS |
259 | auto& __sub = (*_M_covered[__u])[__state._M_subexpr]; |
260 | if (!__sub.matched || __sub.first != __current) | |
261 | { | |
262 | auto __back = __sub.first; | |
263 | __sub.first = __current; | |
264 | __add_visited_state(__state._M_next); | |
265 | __sub.first = __back; | |
266 | } | |
e280b6ff TS |
267 | } |
268 | break; | |
269 | case _S_opcode_subexpr_end: | |
270 | { | |
271 | auto& __cu = *_M_covered[__u]; | |
272 | auto __back = __cu[__state._M_subexpr]; | |
273 | __cu[__state._M_subexpr].second = __current; | |
274 | __cu[__state._M_subexpr].matched = true; | |
275 | __add_visited_state(__state._M_next); | |
276 | __cu[__state._M_subexpr] = __back; | |
277 | } | |
278 | break; | |
7b86458e | 279 | case _S_opcode_line_begin_assertion: |
b21abcee | 280 | if (this->_M_at_begin()) |
7b86458e TS |
281 | __add_visited_state(__state._M_next); |
282 | break; | |
283 | case _S_opcode_line_end_assertion: | |
b21abcee | 284 | if (this->_M_at_end()) |
7b86458e TS |
285 | __add_visited_state(__state._M_next); |
286 | break; | |
287 | case _S_opcode_word_boundry: | |
b21abcee TS |
288 | if (this->_M_word_boundry(__state) == !__state._M_neg) |
289 | __add_visited_state(__state._M_next); | |
7b86458e TS |
290 | break; |
291 | case _S_opcode_subexpr_lookahead: | |
b21abcee TS |
292 | if (this->_M_lookahead(__state) == !__state._M_neg) |
293 | __add_visited_state(__state._M_next); | |
7b86458e | 294 | break; |
e280b6ff | 295 | case _S_opcode_match: |
18971f1f | 296 | _M_match_stack._M_push(__u); |
e280b6ff TS |
297 | break; |
298 | case _S_opcode_accept: | |
e280b6ff TS |
299 | break; |
300 | default: | |
301 | _GLIBCXX_DEBUG_ASSERT(false); | |
302 | } | |
303 | } | |
6cb784b6 TS |
304 | } |
305 | ||
306 | template<typename _BiIter, typename _Alloc, | |
307 | typename _CharT, typename _TraitsT> | |
308 | void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
309 | _M_move() | |
310 | { | |
311 | decltype(_M_covered) __next; | |
18971f1f | 312 | while (!_M_match_stack._M_empty()) |
e280b6ff | 313 | { |
18971f1f TS |
314 | auto __u = _M_match_stack._M_pop(); |
315 | const auto& __state = _M_nfa[__u]; | |
316 | auto& __cu = _M_covered[__u]; | |
317 | if (__state._M_matches(*this->_M_current) | |
318 | && (__next.count(__state._M_next) == 0 | |
319 | || *__cu < *__next[__state._M_next])) | |
320 | { | |
321 | __next[__state._M_next] = std::move(__cu); | |
322 | _M_stack._M_push(__state._M_next); | |
323 | } | |
e280b6ff | 324 | } |
6cb784b6 TS |
325 | _M_covered = move(__next); |
326 | } | |
327 | ||
328 | template<typename _BiIter, typename _Alloc, | |
329 | typename _CharT, typename _TraitsT> | |
330 | bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
7b86458e | 331 | _M_includes_some() |
6cb784b6 | 332 | { |
7b86458e | 333 | bool __succ = false; |
18971f1f TS |
334 | for (auto __u : _M_nfa._M_final_states()) |
335 | if (_M_covered.count(__u)) | |
336 | { | |
337 | __succ = true; | |
338 | auto& __cu = _M_covered[__u]; | |
339 | if (_M_cur_results == nullptr || *__cu < *_M_cur_results) | |
340 | _M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu)); | |
341 | } | |
7b86458e | 342 | return __succ; |
6cb784b6 TS |
343 | } |
344 | ||
b21abcee TS |
345 | // Return whether now is at some word boundry. |
346 | template<typename _BiIter, typename _Alloc, | |
347 | typename _CharT, typename _TraitsT> | |
348 | bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
349 | _M_word_boundry(_State<_CharT, _TraitsT> __state) const | |
350 | { | |
351 | // By definition. | |
352 | bool __ans = false; | |
353 | auto __pre = _M_current; | |
354 | --__pre; | |
355 | if (!(_M_at_begin() && _M_at_end())) | |
356 | if (_M_at_begin()) | |
357 | __ans = _M_is_word(*_M_current) | |
358 | && !(_M_flags & regex_constants::match_not_bow); | |
359 | else if (_M_at_end()) | |
360 | __ans = _M_is_word(*__pre) | |
361 | && !(_M_flags & regex_constants::match_not_eow); | |
362 | else | |
363 | __ans = _M_is_word(*_M_current) | |
364 | != _M_is_word(*__pre); | |
365 | return __ans; | |
366 | } | |
367 | ||
c2669da9 TS |
368 | template<typename _BiIter, typename _Alloc, |
369 | typename _CharT, typename _TraitsT> | |
370 | void _Executor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
371 | _M_set_results(_ResultsVec& __cur_results) | |
372 | { | |
6cb43087 | 373 | for (size_t __i = 0; __i < __cur_results.size(); ++__i) |
c2669da9 TS |
374 | if (__cur_results[__i].matched) |
375 | _M_results[__i] = __cur_results[__i]; | |
376 | } | |
377 | ||
6cb43087 TS |
378 | enum class _RegexExecutorPolicy : int |
379 | { _S_auto, _S_force_dfs }; | |
380 | ||
6cb784b6 | 381 | template<typename _BiIter, typename _Alloc, |
6cb43087 TS |
382 | typename _CharT, typename _TraitsT, |
383 | _RegexExecutorPolicy __policy> | |
6cb784b6 TS |
384 | std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> |
385 | __get_executor(_BiIter __b, | |
e280b6ff | 386 | _BiIter __e, |
c2669da9 | 387 | std::vector<sub_match<_BiIter>, _Alloc>& __m, |
e280b6ff TS |
388 | const basic_regex<_CharT, _TraitsT>& __re, |
389 | regex_constants::match_flag_type __flags) | |
6cb784b6 TS |
390 | { |
391 | typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> | |
e280b6ff | 392 | _ExecutorPtr; |
6cb784b6 | 393 | typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT; |
1b488e33 | 394 | typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT; |
6cb784b6 | 395 | auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>> |
e280b6ff | 396 | (__re._M_automaton); |
6cb43087 TS |
397 | if (__policy == _RegexExecutorPolicy::_S_force_dfs |
398 | || (__policy == _RegexExecutorPolicy::_S_auto && __p->_M_has_backref)) | |
b21abcee TS |
399 | return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags)); |
400 | return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags)); | |
6cb784b6 TS |
401 | } |
402 | ||
403 | _GLIBCXX_END_NAMESPACE_VERSION | |
404 | } // namespace __detail | |
405 | } // namespace |