]>
Commit | Line | Data |
---|---|---|
6cb784b6 TS |
1 | // class template regex -*- C++ -*- |
2 | ||
3 | // Copyright (C) 2013 Free Software Foundation, Inc. | |
4 | // | |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | /** | |
26 | * @file bits/regex_executor.tcc | |
27 | * This is an internal header file, included by other library headers. | |
28 | * Do not attempt to use it directly. @headername{regex} | |
29 | */ | |
30 | ||
31 | namespace std _GLIBCXX_VISIBILITY(default) | |
32 | { | |
33 | namespace __detail | |
34 | { | |
35 | _GLIBCXX_BEGIN_NAMESPACE_VERSION | |
36 | ||
6cb784b6 TS |
37 | template<typename _BiIter, typename _Alloc, |
38 | typename _CharT, typename _TraitsT> | |
39 | template<bool __match_mode> | |
40 | bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
41 | _M_dfs(_StateIdT __i) | |
42 | { | |
6cb784b6 | 43 | if (__i == _S_invalid_state_id) |
e280b6ff TS |
44 | // This is not that certain. Need deeper investigate. |
45 | return false; | |
1b488e33 | 46 | auto& __current = this->_M_current; |
7b86458e | 47 | auto& __begin = this->_M_begin; |
1b488e33 | 48 | auto& __end = this->_M_end; |
7b86458e | 49 | auto& __results = _M_cur_results; |
6cb784b6 TS |
50 | const auto& __state = _M_nfa[__i]; |
51 | bool __ret = false; | |
52 | switch (__state._M_opcode) | |
e280b6ff TS |
53 | { |
54 | case _S_opcode_alternative: | |
7b86458e TS |
55 | // Greedy or not, this is a question ;) |
56 | if (!__state._M_neg) | |
57 | __ret = _M_dfs<__match_mode>(__state._M_alt) | |
58 | || _M_dfs<__match_mode>(__state._M_next); | |
59 | else | |
60 | __ret = _M_dfs<__match_mode>(__state._M_next) | |
61 | || _M_dfs<__match_mode>(__state._M_alt); | |
e280b6ff TS |
62 | break; |
63 | case _S_opcode_subexpr_begin: | |
64 | // Here's the critical part: if there's nothing changed since last | |
65 | // visit, do NOT continue. This prevents the executor from get into | |
66 | // infinite loop when use "()*" to match "". | |
67 | // | |
68 | // Every change on __results will be roll back after the recursion | |
69 | // step finished. | |
70 | if (!__results[__state._M_subexpr].matched | |
71 | || __results[__state._M_subexpr].first != __current) | |
72 | { | |
73 | auto __back = __current; | |
74 | __results[__state._M_subexpr].first = __current; | |
75 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
76 | __results[__state._M_subexpr].first = __back; | |
77 | } | |
78 | break; | |
79 | case _S_opcode_subexpr_end: | |
80 | if (__results[__state._M_subexpr].second != __current | |
81 | || __results[__state._M_subexpr].matched != true) | |
82 | { | |
83 | auto __back = __results[__state._M_subexpr]; | |
84 | __results[__state._M_subexpr].second = __current; | |
85 | __results[__state._M_subexpr].matched = true; | |
86 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
87 | __results[__state._M_subexpr] = __back; | |
88 | } | |
89 | else | |
90 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
91 | break; | |
7b86458e TS |
92 | case _S_opcode_line_begin_assertion: |
93 | if (__current == __begin) | |
94 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
95 | break; | |
96 | case _S_opcode_line_end_assertion: | |
97 | if (__current == __end) | |
98 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
99 | break; | |
100 | // By definition. | |
101 | case _S_opcode_word_boundry: | |
102 | { | |
103 | bool __ans = false; | |
104 | if (__current == __begin && this->_M_is_word(*__current)) | |
105 | __ans = true; | |
106 | else if (__current == __end && this->_M_is_word(*__current)) | |
107 | __ans = true; | |
108 | else | |
109 | { | |
110 | auto __pre = __current; | |
111 | --__pre; | |
112 | if (this->_M_is_word(*__current) | |
113 | != this->_M_is_word(*__pre)) | |
114 | __ans = true; | |
115 | } | |
116 | if (__ans == !__state._M_neg) | |
117 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
118 | } | |
119 | break; | |
120 | // Here __state._M_alt offers a single start node for a sub-NFA. | |
121 | // We recursivly invoke our algorithm to match the sub-NFA. | |
122 | case _S_opcode_subexpr_lookahead: | |
123 | { | |
124 | _ResultsT __m; | |
125 | // FIXME Here's not necessarily a DFSExecutor. But we need to | |
126 | // refactor the whole NFA to a recursive tree structure first. | |
127 | _DFSExecutor __sub(this->_M_current, | |
128 | this->_M_end, | |
129 | __m, | |
130 | this->_M_nfa, | |
131 | this->_M_traits, | |
132 | this->_M_flags); | |
133 | __sub._M_start_state = __state._M_alt; | |
134 | if (__sub._M_search_from_first() == !__state._M_neg) | |
135 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
136 | } | |
137 | break; | |
e280b6ff TS |
138 | case _S_opcode_match: |
139 | if (__current != __end && __state._M_matches(*__current)) | |
140 | { | |
141 | ++__current; | |
142 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
143 | --__current; | |
144 | } | |
145 | break; | |
146 | // First fetch the matched result from __results as __submatch; | |
147 | // then compare it with | |
148 | // (__current, __current + (__submatch.second - __submatch.first)) | |
149 | // If matched, keep going; else just return to try another state. | |
150 | case _S_opcode_backref: | |
151 | { | |
152 | auto& __submatch = __results[__state._M_backref_index]; | |
153 | if (!__submatch.matched) | |
154 | break; | |
155 | auto __last = __current; | |
156 | for (auto __tmp = __submatch.first; | |
157 | __last != __end && __tmp != __submatch.second; | |
158 | ++__tmp) | |
159 | ++__last; | |
160 | if (_M_traits.transform(__submatch.first, __submatch.second) | |
161 | == _M_traits.transform(__current, __last)) | |
162 | if (__last != __current) | |
163 | { | |
164 | auto __backup = __current; | |
165 | __current = __last; | |
166 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
167 | __current = __backup; | |
168 | } | |
169 | else | |
170 | __ret = _M_dfs<__match_mode>(__state._M_next); | |
171 | } | |
172 | break; | |
173 | case _S_opcode_accept: | |
174 | if (__match_mode) | |
175 | __ret = __current == __end; | |
176 | else | |
177 | __ret = true; | |
178 | if (__ret) | |
179 | this->_M_results = __results; | |
180 | break; | |
181 | default: | |
182 | _GLIBCXX_DEBUG_ASSERT(false); | |
183 | } | |
6cb784b6 TS |
184 | return __ret; |
185 | } | |
186 | ||
187 | template<typename _BiIter, typename _Alloc, | |
188 | typename _CharT, typename _TraitsT> | |
189 | template<bool __match_mode> | |
7b86458e | 190 | bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: |
6cb784b6 TS |
191 | _M_main_loop() |
192 | { | |
7b86458e | 193 | bool __ret = false; |
6cb784b6 | 194 | while (this->_M_current != this->_M_end) |
e280b6ff TS |
195 | { |
196 | if (!__match_mode) | |
7b86458e TS |
197 | // To keep regex_search greedy, no "return true" here. |
198 | __ret = _M_includes_some() || __ret; | |
e280b6ff TS |
199 | _M_move(); |
200 | ++this->_M_current; | |
201 | _M_e_closure(); | |
202 | } | |
7b86458e TS |
203 | __ret = _M_includes_some() || __ret; |
204 | if (__ret) | |
205 | this->_M_results = _M_cur_results->_M_get(); | |
206 | return __ret; | |
6cb784b6 TS |
207 | } |
208 | ||
6cb784b6 TS |
209 | template<typename _BiIter, typename _Alloc, |
210 | typename _CharT, typename _TraitsT> | |
211 | void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
212 | _M_e_closure() | |
213 | { | |
214 | auto& __current = this->_M_current; | |
215 | std::queue<_StateIdT> __q; | |
216 | std::vector<bool> __in_q(_M_nfa.size(), false); | |
7b86458e TS |
217 | auto& __begin = this->_M_begin; |
218 | auto& __end = this->_M_end; | |
219 | ||
6cb784b6 | 220 | for (auto& __it : _M_covered) |
e280b6ff TS |
221 | { |
222 | __in_q[__it.first] = true; | |
223 | __q.push(__it.first); | |
224 | } | |
6cb784b6 | 225 | while (!__q.empty()) |
e280b6ff TS |
226 | { |
227 | auto __u = __q.front(); | |
228 | __q.pop(); | |
229 | __in_q[__u] = false; | |
230 | const auto& __state = _M_nfa[__u]; | |
6cb784b6 | 231 | |
7b86458e TS |
232 | // Can be implemented using method, but there will be too many |
233 | // arguments. I would use macro function before C++11, but lambda is | |
234 | // a better choice, since hopefully compiler can inline it. | |
e280b6ff TS |
235 | auto __add_visited_state = [&](_StateIdT __v) |
236 | { | |
237 | if (__v == _S_invalid_state_id) | |
238 | return; | |
239 | if (_M_covered.count(__u) != 0 | |
240 | && (_M_covered.count(__v) == 0 | |
7b86458e | 241 | || *_M_covered[__u] < *_M_covered[__v])) |
e280b6ff | 242 | { |
7b86458e TS |
243 | _M_covered[__v] = |
244 | _ResultsPtr(new _ResultsEntry(*_M_covered[__u])); | |
e280b6ff TS |
245 | // if a state is updated, it's outgoing neighbors should be |
246 | // reconsidered too. Push them to the queue. | |
247 | if (!__in_q[__v]) | |
248 | { | |
249 | __in_q[__v] = true; | |
250 | __q.push(__v); | |
251 | } | |
252 | } | |
253 | }; | |
6cb784b6 | 254 | |
7b86458e | 255 | // Identical to DFS's switch part. |
e280b6ff TS |
256 | switch (__state._M_opcode) |
257 | { | |
7b86458e TS |
258 | // Needs to maintain quantifier count vector here. A quantifier |
259 | // must be concerned with a alt node. | |
e280b6ff | 260 | case _S_opcode_alternative: |
7b86458e TS |
261 | { |
262 | __add_visited_state(__state._M_next); | |
263 | auto __back = | |
264 | _M_covered[__u]->_M_quant_keys[__state._M_quant_index]; | |
265 | _M_covered[__u]->_M_inc(__state._M_quant_index, | |
266 | __state._M_neg); | |
267 | __add_visited_state(__state._M_alt); | |
268 | _M_covered[__u]->_M_quant_keys[__state._M_quant_index] | |
269 | = __back; | |
270 | } | |
e280b6ff TS |
271 | break; |
272 | case _S_opcode_subexpr_begin: | |
273 | { | |
7b86458e TS |
274 | auto& __sub = (*_M_covered[__u])[__state._M_subexpr]; |
275 | if (!__sub.matched || __sub.first != __current) | |
276 | { | |
277 | auto __back = __sub.first; | |
278 | __sub.first = __current; | |
279 | __add_visited_state(__state._M_next); | |
280 | __sub.first = __back; | |
281 | } | |
e280b6ff TS |
282 | } |
283 | break; | |
284 | case _S_opcode_subexpr_end: | |
285 | { | |
286 | auto& __cu = *_M_covered[__u]; | |
287 | auto __back = __cu[__state._M_subexpr]; | |
288 | __cu[__state._M_subexpr].second = __current; | |
289 | __cu[__state._M_subexpr].matched = true; | |
290 | __add_visited_state(__state._M_next); | |
291 | __cu[__state._M_subexpr] = __back; | |
292 | } | |
293 | break; | |
7b86458e TS |
294 | case _S_opcode_line_begin_assertion: |
295 | if (__current == __begin) | |
296 | __add_visited_state(__state._M_next); | |
297 | break; | |
298 | case _S_opcode_line_end_assertion: | |
299 | if (__current == __end) | |
300 | __add_visited_state(__state._M_next); | |
301 | break; | |
302 | case _S_opcode_word_boundry: | |
303 | { | |
304 | bool __ans = false; | |
305 | if (__current == __begin && this->_M_is_word(*__current)) | |
306 | __ans = true; | |
307 | else if (__current == __end && this->_M_is_word(*__current)) | |
308 | __ans = true; | |
309 | else | |
310 | { | |
311 | auto __pre = __current; | |
312 | --__pre; | |
313 | if (this->_M_is_word(*__current) | |
314 | != this->_M_is_word(*__pre)) | |
315 | __ans = true; | |
316 | } | |
317 | if (__ans == !__state._M_neg) | |
318 | __add_visited_state(__state._M_next); | |
319 | } | |
320 | break; | |
321 | case _S_opcode_subexpr_lookahead: | |
322 | { | |
323 | _ResultsT __m; | |
324 | // Same comment as in DFS. | |
325 | _BFSExecutor __sub(this->_M_current, | |
326 | this->_M_end, | |
327 | __m, | |
328 | this->_M_nfa, | |
329 | this->_M_traits, | |
330 | this->_M_flags); | |
331 | __sub._M_start_state = __state._M_alt; | |
332 | if (__sub._M_search_from_first() == !__state._M_neg) | |
333 | __add_visited_state(__state._M_next); | |
334 | } | |
335 | break; | |
e280b6ff TS |
336 | case _S_opcode_match: |
337 | break; | |
338 | case _S_opcode_accept: | |
e280b6ff TS |
339 | break; |
340 | default: | |
341 | _GLIBCXX_DEBUG_ASSERT(false); | |
342 | } | |
343 | } | |
6cb784b6 TS |
344 | } |
345 | ||
346 | template<typename _BiIter, typename _Alloc, | |
347 | typename _CharT, typename _TraitsT> | |
348 | void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
349 | _M_move() | |
350 | { | |
351 | decltype(_M_covered) __next; | |
352 | for (auto& __it : _M_covered) | |
e280b6ff TS |
353 | { |
354 | const auto& __state = _M_nfa[__it.first]; | |
355 | if (__state._M_opcode == _S_opcode_match | |
356 | && __state._M_matches(*this->_M_current)) | |
357 | if (__state._M_next != _S_invalid_state_id) | |
358 | if (__next.count(__state._M_next) == 0 | |
7b86458e | 359 | || *__it.second < *__next[__state._M_next]) |
e280b6ff TS |
360 | __next[__state._M_next] = move(__it.second); |
361 | } | |
6cb784b6 TS |
362 | _M_covered = move(__next); |
363 | } | |
364 | ||
365 | template<typename _BiIter, typename _Alloc, | |
366 | typename _CharT, typename _TraitsT> | |
367 | bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: | |
7b86458e | 368 | _M_includes_some() |
6cb784b6 TS |
369 | { |
370 | auto& __s = _M_nfa._M_final_states(); | |
371 | auto& __t = _M_covered; | |
7b86458e | 372 | bool __succ = false; |
6cb784b6 | 373 | if (__s.size() > 0 && __t.size() > 0) |
e280b6ff TS |
374 | { |
375 | auto __first = __s.begin(); | |
376 | auto __second = __t.begin(); | |
377 | while (__first != __s.end() && __second != __t.end()) | |
378 | { | |
379 | if (*__first < __second->first) | |
380 | ++__first; | |
7b86458e | 381 | else if (*__first > __second->first) |
e280b6ff TS |
382 | ++__second; |
383 | else | |
384 | { | |
7b86458e TS |
385 | if (_M_cur_results == nullptr |
386 | || *__second->second < *_M_cur_results) | |
387 | _M_cur_results = | |
388 | _ResultsPtr(new _ResultsEntry(*__second->second)); | |
389 | __succ = true; | |
390 | ++__first; | |
391 | ++__second; | |
e280b6ff TS |
392 | } |
393 | } | |
394 | } | |
7b86458e | 395 | return __succ; |
6cb784b6 TS |
396 | } |
397 | ||
398 | template<typename _BiIter, typename _Alloc, | |
399 | typename _CharT, typename _TraitsT> | |
400 | std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> | |
401 | __get_executor(_BiIter __b, | |
e280b6ff TS |
402 | _BiIter __e, |
403 | match_results<_BiIter, _Alloc>& __m, | |
404 | const basic_regex<_CharT, _TraitsT>& __re, | |
405 | regex_constants::match_flag_type __flags) | |
6cb784b6 TS |
406 | { |
407 | typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> | |
e280b6ff | 408 | _ExecutorPtr; |
6cb784b6 | 409 | typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT; |
1b488e33 | 410 | typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT; |
6cb784b6 | 411 | auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>> |
e280b6ff | 412 | (__re._M_automaton); |
6cb784b6 | 413 | if (__p->_M_has_backref) |
9222fb6f TS |
414 | return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, |
415 | __re._M_traits, __flags)); | |
7b86458e TS |
416 | return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p, |
417 | __re._M_traits, __flags)); | |
6cb784b6 TS |
418 | } |
419 | ||
420 | _GLIBCXX_END_NAMESPACE_VERSION | |
421 | } // namespace __detail | |
422 | } // namespace |