]>
Commit | Line | Data |
---|---|---|
33fbbb76 TS |
1 | // class template regex -*- C++ -*- |
2 | ||
99dee823 | 3 | // Copyright (C) 2013-2021 Free Software Foundation, Inc. |
33fbbb76 TS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | /** | |
26 | * @file bits/regex_scanner.h | |
27 | * This is an internal header file, included by other library headers. | |
28 | * Do not attempt to use it directly. @headername{regex} | |
29 | */ | |
30 | ||
31 | namespace std _GLIBCXX_VISIBILITY(default) | |
32 | { | |
33fbbb76 TS |
33 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
34 | ||
4a15d842 FD |
35 | namespace __detail |
36 | { | |
33fbbb76 TS |
37 | /** |
38 | * @addtogroup regex-detail | |
39 | * @{ | |
40 | */ | |
41 | ||
ddf41e9d TS |
/**
 * @brief State of the regex scanner that does not depend on the
 * character type.
 *
 * Holds the token and scanner-state enumerations, the grammar flags,
 * and the lookup tables (token table, escape tables, special-character
 * sets) that the constructor selects from according to those flags.
 */
struct _ScannerBase
{
public:
  /// Token types returned from the scanner.
  enum _TokenT : unsigned
  {
    _S_token_anychar,
    _S_token_ord_char,
    _S_token_oct_num,
    _S_token_hex_num,
    _S_token_backref,
    _S_token_subexpr_begin,
    _S_token_subexpr_no_group_begin,
    _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
    _S_token_subexpr_end,
    _S_token_bracket_begin,
    _S_token_bracket_neg_begin,
    _S_token_bracket_end,
    _S_token_interval_begin,
    _S_token_interval_end,
    _S_token_quoted_class,
    _S_token_char_class_name,
    _S_token_collsymbol,
    _S_token_equiv_class_name,
    _S_token_opt,
    _S_token_or,
    _S_token_closure0,
    _S_token_closure1,
    _S_token_line_begin,
    _S_token_line_end,
    _S_token_word_bound, // neg if _M_value[0] == 'n'
    _S_token_comma,
    _S_token_dup_count,
    _S_token_eof,
    _S_token_bracket_dash,
    _S_token_unknown = -1u
  };

protected:
  typedef std::regex_constants::syntax_option_type _FlagT;

  /// Scanner states; the constructor always starts in _S_state_normal.
  enum _StateT
  {
    _S_state_normal,
    _S_state_in_brace,
    _S_state_in_bracket,
  };

protected:
  /**
   * @brief Stores the grammar flags and selects the matching escape
   * table and special-character set.
   *
   * @param __flags  Syntax options.  At least one of the grammar bits
   *                 (ECMAScript, basic, extended, grep, egrep, awk)
   *                 must be set; otherwise no special-character set is
   *                 selected and the __glibcxx_assert in the body
   *                 rejects the resulting null pointer.
   *
   * The mem-initializers below are listed in declaration order, and
   * _M_flags precedes the members whose initializers test it through
   * _M_is_ecma() or directly.
   */
  _ScannerBase(_FlagT __flags)
  : _M_state(_S_state_normal),
    _M_flags(__flags),
    // Start from a well-defined token so that a read which precedes
    // the first scan is not an uninitialized read.
    _M_token(_S_token_unknown),
    // ECMAScript has its own escape table; every other grammar gets
    // the awk one.
    _M_escape_tbl(_M_is_ecma()
		  ? _M_ecma_escape_tbl
		  : _M_awk_escape_tbl),
    // The first grammar bit found, in the order tested here, decides
    // the set.  The grep and egrep literals are the basic and extended
    // sets with '\n' appended.
    _M_spec_char(_M_is_ecma()
		 ? _M_ecma_spec_char
		 : _M_flags & std::regex_constants::basic
		 ? _M_basic_spec_char
		 : _M_flags & std::regex_constants::extended
		 ? _M_extended_spec_char
		 : _M_flags & std::regex_constants::grep
		 ? ".[\\*^$\n"
		 : _M_flags & std::regex_constants::egrep
		 ? ".[\\()*+?{|^$\n"
		 : _M_flags & std::regex_constants::awk
		 ? _M_extended_spec_char
		 : nullptr),
    _M_at_bracket_start(false)
  { __glibcxx_assert(_M_spec_char); }

protected:
  /**
   * @brief Looks up @p __c in the escape table chosen at construction.
   *
   * @param __c  Character to look up.
   * @return Pointer to the replacement character stored in the table,
   *         or nullptr when @p __c has no entry.  A '\0' argument never
   *         matches, because '\0' in the key position is the sentinel
   *         that ends the search.
   */
  const char*
  _M_find_escape(char __c) const
  {
    auto __it = _M_escape_tbl;
    for (; __it->first != '\0'; ++__it)
      if (__it->first == __c)
	return &__it->second;
    return nullptr;
  }

  /// True if the ECMAScript bit is set in the flags.
  bool
  _M_is_ecma() const
  { return _M_flags & std::regex_constants::ECMAScript; }

  /// True if the basic or grep bit is set in the flags.
  bool
  _M_is_basic() const
  {
    return _M_flags & (std::regex_constants::basic
		       | std::regex_constants::grep);
  }

  /// True if the extended, egrep or awk bit is set in the flags.
  bool
  _M_is_extended() const
  {
    return _M_flags & (std::regex_constants::extended
		       | std::regex_constants::egrep
		       | std::regex_constants::awk);
  }

  /// True if the grep or egrep bit is set in the flags.
  bool
  _M_is_grep() const
  {
    return _M_flags & (std::regex_constants::grep
		       | std::regex_constants::egrep);
  }

  /// True if the awk bit is set in the flags.
  bool
  _M_is_awk() const
  { return _M_flags & std::regex_constants::awk; }

protected:
  // TODO: Make them static in the next abi change.
  // Until then these tables are non-static members, so every scanner
  // object carries its own copy and their declaration order is part of
  // the object layout.

  /// Single characters and the tokens they map to; the entry whose key
  /// is '\0' is the last one.
  const std::pair<char, _TokenT> _M_token_tbl[9] =
    {
      {'^', _S_token_line_begin},
      {'$', _S_token_line_end},
      {'.', _S_token_anychar},
      {'*', _S_token_closure0},
      {'+', _S_token_closure1},
      {'?', _S_token_opt},
      {'|', _S_token_or},
      {'\n', _S_token_or}, // grep and egrep
      {'\0', _S_token_or},
    };

  /// Escape letter -> replacement character, used when the ECMAScript
  /// bit is set.  Terminated by a {'\0', '\0'} sentinel.
  const std::pair<char, char> _M_ecma_escape_tbl[8] =
    {
      {'0', '\0'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Escape letter -> replacement character, used for every grammar
  /// other than ECMAScript.  Terminated by a {'\0', '\0'} sentinel.
  const std::pair<char, char> _M_awk_escape_tbl[11] =
    {
      {'"', '"'},
      {'/', '/'},
      {'\\', '\\'},
      {'a', '\a'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Special-character sets the constructor chooses from.
  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
  const char* _M_basic_spec_char = ".[\\*^$";
  const char* _M_extended_spec_char = ".[\\()*+?{|^$";

  _StateT                       _M_state;            ///< Current scanner state.
  _FlagT                        _M_flags;            ///< Flags given to the constructor.
  _TokenT                       _M_token;            ///< _S_token_unknown until a scan stores a token.
  const std::pair<char, char>*  _M_escape_tbl;       ///< Table searched by _M_find_escape().
  const char*                   _M_spec_char;        ///< Selected special-character set; never null.
  bool                          _M_at_bracket_start; ///< Set to false by the constructor.
};
198 | ||
33fbbb76 | 199 | /** |
ee54a3b3 | 200 | * @brief Scans an input range for regex tokens. |
33fbbb76 TS |
201 | * |
202 | * The %_Scanner class interprets the regular expression pattern in | |
203 | * the input range passed to its constructor as a sequence of parse | |
204 | * tokens passed to the regular expression compiler. The sequence | |
205 | * of tokens provided depends on the flag settings passed to the | |
206 | * constructor: different regular expression grammars will interpret | |
207 | * the same input pattern in syntactically different ways. | |
208 | */ | |
ddf41e9d | 209 | template<typename _CharT> |
33fbbb76 | 210 | class _Scanner |
ddf41e9d | 211 | : public _ScannerBase |
33fbbb76 TS |
212 | { |
213 | public: | |
ddf41e9d | 214 | typedef const _CharT* _IterT; |
33fbbb76 TS |
215 | typedef std::basic_string<_CharT> _StringT; |
216 | typedef regex_constants::syntax_option_type _FlagT; | |
217 | typedef const std::ctype<_CharT> _CtypeT; | |
218 | ||
ddf41e9d | 219 | _Scanner(_IterT __begin, _IterT __end, |
33fbbb76 TS |
220 | _FlagT __flags, std::locale __loc); |
221 | ||
222 | void | |
223 | _M_advance(); | |
224 | ||
225 | _TokenT | |
226 | _M_get_token() const | |
227 | { return _M_token; } | |
228 | ||
229 | const _StringT& | |
230 | _M_get_value() const | |
231 | { return _M_value; } | |
232 | ||
233 | #ifdef _GLIBCXX_DEBUG | |
234 | std::ostream& | |
235 | _M_print(std::ostream&); | |
236 | #endif | |
237 | ||
238 | private: | |
33fbbb76 TS |
239 | void |
240 | _M_scan_normal(); | |
241 | ||
242 | void | |
243 | _M_scan_in_bracket(); | |
244 | ||
245 | void | |
246 | _M_scan_in_brace(); | |
247 | ||
248 | void | |
249 | _M_eat_escape_ecma(); | |
250 | ||
251 | void | |
252 | _M_eat_escape_posix(); | |
253 | ||
254 | void | |
255 | _M_eat_escape_awk(); | |
256 | ||
257 | void | |
258 | _M_eat_class(char); | |
259 | ||
ddf41e9d TS |
260 | _IterT _M_current; |
261 | _IterT _M_end; | |
33fbbb76 | 262 | _CtypeT& _M_ctype; |
33fbbb76 | 263 | _StringT _M_value; |
33fbbb76 TS |
264 | void (_Scanner::* _M_eat_escape)(); |
265 | }; | |
266 | ||
f0b88346 | 267 | ///@} regex-detail |
33fbbb76 | 268 | } // namespace __detail |
4a15d842 | 269 | _GLIBCXX_END_NAMESPACE_VERSION |
33fbbb76 TS |
270 | } // namespace std |
271 | ||
272 | #include <bits/regex_scanner.tcc> |