1 // class template regex -*- C++ -*-
3 // Copyright (C) 2010-2021 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex_compiler.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 namespace std
_GLIBCXX_VISIBILITY(default)
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 _GLIBCXX_BEGIN_NAMESPACE_CXX11
39 _GLIBCXX_END_NAMESPACE_CXX11
44 * @addtogroup regex-detail
48 template<typename
, bool, bool>
49 struct _BracketMatcher
;
52 * @brief Builds an NFA from an input iterator range.
54 * The %_TraitsT type should fulfill requirements [28.3].
56 template<typename _TraitsT
>
60 typedef typename
_TraitsT::char_type _CharT
;
61 typedef _NFA
<_TraitsT
> _RegexT
;
62 typedef regex_constants::syntax_option_type _FlagT
;
64 _Compiler(const _CharT
* __b
, const _CharT
* __e
,
65 const typename
_TraitsT::locale_type
& __traits
, _FlagT __flags
);
67 shared_ptr
<const _RegexT
>
69 { return std::move(_M_nfa
); }
72 typedef _Scanner
<_CharT
> _ScannerT
;
73 typedef typename
_TraitsT::string_type _StringT
;
74 typedef typename
_ScannerT::_TokenT _TokenT
;
75 typedef _StateSeq
<_TraitsT
> _StateSeqT
;
76 typedef std::stack
<_StateSeqT
> _StackT
;
77 typedef std::ctype
<_CharT
> _CtypeT
;
79 // accepts a specific token or returns false.
81 _M_match_token(_TokenT __token
);
102 _M_bracket_expression();
104 template<bool __icase
, bool __collate
>
106 _M_insert_any_matcher_ecma();
108 template<bool __icase
, bool __collate
>
110 _M_insert_any_matcher_posix();
112 template<bool __icase
, bool __collate
>
114 _M_insert_char_matcher();
116 template<bool __icase
, bool __collate
>
118 _M_insert_character_class_matcher();
120 template<bool __icase
, bool __collate
>
122 _M_insert_bracket_matcher(bool __neg
);
124 // Returns true if successfully matched one term and should continue.
125 // Returns false if the compiler should move on.
126 template<bool __icase
, bool __collate
>
128 _M_expression_term(pair
<bool, _CharT
>& __last_char
,
129 _BracketMatcher
<_TraitsT
, __icase
, __collate
>&
133 _M_cur_int_value(int __radix
);
141 auto ret
= _M_stack
.top();
147 _S_validate(_FlagT __f
)
149 using namespace regex_constants
;
150 switch (__f
& (ECMAScript
|basic
|extended
|awk
|grep
|egrep
))
160 return __f
| ECMAScript
;
162 std::__throw_regex_error(_S_grammar
, "conflicting grammar options");
167 _ScannerT _M_scanner
;
168 shared_ptr
<_RegexT
> _M_nfa
;
171 const _TraitsT
& _M_traits
;
172 const _CtypeT
& _M_ctype
;
176 template<typename _TraitsT
, bool __icase
, bool __collate
>
177 class _RegexTranslatorBase
180 typedef typename
_TraitsT::char_type _CharT
;
181 typedef typename
_TraitsT::string_type _StringT
;
182 typedef _StringT _StrTransT
;
185 _RegexTranslatorBase(const _TraitsT
& __traits
)
186 : _M_traits(__traits
)
190 _M_translate(_CharT __ch
) const
193 return _M_traits
.translate_nocase(__ch
);
195 return _M_traits
.translate(__ch
);
201 _M_transform(_CharT __ch
) const
203 _StrTransT
__str(1, __ch
);
204 return _M_traits
.transform(__str
.begin(), __str
.end());
207 // See LWG 523. It's not efficiently implementable when _TraitsT is not
208 // std::regex_traits<>, and __collate is true. See specializations for
209 // implementations of other cases.
211 _M_match_range(const _StrTransT
& __first
, const _StrTransT
& __last
,
212 const _StrTransT
& __s
) const
213 { return __first
<= __s
&& __s
<= __last
; }
216 bool _M_in_range_icase(_CharT __first
, _CharT __last
, _CharT __ch
) const
218 typedef std::ctype
<_CharT
> __ctype_type
;
219 const auto& __fctyp
= use_facet
<__ctype_type
>(this->_M_traits
.getloc());
220 auto __lower
= __fctyp
.tolower(__ch
);
221 auto __upper
= __fctyp
.toupper(__ch
);
222 return (__first
<= __lower
&& __lower
<= __last
)
223 || (__first
<= __upper
&& __upper
<= __last
);
226 const _TraitsT
& _M_traits
;
229 template<typename _TraitsT
, bool __icase
, bool __collate
>
230 class _RegexTranslator
231 : public _RegexTranslatorBase
<_TraitsT
, __icase
, __collate
>
234 typedef _RegexTranslatorBase
<_TraitsT
, __icase
, __collate
> _Base
;
238 template<typename _TraitsT
, bool __icase
>
239 class _RegexTranslator
<_TraitsT
, __icase
, false>
240 : public _RegexTranslatorBase
<_TraitsT
, __icase
, false>
243 typedef _RegexTranslatorBase
<_TraitsT
, __icase
, false> _Base
;
244 typedef typename
_Base::_CharT _CharT
;
245 typedef _CharT _StrTransT
;
250 _M_transform(_CharT __ch
) const
254 _M_match_range(_CharT __first
, _CharT __last
, _CharT __ch
) const
257 return __first
<= __ch
&& __ch
<= __last
;
258 return this->_M_in_range_icase(__first
, __last
, __ch
);
262 template<typename _CharType
>
263 class _RegexTranslator
<std::regex_traits
<_CharType
>, true, true>
264 : public _RegexTranslatorBase
<std::regex_traits
<_CharType
>, true, true>
267 typedef _RegexTranslatorBase
<std::regex_traits
<_CharType
>, true, true>
269 typedef typename
_Base::_CharT _CharT
;
270 typedef typename
_Base::_StrTransT _StrTransT
;
275 _M_match_range(const _StrTransT
& __first
, const _StrTransT
& __last
,
276 const _StrTransT
& __str
) const
278 __glibcxx_assert(__first
.size() == 1);
279 __glibcxx_assert(__last
.size() == 1);
280 __glibcxx_assert(__str
.size() == 1);
281 return this->_M_in_range_icase(__first
[0], __last
[0], __str
[0]);
285 template<typename _TraitsT
>
286 class _RegexTranslator
<_TraitsT
, false, false>
289 typedef typename
_TraitsT::char_type _CharT
;
290 typedef _CharT _StrTransT
;
293 _RegexTranslator(const _TraitsT
&)
297 _M_translate(_CharT __ch
) const
301 _M_transform(_CharT __ch
) const
305 _M_match_range(_CharT __first
, _CharT __last
, _CharT __ch
) const
306 { return __first
<= __ch
&& __ch
<= __last
; }
309 template<typename _TraitsT
, bool __is_ecma
, bool __icase
, bool __collate
>
312 template<typename _TraitsT
, bool __icase
, bool __collate
>
313 struct _AnyMatcher
<_TraitsT
, false, __icase
, __collate
>
315 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
316 typedef typename
_TransT::_CharT _CharT
;
319 _AnyMatcher(const _TraitsT
& __traits
)
320 : _M_translator(__traits
)
324 operator()(_CharT __ch
) const
326 static auto __nul
= _M_translator
._M_translate('\0');
327 return _M_translator
._M_translate(__ch
) != __nul
;
330 _TransT _M_translator
;
333 template<typename _TraitsT
, bool __icase
, bool __collate
>
334 struct _AnyMatcher
<_TraitsT
, true, __icase
, __collate
>
336 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
337 typedef typename
_TransT::_CharT _CharT
;
340 _AnyMatcher(const _TraitsT
& __traits
)
341 : _M_translator(__traits
)
345 operator()(_CharT __ch
) const
346 { return _M_apply(__ch
, typename is_same
<_CharT
, char>::type()); }
349 _M_apply(_CharT __ch
, true_type
) const
351 auto __c
= _M_translator
._M_translate(__ch
);
352 auto __n
= _M_translator
._M_translate('\n');
353 auto __r
= _M_translator
._M_translate('\r');
354 return __c
!= __n
&& __c
!= __r
;
358 _M_apply(_CharT __ch
, false_type
) const
360 auto __c
= _M_translator
._M_translate(__ch
);
361 auto __n
= _M_translator
._M_translate('\n');
362 auto __r
= _M_translator
._M_translate('\r');
363 auto __u2028
= _M_translator
._M_translate(u
'\u2028');
364 auto __u2029
= _M_translator
._M_translate(u
'\u2029');
365 return __c
!= __n
&& __c
!= __r
&& __c
!= __u2028
&& __c
!= __u2029
;
368 _TransT _M_translator
;
371 template<typename _TraitsT
, bool __icase
, bool __collate
>
374 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
375 typedef typename
_TransT::_CharT _CharT
;
377 _CharMatcher(_CharT __ch
, const _TraitsT
& __traits
)
378 : _M_translator(__traits
), _M_ch(_M_translator
._M_translate(__ch
))
382 operator()(_CharT __ch
) const
383 { return _M_ch
== _M_translator
._M_translate(__ch
); }
385 _TransT _M_translator
;
389 /// Matches a character range (bracket expression)
390 template<typename _TraitsT
, bool __icase
, bool __collate
>
391 struct _BracketMatcher
394 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
395 typedef typename
_TransT::_CharT _CharT
;
396 typedef typename
_TransT::_StrTransT _StrTransT
;
397 typedef typename
_TraitsT::string_type _StringT
;
398 typedef typename
_TraitsT::char_class_type _CharClassT
;
401 _BracketMatcher(bool __is_non_matching
,
402 const _TraitsT
& __traits
)
403 : _M_class_set(0), _M_translator(__traits
), _M_traits(__traits
),
404 _M_is_non_matching(__is_non_matching
)
408 operator()(_CharT __ch
) const
410 _GLIBCXX_DEBUG_ASSERT(_M_is_ready
);
411 return _M_apply(__ch
, _UseCache());
415 _M_add_char(_CharT __c
)
417 _M_char_set
.push_back(_M_translator
._M_translate(__c
));
418 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
422 _M_add_collate_element(const _StringT
& __s
)
424 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
425 __s
.data() + __s
.size());
427 __throw_regex_error(regex_constants::error_collate
,
428 "Invalid collate element.");
429 _M_char_set
.push_back(_M_translator
._M_translate(__st
[0]));
430 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
435 _M_add_equivalence_class(const _StringT
& __s
)
437 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
438 __s
.data() + __s
.size());
440 __throw_regex_error(regex_constants::error_collate
,
441 "Invalid equivalence class.");
442 __st
= _M_traits
.transform_primary(__st
.data(),
443 __st
.data() + __st
.size());
444 _M_equiv_set
.push_back(__st
);
445 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
448 // __neg should be true for \D, \S and \W only.
450 _M_add_character_class(const _StringT
& __s
, bool __neg
)
452 auto __mask
= _M_traits
.lookup_classname(__s
.data(),
453 __s
.data() + __s
.size(),
456 __throw_regex_error(regex_constants::error_collate
,
457 "Invalid character class.");
459 _M_class_set
|= __mask
;
461 _M_neg_class_set
.push_back(__mask
);
462 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
466 _M_make_range(_CharT __l
, _CharT __r
)
469 __throw_regex_error(regex_constants::error_range
,
470 "Invalid range in bracket expression.");
471 _M_range_set
.push_back(make_pair(_M_translator
._M_transform(__l
),
472 _M_translator
._M_transform(__r
)));
473 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
479 std::sort(_M_char_set
.begin(), _M_char_set
.end());
480 auto __end
= std::unique(_M_char_set
.begin(), _M_char_set
.end());
481 _M_char_set
.erase(__end
, _M_char_set
.end());
482 _M_make_cache(_UseCache());
483 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= true);
487 // Currently we only use the cache for char
488 using _UseCache
= typename
std::is_same
<_CharT
, char>::type
;
490 static constexpr size_t
492 1ul << (sizeof(_CharT
) * __CHAR_BIT__
* int(_UseCache::value
));
495 using _CacheT
= std::__conditional_t
<_UseCache::value
,
496 std::bitset
<_S_cache_size
>,
498 using _UnsignedCharT
= typename
std::make_unsigned
<_CharT
>::type
;
501 _M_apply(_CharT __ch
, false_type
) const;
504 _M_apply(_CharT __ch
, true_type
) const
505 { return _M_cache
[static_cast<_UnsignedCharT
>(__ch
)]; }
508 _M_make_cache(true_type
)
510 for (unsigned __i
= 0; __i
< _M_cache
.size(); __i
++)
511 _M_cache
[__i
] = _M_apply(static_cast<_CharT
>(__i
), false_type());
515 _M_make_cache(false_type
)
519 _GLIBCXX_STD_C::vector
<_CharT
> _M_char_set
;
520 _GLIBCXX_STD_C::vector
<_StringT
> _M_equiv_set
;
521 _GLIBCXX_STD_C::vector
<pair
<_StrTransT
, _StrTransT
>> _M_range_set
;
522 _GLIBCXX_STD_C::vector
<_CharClassT
> _M_neg_class_set
;
523 _CharClassT _M_class_set
;
524 _TransT _M_translator
;
525 const _TraitsT
& _M_traits
;
526 bool _M_is_non_matching
;
528 #ifdef _GLIBCXX_DEBUG
529 bool _M_is_ready
= false;
534 } // namespace __detail
535 _GLIBCXX_END_NAMESPACE_VERSION
538 #include <bits/regex_compiler.tcc>