// Locale support (codecvt) -*- C++ -*-

// Copyright (C) 2000-2020 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/codecvt.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{locale}
 */

//
// ISO C++ 14882: 22.2.1.5 Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>
39 #pragma GCC system_header
41 namespace std
_GLIBCXX_VISIBILITY(default)
43 _GLIBCXX_BEGIN_NAMESPACE_VERSION
45 /// Empty base class for codecvt facet [22.2.1.5].
59 * @brief Common base for codecvt functions.
61 * This template class provides implementations of the public functions
62 * that forward to the protected virtual functions.
64 * This template also provides abstract stubs for the protected virtual
67 template<typename _InternT
, typename _ExternT
, typename _StateT
>
68 class __codecvt_abstract_base
69 : public locale::facet
, public codecvt_base
73 typedef codecvt_base::result result
;
74 typedef _InternT intern_type
;
75 typedef _ExternT extern_type
;
76 typedef _StateT state_type
;
78 // 22.2.1.5.1 codecvt members
80 * @brief Convert from internal to external character set.
82 * Converts input string of intern_type to output string of
83 * extern_type. This is analogous to wcsrtombs. It does this by
84 * calling codecvt::do_out.
86 * The source and destination character sets are determined by the
87 * facet's locale, internal and external types.
89 * The characters in [from,from_end) are converted and written to
90 * [to,to_end). from_next and to_next are set to point to the
91 * character following the last successfully converted character,
92 * respectively. If the result needed no conversion, from_next and
93 * to_next are not affected.
95 * The @a state argument should be initialized if the input is at the
96 * beginning and carried from a previous call if continuing
97 * conversion. There are no guarantees about how @a state is used.
99 * The result returned is a member of codecvt_base::result. If
100 * all the input is converted, returns codecvt_base::ok. If no
101 * conversion is necessary, returns codecvt_base::noconv. If
102 * the input ends early or there is insufficient space in the
103 * output, returns codecvt_base::partial. Otherwise the
104 * conversion failed and codecvt_base::error is returned.
106 * @param __state Persistent conversion state data.
107 * @param __from Start of input.
108 * @param __from_end End of input.
109 * @param __from_next Returns start of unconverted data.
110 * @param __to Start of output buffer.
111 * @param __to_end End of output buffer.
112 * @param __to_next Returns start of unused output area.
113 * @return codecvt_base::result.
116 out(state_type
& __state
, const intern_type
* __from
,
117 const intern_type
* __from_end
, const intern_type
*& __from_next
,
118 extern_type
* __to
, extern_type
* __to_end
,
119 extern_type
*& __to_next
) const
121 return this->do_out(__state
, __from
, __from_end
, __from_next
,
122 __to
, __to_end
, __to_next
);
126 * @brief Reset conversion state.
128 * Writes characters to output that would restore @a state to initial
129 * conditions. The idea is that if a partial conversion occurs, then
130 * the converting the characters written by this function would leave
131 * the state in initial conditions, rather than partial conversion
132 * state. It does this by calling codecvt::do_unshift().
134 * For example, if 4 external characters always converted to 1 internal
135 * character, and input to in() had 6 external characters with state
136 * saved, this function would write two characters to the output and
137 * set the state to initialized conditions.
139 * The source and destination character sets are determined by the
140 * facet's locale, internal and external types.
142 * The result returned is a member of codecvt_base::result. If the
143 * state could be reset and data written, returns codecvt_base::ok. If
144 * no conversion is necessary, returns codecvt_base::noconv. If the
145 * output has insufficient space, returns codecvt_base::partial.
146 * Otherwise the reset failed and codecvt_base::error is returned.
148 * @param __state Persistent conversion state data.
149 * @param __to Start of output buffer.
150 * @param __to_end End of output buffer.
151 * @param __to_next Returns start of unused output area.
152 * @return codecvt_base::result.
155 unshift(state_type
& __state
, extern_type
* __to
, extern_type
* __to_end
,
156 extern_type
*& __to_next
) const
157 { return this->do_unshift(__state
, __to
,__to_end
,__to_next
); }
160 * @brief Convert from external to internal character set.
162 * Converts input string of extern_type to output string of
163 * intern_type. This is analogous to mbsrtowcs. It does this by
164 * calling codecvt::do_in.
166 * The source and destination character sets are determined by the
167 * facet's locale, internal and external types.
169 * The characters in [from,from_end) are converted and written to
170 * [to,to_end). from_next and to_next are set to point to the
171 * character following the last successfully converted character,
172 * respectively. If the result needed no conversion, from_next and
173 * to_next are not affected.
175 * The @a state argument should be initialized if the input is at the
176 * beginning and carried from a previous call if continuing
177 * conversion. There are no guarantees about how @a state is used.
179 * The result returned is a member of codecvt_base::result. If
180 * all the input is converted, returns codecvt_base::ok. If no
181 * conversion is necessary, returns codecvt_base::noconv. If
182 * the input ends early or there is insufficient space in the
183 * output, returns codecvt_base::partial. Otherwise the
184 * conversion failed and codecvt_base::error is returned.
186 * @param __state Persistent conversion state data.
187 * @param __from Start of input.
188 * @param __from_end End of input.
189 * @param __from_next Returns start of unconverted data.
190 * @param __to Start of output buffer.
191 * @param __to_end End of output buffer.
192 * @param __to_next Returns start of unused output area.
193 * @return codecvt_base::result.
196 in(state_type
& __state
, const extern_type
* __from
,
197 const extern_type
* __from_end
, const extern_type
*& __from_next
,
198 intern_type
* __to
, intern_type
* __to_end
,
199 intern_type
*& __to_next
) const
201 return this->do_in(__state
, __from
, __from_end
, __from_next
,
202 __to
, __to_end
, __to_next
);
206 encoding() const throw()
207 { return this->do_encoding(); }
210 always_noconv() const throw()
211 { return this->do_always_noconv(); }
214 length(state_type
& __state
, const extern_type
* __from
,
215 const extern_type
* __end
, size_t __max
) const
216 { return this->do_length(__state
, __from
, __end
, __max
); }
219 max_length() const throw()
220 { return this->do_max_length(); }
224 __codecvt_abstract_base(size_t __refs
= 0) : locale::facet(__refs
) { }
227 ~__codecvt_abstract_base() { }
230 * @brief Convert from internal to external character set.
232 * Converts input string of intern_type to output string of
233 * extern_type. This function is a hook for derived classes to change
234 * the value returned. @see out for more information.
237 do_out(state_type
& __state
, const intern_type
* __from
,
238 const intern_type
* __from_end
, const intern_type
*& __from_next
,
239 extern_type
* __to
, extern_type
* __to_end
,
240 extern_type
*& __to_next
) const = 0;
243 do_unshift(state_type
& __state
, extern_type
* __to
,
244 extern_type
* __to_end
, extern_type
*& __to_next
) const = 0;
247 do_in(state_type
& __state
, const extern_type
* __from
,
248 const extern_type
* __from_end
, const extern_type
*& __from_next
,
249 intern_type
* __to
, intern_type
* __to_end
,
250 intern_type
*& __to_next
) const = 0;
253 do_encoding() const throw() = 0;
256 do_always_noconv() const throw() = 0;
259 do_length(state_type
&, const extern_type
* __from
,
260 const extern_type
* __end
, size_t __max
) const = 0;
263 do_max_length() const throw() = 0;
267 * @brief Primary class template codecvt.
270 * NB: Generic, mostly useless implementation.
273 template<typename _InternT
, typename _ExternT
, typename _StateT
>
275 : public __codecvt_abstract_base
<_InternT
, _ExternT
, _StateT
>
279 typedef codecvt_base::result result
;
280 typedef _InternT intern_type
;
281 typedef _ExternT extern_type
;
282 typedef _StateT state_type
;
285 __c_locale _M_c_locale_codecvt
;
288 static locale::id id
;
291 codecvt(size_t __refs
= 0)
292 : __codecvt_abstract_base
<_InternT
, _ExternT
, _StateT
> (__refs
),
293 _M_c_locale_codecvt(0)
297 codecvt(__c_locale __cloc
, size_t __refs
= 0);
304 do_out(state_type
& __state
, const intern_type
* __from
,
305 const intern_type
* __from_end
, const intern_type
*& __from_next
,
306 extern_type
* __to
, extern_type
* __to_end
,
307 extern_type
*& __to_next
) const;
310 do_unshift(state_type
& __state
, extern_type
* __to
,
311 extern_type
* __to_end
, extern_type
*& __to_next
) const;
314 do_in(state_type
& __state
, const extern_type
* __from
,
315 const extern_type
* __from_end
, const extern_type
*& __from_next
,
316 intern_type
* __to
, intern_type
* __to_end
,
317 intern_type
*& __to_next
) const;
320 do_encoding() const throw();
323 do_always_noconv() const throw();
326 do_length(state_type
&, const extern_type
* __from
,
327 const extern_type
* __end
, size_t __max
) const;
330 do_max_length() const throw();
333 template<typename _InternT
, typename _ExternT
, typename _StateT
>
334 locale::id codecvt
<_InternT
, _ExternT
, _StateT
>::id
;
336 /// class codecvt<char, char, mbstate_t> specialization.
338 class codecvt
<char, char, mbstate_t>
339 : public __codecvt_abstract_base
<char, char, mbstate_t>
341 friend class messages
<char>;
345 typedef char intern_type
;
346 typedef char extern_type
;
347 typedef mbstate_t state_type
;
350 __c_locale _M_c_locale_codecvt
;
353 static locale::id id
;
356 codecvt(size_t __refs
= 0);
359 codecvt(__c_locale __cloc
, size_t __refs
= 0);
366 do_out(state_type
& __state
, const intern_type
* __from
,
367 const intern_type
* __from_end
, const intern_type
*& __from_next
,
368 extern_type
* __to
, extern_type
* __to_end
,
369 extern_type
*& __to_next
) const;
372 do_unshift(state_type
& __state
, extern_type
* __to
,
373 extern_type
* __to_end
, extern_type
*& __to_next
) const;
376 do_in(state_type
& __state
, const extern_type
* __from
,
377 const extern_type
* __from_end
, const extern_type
*& __from_next
,
378 intern_type
* __to
, intern_type
* __to_end
,
379 intern_type
*& __to_next
) const;
382 do_encoding() const throw();
385 do_always_noconv() const throw();
388 do_length(state_type
&, const extern_type
* __from
,
389 const extern_type
* __end
, size_t __max
) const;
392 do_max_length() const throw();
395 #ifdef _GLIBCXX_USE_WCHAR_T
396 /** @brief Class codecvt<wchar_t, char, mbstate_t> specialization.
398 * Converts between narrow and wide characters in the native character set
401 class codecvt
<wchar_t, char, mbstate_t>
402 : public __codecvt_abstract_base
<wchar_t, char, mbstate_t>
404 friend class messages
<wchar_t>;
408 typedef wchar_t intern_type
;
409 typedef char extern_type
;
410 typedef mbstate_t state_type
;
413 __c_locale _M_c_locale_codecvt
;
416 static locale::id id
;
419 codecvt(size_t __refs
= 0);
422 codecvt(__c_locale __cloc
, size_t __refs
= 0);
429 do_out(state_type
& __state
, const intern_type
* __from
,
430 const intern_type
* __from_end
, const intern_type
*& __from_next
,
431 extern_type
* __to
, extern_type
* __to_end
,
432 extern_type
*& __to_next
) const;
435 do_unshift(state_type
& __state
,
436 extern_type
* __to
, extern_type
* __to_end
,
437 extern_type
*& __to_next
) const;
440 do_in(state_type
& __state
,
441 const extern_type
* __from
, const extern_type
* __from_end
,
442 const extern_type
*& __from_next
,
443 intern_type
* __to
, intern_type
* __to_end
,
444 intern_type
*& __to_next
) const;
447 int do_encoding() const throw();
450 bool do_always_noconv() const throw();
453 int do_length(state_type
&, const extern_type
* __from
,
454 const extern_type
* __end
, size_t __max
) const;
457 do_max_length() const throw();
459 #endif //_GLIBCXX_USE_WCHAR_T
461 #if __cplusplus >= 201103L
462 /** @brief Class codecvt<char16_t, char, mbstate_t> specialization.
464 * Converts between UTF-16 and UTF-8.
467 class codecvt
<char16_t
, char, mbstate_t>
468 : public __codecvt_abstract_base
<char16_t
, char, mbstate_t>
472 typedef char16_t intern_type
;
473 typedef char extern_type
;
474 typedef mbstate_t state_type
;
477 static locale::id id
;
480 codecvt(size_t __refs
= 0)
481 : __codecvt_abstract_base
<char16_t
, char, mbstate_t>(__refs
) { }
488 do_out(state_type
& __state
, const intern_type
* __from
,
489 const intern_type
* __from_end
, const intern_type
*& __from_next
,
490 extern_type
* __to
, extern_type
* __to_end
,
491 extern_type
*& __to_next
) const;
494 do_unshift(state_type
& __state
,
495 extern_type
* __to
, extern_type
* __to_end
,
496 extern_type
*& __to_next
) const;
499 do_in(state_type
& __state
,
500 const extern_type
* __from
, const extern_type
* __from_end
,
501 const extern_type
*& __from_next
,
502 intern_type
* __to
, intern_type
* __to_end
,
503 intern_type
*& __to_next
) const;
506 int do_encoding() const throw();
509 bool do_always_noconv() const throw();
512 int do_length(state_type
&, const extern_type
* __from
,
513 const extern_type
* __end
, size_t __max
) const;
516 do_max_length() const throw();
519 /** @brief Class codecvt<char32_t, char, mbstate_t> specialization.
521 * Converts between UTF-32 and UTF-8.
524 class codecvt
<char32_t
, char, mbstate_t>
525 : public __codecvt_abstract_base
<char32_t
, char, mbstate_t>
529 typedef char32_t intern_type
;
530 typedef char extern_type
;
531 typedef mbstate_t state_type
;
534 static locale::id id
;
537 codecvt(size_t __refs
= 0)
538 : __codecvt_abstract_base
<char32_t
, char, mbstate_t>(__refs
) { }
545 do_out(state_type
& __state
, const intern_type
* __from
,
546 const intern_type
* __from_end
, const intern_type
*& __from_next
,
547 extern_type
* __to
, extern_type
* __to_end
,
548 extern_type
*& __to_next
) const;
551 do_unshift(state_type
& __state
,
552 extern_type
* __to
, extern_type
* __to_end
,
553 extern_type
*& __to_next
) const;
556 do_in(state_type
& __state
,
557 const extern_type
* __from
, const extern_type
* __from_end
,
558 const extern_type
*& __from_next
,
559 intern_type
* __to
, intern_type
* __to_end
,
560 intern_type
*& __to_next
) const;
563 int do_encoding() const throw();
566 bool do_always_noconv() const throw();
569 int do_length(state_type
&, const extern_type
* __from
,
570 const extern_type
* __end
, size_t __max
) const;
573 do_max_length() const throw();
576 #ifdef _GLIBCXX_USE_CHAR8_T
577 /** @brief Class codecvt<char16_t, char8_t, mbstate_t> specialization.
579 * Converts between UTF-16 and UTF-8.
582 class codecvt
<char16_t
, char8_t
, mbstate_t>
583 : public __codecvt_abstract_base
<char16_t
, char8_t
, mbstate_t>
587 typedef char16_t intern_type
;
588 typedef char8_t extern_type
;
589 typedef mbstate_t state_type
;
592 static locale::id id
;
595 codecvt(size_t __refs
= 0)
596 : __codecvt_abstract_base
<char16_t
, char8_t
, mbstate_t>(__refs
) { }
603 do_out(state_type
& __state
, const intern_type
* __from
,
604 const intern_type
* __from_end
, const intern_type
*& __from_next
,
605 extern_type
* __to
, extern_type
* __to_end
,
606 extern_type
*& __to_next
) const;
609 do_unshift(state_type
& __state
,
610 extern_type
* __to
, extern_type
* __to_end
,
611 extern_type
*& __to_next
) const;
614 do_in(state_type
& __state
,
615 const extern_type
* __from
, const extern_type
* __from_end
,
616 const extern_type
*& __from_next
,
617 intern_type
* __to
, intern_type
* __to_end
,
618 intern_type
*& __to_next
) const;
621 int do_encoding() const throw();
624 bool do_always_noconv() const throw();
627 int do_length(state_type
&, const extern_type
* __from
,
628 const extern_type
* __end
, size_t __max
) const;
631 do_max_length() const throw();
634 /** @brief Class codecvt<char32_t, char8_t, mbstate_t> specialization.
636 * Converts between UTF-32 and UTF-8.
639 class codecvt
<char32_t
, char8_t
, mbstate_t>
640 : public __codecvt_abstract_base
<char32_t
, char8_t
, mbstate_t>
644 typedef char32_t intern_type
;
645 typedef char8_t extern_type
;
646 typedef mbstate_t state_type
;
649 static locale::id id
;
652 codecvt(size_t __refs
= 0)
653 : __codecvt_abstract_base
<char32_t
, char8_t
, mbstate_t>(__refs
) { }
660 do_out(state_type
& __state
, const intern_type
* __from
,
661 const intern_type
* __from_end
, const intern_type
*& __from_next
,
662 extern_type
* __to
, extern_type
* __to_end
,
663 extern_type
*& __to_next
) const;
666 do_unshift(state_type
& __state
,
667 extern_type
* __to
, extern_type
* __to_end
,
668 extern_type
*& __to_next
) const;
671 do_in(state_type
& __state
,
672 const extern_type
* __from
, const extern_type
* __from_end
,
673 const extern_type
*& __from_next
,
674 intern_type
* __to
, intern_type
* __to_end
,
675 intern_type
*& __to_next
) const;
678 int do_encoding() const throw();
681 bool do_always_noconv() const throw();
684 int do_length(state_type
&, const extern_type
* __from
,
685 const extern_type
* __end
, size_t __max
) const;
688 do_max_length() const throw();
690 #endif // _GLIBCXX_USE_CHAR8_T
694 /// class codecvt_byname [22.2.1.6].
695 template<typename _InternT
, typename _ExternT
, typename _StateT
>
696 class codecvt_byname
: public codecvt
<_InternT
, _ExternT
, _StateT
>
700 codecvt_byname(const char* __s
, size_t __refs
= 0)
701 : codecvt
<_InternT
, _ExternT
, _StateT
>(__refs
)
703 if (__builtin_strcmp(__s
, "C") != 0
704 && __builtin_strcmp(__s
, "POSIX") != 0)
706 this->_S_destroy_c_locale(this->_M_c_locale_codecvt
);
707 this->_S_create_c_locale(this->_M_c_locale_codecvt
, __s
);
711 #if __cplusplus >= 201103L
713 codecvt_byname(const string
& __s
, size_t __refs
= 0)
714 : codecvt_byname(__s
.c_str(), __refs
) { }
719 ~codecvt_byname() { }
722 #if __cplusplus >= 201103L
724 class codecvt_byname
<char16_t
, char, mbstate_t>
725 : public codecvt
<char16_t
, char, mbstate_t>
729 codecvt_byname(const char*, size_t __refs
= 0)
730 : codecvt
<char16_t
, char, mbstate_t>(__refs
) { }
733 codecvt_byname(const string
& __s
, size_t __refs
= 0)
734 : codecvt_byname(__s
.c_str(), __refs
) { }
738 ~codecvt_byname() { }
742 class codecvt_byname
<char32_t
, char, mbstate_t>
743 : public codecvt
<char32_t
, char, mbstate_t>
747 codecvt_byname(const char*, size_t __refs
= 0)
748 : codecvt
<char32_t
, char, mbstate_t>(__refs
) { }
751 codecvt_byname(const string
& __s
, size_t __refs
= 0)
752 : codecvt_byname(__s
.c_str(), __refs
) { }
756 ~codecvt_byname() { }
759 #if defined(_GLIBCXX_USE_CHAR8_T)
761 class codecvt_byname
<char16_t
, char8_t
, mbstate_t>
762 : public codecvt
<char16_t
, char8_t
, mbstate_t>
766 codecvt_byname(const char* __s
, size_t __refs
= 0)
767 : codecvt
<char16_t
, char8_t
, mbstate_t>(__refs
) { }
770 codecvt_byname(const string
& __s
, size_t __refs
= 0)
771 : codecvt_byname(__s
.c_str(), __refs
) { }
775 ~codecvt_byname() { }
779 class codecvt_byname
<char32_t
, char8_t
, mbstate_t>
780 : public codecvt
<char32_t
, char8_t
, mbstate_t>
784 codecvt_byname(const char* __s
, size_t __refs
= 0)
785 : codecvt
<char32_t
, char8_t
, mbstate_t>(__refs
) { }
788 codecvt_byname(const string
& __s
, size_t __refs
= 0)
789 : codecvt_byname(__s
.c_str(), __refs
) { }
793 ~codecvt_byname() { }
799 // Inhibit implicit instantiations for required instantiations,
800 // which are defined via explicit instantiations elsewhere.
801 #if _GLIBCXX_EXTERN_TEMPLATE
802 extern template class codecvt_byname
<char, char, mbstate_t>;
805 const codecvt
<char, char, mbstate_t>&
806 use_facet
<codecvt
<char, char, mbstate_t> >(const locale
&);
810 has_facet
<codecvt
<char, char, mbstate_t> >(const locale
&);
812 #ifdef _GLIBCXX_USE_WCHAR_T
813 extern template class codecvt_byname
<wchar_t, char, mbstate_t>;
816 const codecvt
<wchar_t, char, mbstate_t>&
817 use_facet
<codecvt
<wchar_t, char, mbstate_t> >(const locale
&);
821 has_facet
<codecvt
<wchar_t, char, mbstate_t> >(const locale
&);
824 #if __cplusplus >= 201103L
825 extern template class codecvt_byname
<char16_t
, char, mbstate_t>;
826 extern template class codecvt_byname
<char32_t
, char, mbstate_t>;
828 #if defined(_GLIBCXX_USE_CHAR8_T)
829 extern template class codecvt_byname
<char16_t
, char8_t
, mbstate_t>;
830 extern template class codecvt_byname
<char32_t
, char8_t
, mbstate_t>;
837 _GLIBCXX_END_NAMESPACE_VERSION