1 // std::codecvt implementation details, GNU version -*- C++ -*-
3 // Copyright (C) 2002-2023 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 // ISO C++ 14882: 22.2.1.5 - Template class codecvt
29 // Written by Benjamin Kosnik <bkoz@redhat.com>
32 #include <cstdlib> // For MB_CUR_MAX
33 #include <climits> // For MB_LEN_MAX
34 #include <bits/c++locale_internal.h>
36 namespace std
_GLIBCXX_VISIBILITY(default)
38 _GLIBCXX_BEGIN_NAMESPACE_VERSION
41 #ifdef _GLIBCXX_USE_WCHAR_T
// do_out: converts the wide-character range [__from, __from_end) into the
// external multibyte buffer [__to, __to_end).  __from_next and __to_next are
// output references that are advanced as input is consumed and output is
// produced.  Bulk conversion uses wcsnrtombs on chunks delimited by L'\0';
// the character at a chunk boundary is converted separately with wcrtomb.
// NOTE(review): the return type, braces, and the declaration/updates of
// __ret are not present in this view of the file.
43 codecvt
<wchar_t, char, mbstate_t>::
44 do_out(state_type
& __state
, const intern_type
* __from
,
45 const intern_type
* __from_end
, const intern_type
*& __from_next
,
46 extern_type
* __to
, extern_type
* __to_end
,
47 extern_type
*& __to_next
) const
// Working copy of the conversion state.  It is used below to replay a chunk
// with wcrtomb and to try a single-character conversion without touching
// __state until the result is accepted.
50 state_type
__tmp_state(__state
);
// On glibc newer than 2.2, switch to this facet's C locale
// (_M_c_locale_codecvt) and remember the previous one in __old.
// NOTE(review): the matching restore is presumably guarded by the #if at the
// end of this function; the restoring statement is not visible here.
52 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
53 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
56 // wcsnrtombs is *very* fast but stops if it encounters NUL characters:
57 // in that case we fall back to wcrtomb and then continue, in a loop.
58 // NB: wcsnrtombs is a GNU extension
// Loop while both unread input and free output space remain.
59 for (__from_next
= __from
, __to_next
= __to
;
60 __from_next
< __from_end
&& __to_next
< __to_end
// The current chunk ends at the first L'\0' in the remaining input, or at
// __from_end when wmemchr finds none (null result).
63 const intern_type
* __from_chunk_end
= wmemchr(__from_next
, L
'\0',
64 __from_end
- __from_next
);
65 if (!__from_chunk_end
)
66 __from_chunk_end
= __from_end
;
// Convert the chunk in one call; wcsnrtombs updates __from_next and __state.
69 const size_t __conv
= wcsnrtombs(__to_next
, &__from_next
,
70 __from_chunk_end
- __from_next
,
71 __to_end
- __to_next
, &__state
);
72 if (__conv
== static_cast<size_t>(-1))
74 // In case of error, in order to stop at the exact place we
75 // have to start again from the beginning with a series of wcrtomb calls.
// Replay every character from __from up to the position wcsnrtombs reported,
// using the state snapshot, so that __to_next ends exactly after the output
// of the successfully converted characters.
77 for (; __from
< __from_next
; ++__from
)
78 __to_next
+= wcrtomb(__to_next
, *__from
, &__tmp_state
);
// Publish the state reached by the replay.
79 __state
= __tmp_state
;
// wcsnrtombs left __from_next non-null and short of the chunk end.
// NOTE(review): presumably the output buffer filled up; the body of this
// branch is not visible in this view.
82 else if (__from_next
&& __from_next
< __from_chunk_end
)
// Otherwise the whole chunk is treated as consumed.
89 __from_next
= __from_chunk_end
;
// Input remains after the chunk and no failure has been recorded in __ret:
// convert the single character at __from_next (the chunk boundary) by hand.
93 if (__from_next
< __from_end
&& __ret
== ok
)
// Convert into a scratch buffer with a scratch state first, so that nothing
// is committed before the size check below.
95 extern_type __buf
[MB_LEN_MAX
];
96 __tmp_state
= __state
;
97 const size_t __conv2
= wcrtomb(__buf
, *__from_next
, &__tmp_state
);
// Does the encoded character exceed the remaining output space?
// NOTE(review): the statement executed when it does is not visible here.
98 if (__conv2
> static_cast<size_t>(__to_end
- __to_next
))
// Commit: copy the encoded bytes, adopt the new state, advance the output.
102 memcpy(__to_next
, __buf
, __conv2
);
103 __state
= __tmp_state
;
104 __to_next
+= __conv2
;
110 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
// do_in: converts the external multibyte range [__from, __from_end) into the
// wide-character buffer [__to, __to_end).  __from_next and __to_next are
// output references that are advanced as input is consumed and output is
// produced.  Bulk conversion uses mbsnrtowcs on chunks delimited by '\0';
// a L'\0' is stored by hand at a chunk boundary.
// NOTE(review): the return type, braces, and the declaration/updates of
// __ret are not present in this view of the file.
118 codecvt
<wchar_t, char, mbstate_t>::
119 do_in(state_type
& __state
, const extern_type
* __from
,
120 const extern_type
* __from_end
, const extern_type
*& __from_next
,
121 intern_type
* __to
, intern_type
* __to_end
,
122 intern_type
*& __to_next
) const
// Working copy of the conversion state, used by the mbrtowc replay below.
125 state_type
__tmp_state(__state
);
// On glibc newer than 2.2, switch to this facet's C locale
// (_M_c_locale_codecvt) and remember the previous one in __old.
// NOTE(review): the matching restore is presumably guarded by the #if at the
// end of this function; the restoring statement is not visible here.
127 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
128 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
131 // mbsnrtowcs is *very* fast but stops if it encounters NUL characters:
132 // in that case we store a L'\0' and then continue, in a loop.
133 // NB: mbsnrtowcs is a GNU extension
// Loop while both unread input and free output space remain.
134 for (__from_next
= __from
, __to_next
= __to
;
135 __from_next
< __from_end
&& __to_next
< __to_end
// The current chunk ends at the first '\0' byte found by memchr, or at
// __from_end when none is found (null result).
// NOTE(review): the length argument of the memchr call is not visible here.
138 const extern_type
* __from_chunk_end
;
139 __from_chunk_end
= static_cast<const extern_type
*>(memchr(__from_next
, '\0',
142 if (!__from_chunk_end
)
143 __from_chunk_end
= __from_end
;
// Remember where this chunk starts so it can be replayed on error.
145 __from
= __from_next
;
// Convert the chunk in one call; mbsnrtowcs updates __from_next and __state.
146 size_t __conv
= mbsnrtowcs(__to_next
, &__from_next
,
147 __from_chunk_end
- __from_next
,
148 __to_end
- __to_next
, &__state
);
149 if (__conv
== static_cast<size_t>(-1))
151 // In case of error, in order to stop at the exact place we
152 // have to start again from the beginning with a series of mbrtowc calls.
// Each iteration converts one character starting at __from; the loop
// increment then advances the output by one wide character and the input by
// the __conv bytes just consumed.
154 for (;; ++__to_next
, __from
+= __conv
)
// NOTE(review): the final (state) argument of mbrtowc is not visible here.
156 __conv
= mbrtowc(__to_next
, __from
, __from_end
- __from
,
// (size_t)-1 and (size_t)-2 are mbrtowc's invalid-sequence and
// incomplete-sequence results; the replay stops at the offending character.
158 if (__conv
== static_cast<size_t>(-1)
159 || __conv
== static_cast<size_t>(-2))
// Report the exact stop position and publish the state reached by the replay.
162 __from_next
= __from
;
163 __state
= __tmp_state
;
// mbsnrtowcs left __from_next non-null and short of the chunk end.
166 else if (__from_next
&& __from_next
< __from_chunk_end
)
168 // It is unclear what to return in this case (see DR 382).
// Otherwise the whole chunk is treated as consumed.
174 __from_next
= __from_chunk_end
;
// Input remains after the chunk and no failure has been recorded in __ret:
// handle the chunk boundary by hand.
178 if (__from_next
< __from_end
&& __ret
== ok
)
// Only store the terminator when there is room left in the output.
180 if (__to_next
< __to_end
)
182 // XXX Probably wrong for stateful encodings
183 __tmp_state
= __state
;
// Emit the wide NUL and advance the output position.
185 *__to_next
++ = L
'\0';
192 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
// do_encoding: non-throwing (throw()) query about the external encoding.
// NOTE(review): only the locale switch is visible in this view; the
// computation of the result and the return statement are not shown.
200 codecvt
<wchar_t, char, mbstate_t>::
201 do_encoding() const throw()
203 // XXX This implementation assumes that the encoding is
204 // stateless and is either single-byte or variable-width.
// On glibc newer than 2.2, switch to this facet's C locale
// (_M_c_locale_codecvt) and remember the previous one in __old.
206 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
207 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
// NOTE(review): presumably guards the restore of __old; the guarded
// statement is not visible here.
211 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
// do_max_length: non-throwing (throw()) query that reads MB_CUR_MAX while
// this facet's C locale is selected.
// NOTE(review): the return statement is not visible in this view.
218 codecvt
<wchar_t, char, mbstate_t>::
219 do_max_length() const throw()
// On glibc newer than 2.2, switch to this facet's C locale
// (_M_c_locale_codecvt) and remember the previous one in __old, so that
// MB_CUR_MAX below is evaluated under that locale.
221 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
222 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
224 // XXX Probably wrong for stateful encodings.
225 int __ret
= MB_CUR_MAX
;
// NOTE(review): presumably guards the restore of __old; the guarded
// statement is not visible here.
226 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
// do_length: scans the multibyte range [__from, __end) with mbsnrtowcs,
// chunked at '\0' bytes, and accumulates in __ret the number of bytes
// consumed, looping while input remains and __max is non-zero.
// NOTE(review): the return type, braces, the declaration of __ret, and the
// statements that decrement __max are not present in this view of the file.
233 codecvt
<wchar_t, char, mbstate_t>::
234 do_length(state_type
& __state
, const extern_type
* __from
,
235 const extern_type
* __end
, size_t __max
) const
// Working copy of the conversion state, used by the mbrtowc replay below.
238 state_type
__tmp_state(__state
);
// On glibc newer than 2.2, switch to this facet's C locale
// (_M_c_locale_codecvt) and remember the previous one in __old.
// NOTE(review): the matching restore is presumably guarded by the #if at the
// end of this function; the restoring statement is not visible here.
240 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
241 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
244 // mbsnrtowcs is *very* fast but stops if it encounters NUL characters:
245 // in that case we advance past it and then continue, in a loop.
246 // NB: mbsnrtowcs is a GNU extension
248 // A dummy internal buffer is needed in order for mbsnrtowcs to consider
249 // its fourth parameter (it wouldn't with NULL as first parameter).
// NOTE(review): the size expression of the alloca call is truncated in this
// view; only the sizeof(wchar_t) factor is visible.
250 wchar_t* __to
= static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
// Loop while unread input remains and __max is non-zero.
252 while (__from
< __end
&& __max
)
// The current chunk ends at the first '\0' byte found by memchr, or at
// __end when none is found (null result).
// NOTE(review): the length argument of the memchr call is not visible here.
254 const extern_type
* __from_chunk_end
;
255 __from_chunk_end
= static_cast<const extern_type
*>(memchr(__from
, '\0',
258 if (!__from_chunk_end
)
259 __from_chunk_end
= __end
;
// Remember where this chunk starts: it is the replay origin on error and the
// base for the consumed-byte count.
261 const extern_type
* __tmp_from
= __from
;
// NOTE(review): the remaining arguments of mbsnrtowcs are not visible here.
262 size_t __conv
= mbsnrtowcs(__to
, &__from
,
263 __from_chunk_end
- __from
,
265 if (__conv
== static_cast<size_t>(-1))
267 // In case of error, in order to stop at the exact place we
268 // have to start again from the beginning with a series of mbrtowc calls.
// Restart at the chunk start and step one character at a time; the loop
// increment advances __from by the __conv bytes just consumed.
270 for (__from
= __tmp_from
;; __from
+= __conv
)
// A null first argument means no wide character is stored.
// NOTE(review): the final (state) argument of mbrtowc is not visible here.
272 __conv
= mbrtowc(0, __from
, __end
- __from
,
// (size_t)-1 and (size_t)-2 are mbrtowc's invalid-sequence and
// incomplete-sequence results; the replay stops at the offending character.
274 if (__conv
== static_cast<size_t>(-1)
275 || __conv
== static_cast<size_t>(-2))
// Publish the replayed state and count the bytes consumed before the error.
278 __state
= __tmp_state
;
279 __ret
+= __from
- __tmp_from
;
// NOTE(review): the condition guarding this assignment is not visible in
// this view; it moves __from to the end of the current chunk.
283 __from
= __from_chunk_end
;
// Count the bytes consumed from this chunk.
285 __ret
+= __from
- __tmp_from
;
// Input and budget both remain after the chunk.
// NOTE(review): apart from the state copy below, the statements of this
// branch are not visible here.
288 if (__from
< __end
&& __max
)
290 // XXX Probably wrong for stateful encodings
291 __tmp_state
= __state
;
298 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
306 _GLIBCXX_END_NAMESPACE_VERSION