]>
Commit | Line | Data |
---|---|---|
a314f26a | 1 | // std::codecvt implementation details, GNU version -*- C++ -*- |
2 | ||
f1717362 | 3 | // Copyright (C) 2002-2016 Free Software Foundation, Inc. |
a314f26a | 4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
6bc9506f | 8 | // Free Software Foundation; either version 3, or (at your option) |
a314f26a | 9 | // any later version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
6bc9506f | 16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
a314f26a | 24 | |
25 | // | |
26 | // ISO C++ 14882: 22.2.1.5 - Template class codecvt | |
27 | // | |
28 | ||
29 | // Written by Benjamin Kosnik <bkoz@redhat.com> | |
30 | ||
31 | #include <locale> | |
b83f6bce | 32 | #include <cstdlib> // For MB_CUR_MAX |
33 | #include <climits> // For MB_LEN_MAX | |
7f713ccf | 34 | #include <bits/c++locale_internal.h> |
a314f26a | 35 | |
2948dd21 | 36 | namespace std _GLIBCXX_VISIBILITY(default) |
37 | { | |
38 | _GLIBCXX_BEGIN_NAMESPACE_VERSION | |
1069247d | 39 | |
a314f26a | 40 | // Specializations. |
5a64d8cf | 41 | #ifdef _GLIBCXX_USE_WCHAR_T |
a314f26a | 42 | codecvt_base::result |
43 | codecvt<wchar_t, char, mbstate_t>:: | |
44 | do_out(state_type& __state, const intern_type* __from, | |
45 | const intern_type* __from_end, const intern_type*& __from_next, | |
46 | extern_type* __to, extern_type* __to_end, | |
47 | extern_type*& __to_next) const | |
48 | { | |
4229b2e4 | 49 | result __ret = ok; |
bc607063 | 50 | state_type __tmp_state(__state); |
d08559a9 | 51 | |
a314f26a | 52 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) |
53 | __c_locale __old = __uselocale(_M_c_locale_codecvt); | |
54 | #endif | |
4229b2e4 | 55 | |
bc607063 | 56 | // wcsnrtombs is *very* fast but stops if encounters NUL characters: |
57 | // in case we fall back to wcrtomb and then continue, in a loop. | |
58 | // NB: wcsnrtombs is a GNU extension | |
cd6e64dd | 59 | for (__from_next = __from, __to_next = __to; |
60 | __from_next < __from_end && __to_next < __to_end | |
61 | && __ret == ok;) | |
4229b2e4 | 62 | { |
bc607063 | 63 | const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0', |
64 | __from_end - __from_next); | |
65 | if (!__from_chunk_end) | |
66 | __from_chunk_end = __from_end; | |
67 | ||
cd6e64dd | 68 | __from = __from_next; |
bc607063 | 69 | const size_t __conv = wcsnrtombs(__to_next, &__from_next, |
70 | __from_chunk_end - __from_next, | |
71 | __to_end - __to_next, &__state); | |
72 | if (__conv == static_cast<size_t>(-1)) | |
73 | { | |
74 | // In case of error, in order to stop at the exact place we | |
75 | // have to start again from the beginning with a series of | |
76 | // wcrtomb. | |
cd6e64dd | 77 | for (; __from < __from_next; ++__from) |
78 | __to_next += wcrtomb(__to_next, *__from, &__tmp_state); | |
79 | __state = __tmp_state; | |
bc607063 | 80 | __ret = error; |
81 | } | |
82 | else if (__from_next && __from_next < __from_chunk_end) | |
83 | { | |
84 | __to_next += __conv; | |
85 | __ret = partial; | |
86 | } | |
87 | else | |
4229b2e4 | 88 | { |
bc607063 | 89 | __from_next = __from_chunk_end; |
90 | __to_next += __conv; | |
91 | } | |
92 | ||
93 | if (__from_next < __from_end && __ret == ok) | |
94 | { | |
95 | extern_type __buf[MB_LEN_MAX]; | |
96 | __tmp_state = __state; | |
4aa6630b | 97 | const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state); |
98 | if (__conv2 > static_cast<size_t>(__to_end - __to_next)) | |
bc607063 | 99 | __ret = partial; |
100 | else | |
d08559a9 | 101 | { |
4aa6630b | 102 | memcpy(__to_next, __buf, __conv2); |
bc607063 | 103 | __state = __tmp_state; |
4aa6630b | 104 | __to_next += __conv2; |
bc607063 | 105 | ++__from_next; |
d08559a9 | 106 | } |
4229b2e4 | 107 | } |
4229b2e4 | 108 | } |
109 | ||
a314f26a | 110 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) |
111 | __uselocale(__old); | |
112 | #endif | |
113 | ||
a314f26a | 114 | return __ret; |
115 | } | |
116 | ||
117 | codecvt_base::result | |
118 | codecvt<wchar_t, char, mbstate_t>:: | |
119 | do_in(state_type& __state, const extern_type* __from, | |
120 | const extern_type* __from_end, const extern_type*& __from_next, | |
121 | intern_type* __to, intern_type* __to_end, | |
122 | intern_type*& __to_next) const | |
123 | { | |
4229b2e4 | 124 | result __ret = ok; |
4229b2e4 | 125 | state_type __tmp_state(__state); |
51a4e5b9 | 126 | |
a314f26a | 127 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) |
128 | __c_locale __old = __uselocale(_M_c_locale_codecvt); | |
129 | #endif | |
4229b2e4 | 130 | |
51a4e5b9 | 131 | // mbsnrtowcs is *very* fast but stops if encounters NUL characters: |
132 | // in case we store a L'\0' and then continue, in a loop. | |
133 | // NB: mbsnrtowcs is a GNU extension | |
cd6e64dd | 134 | for (__from_next = __from, __to_next = __to; |
135 | __from_next < __from_end && __to_next < __to_end | |
136 | && __ret == ok;) | |
4229b2e4 | 137 | { |
51a4e5b9 | 138 | const extern_type* __from_chunk_end; |
139 | __from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0', | |
140 | __from_end | |
141 | - __from_next)); | |
142 | if (!__from_chunk_end) | |
143 | __from_chunk_end = __from_end; | |
144 | ||
cd6e64dd | 145 | __from = __from_next; |
786b40bb | 146 | size_t __conv = mbsnrtowcs(__to_next, &__from_next, |
147 | __from_chunk_end - __from_next, | |
148 | __to_end - __to_next, &__state); | |
4229b2e4 | 149 | if (__conv == static_cast<size_t>(-1)) |
150 | { | |
51a4e5b9 | 151 | // In case of error, in order to stop at the exact place we |
152 | // have to start again from the beginning with a series of | |
153 | // mbrtowc. | |
786b40bb | 154 | for (;; ++__to_next, __from += __conv) |
cd6e64dd | 155 | { |
786b40bb | 156 | __conv = mbrtowc(__to_next, __from, __from_end - __from, |
157 | &__tmp_state); | |
158 | if (__conv == static_cast<size_t>(-1) | |
159 | || __conv == static_cast<size_t>(-2)) | |
cd6e64dd | 160 | break; |
cd6e64dd | 161 | } |
162 | __from_next = __from; | |
51a4e5b9 | 163 | __state = __tmp_state; |
4229b2e4 | 164 | __ret = error; |
4229b2e4 | 165 | } |
51a4e5b9 | 166 | else if (__from_next && __from_next < __from_chunk_end) |
4229b2e4 | 167 | { |
51a4e5b9 | 168 | // It is unclear what to return in this case (see DR 382). |
169 | __to_next += __conv; | |
4229b2e4 | 170 | __ret = partial; |
4229b2e4 | 171 | } |
51a4e5b9 | 172 | else |
4229b2e4 | 173 | { |
51a4e5b9 | 174 | __from_next = __from_chunk_end; |
175 | __to_next += __conv; | |
4229b2e4 | 176 | } |
177 | ||
51a4e5b9 | 178 | if (__from_next < __from_end && __ret == ok) |
179 | { | |
180 | if (__to_next < __to_end) | |
181 | { | |
182 | // XXX Probably wrong for stateful encodings | |
cd6e64dd | 183 | __tmp_state = __state; |
51a4e5b9 | 184 | ++__from_next; |
185 | *__to_next++ = L'\0'; | |
186 | } | |
187 | else | |
188 | __ret = partial; | |
189 | } | |
4229b2e4 | 190 | } |
191 | ||
a314f26a | 192 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) |
193 | __uselocale(__old); | |
194 | #endif | |
195 | ||
4229b2e4 | 196 | return __ret; |
197 | } | |
198 | ||
199 | int | |
200 | codecvt<wchar_t, char, mbstate_t>:: | |
201 | do_encoding() const throw() | |
202 | { | |
203 | // XXX This implementation assumes that the encoding is | |
204 | // stateless and is either single-byte or variable-width. | |
205 | int __ret = 0; | |
206 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) | |
207 | __c_locale __old = __uselocale(_M_c_locale_codecvt); | |
208 | #endif | |
209 | if (MB_CUR_MAX == 1) | |
210 | __ret = 1; | |
211 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) | |
212 | __uselocale(__old); | |
213 | #endif | |
214 | return __ret; | |
215 | } | |
216 | ||
217 | int | |
218 | codecvt<wchar_t, char, mbstate_t>:: | |
219 | do_max_length() const throw() | |
220 | { | |
221 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) | |
222 | __c_locale __old = __uselocale(_M_c_locale_codecvt); | |
223 | #endif | |
224 | // XXX Probably wrong for stateful encodings. | |
225 | int __ret = MB_CUR_MAX; | |
226 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) | |
227 | __uselocale(__old); | |
228 | #endif | |
229 | return __ret; | |
230 | } | |
231 | ||
232 | int | |
233 | codecvt<wchar_t, char, mbstate_t>:: | |
234 | do_length(state_type& __state, const extern_type* __from, | |
235 | const extern_type* __end, size_t __max) const | |
236 | { | |
237 | int __ret = 0; | |
238 | state_type __tmp_state(__state); | |
cd6e64dd | 239 | |
4229b2e4 | 240 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) |
241 | __c_locale __old = __uselocale(_M_c_locale_codecvt); | |
242 | #endif | |
243 | ||
cd6e64dd | 244 | // mbsnrtowcs is *very* fast but stops if encounters NUL characters: |
245 | // in case we advance past it and then continue, in a loop. | |
246 | // NB: mbsnrtowcs is a GNU extension | |
247 | ||
248 | // A dummy internal buffer is needed in order for mbsnrtocws to consider | |
249 | // its fourth parameter (it wouldn't with NULL as first parameter). | |
250 | wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t) | |
251 | * __max)); | |
4229b2e4 | 252 | while (__from < __end && __max) |
a314f26a | 253 | { |
cd6e64dd | 254 | const extern_type* __from_chunk_end; |
255 | __from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0', | |
256 | __end | |
257 | - __from)); | |
258 | if (!__from_chunk_end) | |
259 | __from_chunk_end = __end; | |
260 | ||
261 | const extern_type* __tmp_from = __from; | |
786b40bb | 262 | size_t __conv = mbsnrtowcs(__to, &__from, |
263 | __from_chunk_end - __from, | |
264 | __max, &__state); | |
4229b2e4 | 265 | if (__conv == static_cast<size_t>(-1)) |
266 | { | |
cd6e64dd | 267 | // In case of error, in order to stop at the exact place we |
268 | // have to start again from the beginning with a series of | |
269 | // mbrtowc. | |
786b40bb | 270 | for (__from = __tmp_from;; __from += __conv) |
cd6e64dd | 271 | { |
9c371452 | 272 | __conv = mbrtowc(0, __from, __end - __from, |
786b40bb | 273 | &__tmp_state); |
274 | if (__conv == static_cast<size_t>(-1) | |
275 | || __conv == static_cast<size_t>(-2)) | |
cd6e64dd | 276 | break; |
cd6e64dd | 277 | } |
278 | __state = __tmp_state; | |
279 | __ret += __from - __tmp_from; | |
4229b2e4 | 280 | break; |
281 | } | |
cd6e64dd | 282 | if (!__from) |
283 | __from = __from_chunk_end; | |
284 | ||
285 | __ret += __from - __tmp_from; | |
286 | __max -= __conv; | |
287 | ||
288 | if (__from < __end && __max) | |
4229b2e4 | 289 | { |
290 | // XXX Probably wrong for stateful encodings | |
cd6e64dd | 291 | __tmp_state = __state; |
292 | ++__from; | |
293 | ++__ret; | |
294 | --__max; | |
4229b2e4 | 295 | } |
a314f26a | 296 | } |
4229b2e4 | 297 | |
298 | #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) | |
299 | __uselocale(__old); | |
300 | #endif | |
cd6e64dd | 301 | |
a314f26a | 302 | return __ret; |
303 | } | |
304 | #endif | |
1069247d | 305 | |
2948dd21 | 306 | _GLIBCXX_END_NAMESPACE_VERSION |
307 | } // namespace |