]>
Commit | Line | Data |
---|---|---|
a98e4e62 JM |
1 | // std::codecvt implementation details, DragonFly version -*- C++ -*- |
2 | ||
7adcbafe | 3 | // Copyright (C) 2015-2022 Free Software Foundation, Inc. |
a98e4e62 JM |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | // | |
26 | // ISO C++ 14882: 22.2.1.5 - Template class codecvt | |
27 | // | |
28 | ||
29 | // Written by Benjamin Kosnik <bkoz@redhat.com> | |
30 | // Modified for DragonFly by John Marino <gnugcc@marino.st> | |
31 | ||
32 | #include <locale> | |
33 | #include <cstring> | |
34 | #include <cstdlib> // For MB_CUR_MAX | |
35 | #include <climits> // For MB_LEN_MAX | |
36 | ||
37 | namespace std _GLIBCXX_VISIBILITY(default) | |
38 | { | |
39 | _GLIBCXX_BEGIN_NAMESPACE_VERSION | |
40 | ||
41 | // Specializations. | |
42 | #ifdef _GLIBCXX_USE_WCHAR_T | |
43 | codecvt_base::result | |
44 | codecvt<wchar_t, char, mbstate_t>:: | |
f92ab29f | 45 | do_out(state_type& __state, const intern_type* __from, |
a98e4e62 JM |
46 | const intern_type* __from_end, const intern_type*& __from_next, |
47 | extern_type* __to, extern_type* __to_end, | |
48 | extern_type*& __to_next) const | |
49 | { | |
50 | result __ret = ok; | |
51 | state_type __tmp_state(__state); | |
52 | ||
53 | __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt); | |
54 | ||
55 | // wcsnrtombs is *very* fast but stops if encounters NUL characters: | |
56 | // in case we fall back to wcrtomb and then continue, in a loop. | |
57 | // NB: wcsnrtombs is a GNU extension | |
58 | for (__from_next = __from, __to_next = __to; | |
59 | __from_next < __from_end && __to_next < __to_end | |
60 | && __ret == ok;) | |
61 | { | |
62 | const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0', | |
63 | __from_end - __from_next); | |
64 | if (!__from_chunk_end) | |
65 | __from_chunk_end = __from_end; | |
66 | ||
67 | __from = __from_next; | |
68 | const size_t __conv = wcsnrtombs(__to_next, &__from_next, | |
69 | __from_chunk_end - __from_next, | |
70 | __to_end - __to_next, &__state); | |
71 | if (__conv == static_cast<size_t>(-1)) | |
72 | { | |
73 | // In case of error, in order to stop at the exact place we | |
74 | // have to start again from the beginning with a series of | |
75 | // wcrtomb. | |
76 | for (; __from < __from_next; ++__from) | |
77 | __to_next += wcrtomb(__to_next, *__from, &__tmp_state); | |
78 | __state = __tmp_state; | |
79 | __ret = error; | |
80 | } | |
81 | else if (__from_next && __from_next < __from_chunk_end) | |
82 | { | |
83 | __to_next += __conv; | |
84 | __ret = partial; | |
85 | } | |
86 | else | |
87 | { | |
88 | __from_next = __from_chunk_end; | |
89 | __to_next += __conv; | |
90 | } | |
91 | ||
92 | if (__from_next < __from_end && __ret == ok) | |
93 | { | |
94 | extern_type __buf[MB_LEN_MAX]; | |
95 | __tmp_state = __state; | |
96 | const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state); | |
97 | if (__conv2 > static_cast<size_t>(__to_end - __to_next)) | |
98 | __ret = partial; | |
99 | else | |
100 | { | |
101 | memcpy(__to_next, __buf, __conv2); | |
102 | __state = __tmp_state; | |
103 | __to_next += __conv2; | |
104 | ++__from_next; | |
105 | } | |
106 | } | |
107 | } | |
108 | ||
109 | uselocale((locale_t)__old); | |
110 | ||
f92ab29f | 111 | return __ret; |
a98e4e62 | 112 | } |
f92ab29f | 113 | |
a98e4e62 JM |
114 | codecvt_base::result |
115 | codecvt<wchar_t, char, mbstate_t>:: | |
f92ab29f | 116 | do_in(state_type& __state, const extern_type* __from, |
a98e4e62 JM |
117 | const extern_type* __from_end, const extern_type*& __from_next, |
118 | intern_type* __to, intern_type* __to_end, | |
119 | intern_type*& __to_next) const | |
120 | { | |
121 | result __ret = ok; | |
122 | state_type __tmp_state(__state); | |
123 | ||
124 | __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt); | |
125 | ||
126 | // mbsnrtowcs is *very* fast but stops if encounters NUL characters: | |
127 | // in case we store a L'\0' and then continue, in a loop. | |
128 | // NB: mbsnrtowcs is a GNU extension | |
129 | for (__from_next = __from, __to_next = __to; | |
130 | __from_next < __from_end && __to_next < __to_end | |
131 | && __ret == ok;) | |
132 | { | |
133 | const extern_type* __from_chunk_end; | |
134 | __from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0', | |
135 | __from_end | |
136 | - __from_next)); | |
137 | if (!__from_chunk_end) | |
138 | __from_chunk_end = __from_end; | |
139 | ||
140 | __from = __from_next; | |
141 | size_t __conv = mbsnrtowcs(__to_next, &__from_next, | |
142 | __from_chunk_end - __from_next, | |
143 | __to_end - __to_next, &__state); | |
144 | if (__conv == static_cast<size_t>(-1)) | |
145 | { | |
146 | // In case of error, in order to stop at the exact place we | |
147 | // have to start again from the beginning with a series of | |
148 | // mbrtowc. | |
149 | for (;; ++__to_next, __from += __conv) | |
150 | { | |
151 | __conv = mbrtowc(__to_next, __from, __from_end - __from, | |
152 | &__tmp_state); | |
153 | if (__conv == static_cast<size_t>(-1) | |
154 | || __conv == static_cast<size_t>(-2)) | |
155 | break; | |
156 | } | |
157 | __from_next = __from; | |
f92ab29f | 158 | __state = __tmp_state; |
a98e4e62 JM |
159 | __ret = error; |
160 | } | |
161 | else if (__from_next && __from_next < __from_chunk_end) | |
162 | { | |
f92ab29f | 163 | // It is unclear what to return in this case (see DR 382). |
a98e4e62 JM |
164 | __to_next += __conv; |
165 | __ret = partial; | |
166 | } | |
167 | else | |
168 | { | |
169 | __from_next = __from_chunk_end; | |
170 | __to_next += __conv; | |
171 | } | |
172 | ||
173 | if (__from_next < __from_end && __ret == ok) | |
174 | { | |
175 | if (__to_next < __to_end) | |
176 | { | |
177 | // XXX Probably wrong for stateful encodings | |
f92ab29f | 178 | __tmp_state = __state; |
a98e4e62 JM |
179 | ++__from_next; |
180 | *__to_next++ = L'\0'; | |
181 | } | |
182 | else | |
183 | __ret = partial; | |
184 | } | |
185 | } | |
186 | ||
187 | uselocale((locale_t)__old); | |
188 | ||
f92ab29f | 189 | return __ret; |
a98e4e62 JM |
190 | } |
191 | ||
f92ab29f | 192 | int |
a98e4e62 JM |
193 | codecvt<wchar_t, char, mbstate_t>:: |
194 | do_encoding() const throw() | |
195 | { | |
196 | // XXX This implementation assumes that the encoding is | |
197 | // stateless and is either single-byte or variable-width. | |
198 | int __ret = 0; | |
199 | __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt); | |
200 | if (MB_CUR_MAX == 1) | |
201 | __ret = 1; | |
202 | uselocale((locale_t)__old); | |
203 | return __ret; | |
f92ab29f | 204 | } |
a98e4e62 | 205 | |
f92ab29f | 206 | int |
a98e4e62 JM |
207 | codecvt<wchar_t, char, mbstate_t>:: |
208 | do_max_length() const throw() | |
209 | { | |
210 | __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt); | |
211 | // XXX Probably wrong for stateful encodings. | |
212 | int __ret = MB_CUR_MAX; | |
213 | uselocale((locale_t)__old); | |
214 | return __ret; | |
215 | } | |
f92ab29f CG |
216 | |
217 | int | |
a98e4e62 JM |
218 | codecvt<wchar_t, char, mbstate_t>:: |
219 | do_length(state_type& __state, const extern_type* __from, | |
220 | const extern_type* __end, size_t __max) const | |
221 | { | |
222 | int __ret = 0; | |
223 | state_type __tmp_state(__state); | |
224 | ||
225 | __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt); | |
226 | ||
227 | // mbsnrtowcs is *very* fast but stops if encounters NUL characters: | |
228 | // in case we advance past it and then continue, in a loop. | |
229 | // NB: mbsnrtowcs is a GNU extension | |
f92ab29f | 230 | |
a98e4e62 JM |
231 | // A dummy internal buffer is needed in order for mbsnrtocws to consider |
232 | // its fourth parameter (it wouldn't with NULL as first parameter). | |
f92ab29f | 233 | wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t) |
a98e4e62 JM |
234 | * __max)); |
235 | while (__from < __end && __max) | |
236 | { | |
237 | const extern_type* __from_chunk_end; | |
238 | __from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0', | |
239 | __end | |
240 | - __from)); | |
241 | if (!__from_chunk_end) | |
242 | __from_chunk_end = __end; | |
243 | ||
244 | const extern_type* __tmp_from = __from; | |
245 | size_t __conv = mbsnrtowcs(__to, &__from, | |
246 | __from_chunk_end - __from, | |
247 | __max, &__state); | |
248 | if (__conv == static_cast<size_t>(-1)) | |
249 | { | |
250 | // In case of error, in order to stop at the exact place we | |
251 | // have to start again from the beginning with a series of | |
252 | // mbrtowc. | |
253 | for (__from = __tmp_from;; __from += __conv) | |
254 | { | |
255 | __conv = mbrtowc(0, __from, __end - __from, | |
256 | &__tmp_state); | |
257 | if (__conv == static_cast<size_t>(-1) | |
258 | || __conv == static_cast<size_t>(-2)) | |
259 | break; | |
260 | } | |
261 | __state = __tmp_state; | |
262 | __ret += __from - __tmp_from; | |
263 | break; | |
264 | } | |
265 | if (!__from) | |
266 | __from = __from_chunk_end; | |
f92ab29f | 267 | |
a98e4e62 JM |
268 | __ret += __from - __tmp_from; |
269 | __max -= __conv; | |
270 | ||
271 | if (__from < __end && __max) | |
272 | { | |
273 | // XXX Probably wrong for stateful encodings | |
274 | __tmp_state = __state; | |
275 | ++__from; | |
276 | ++__ret; | |
277 | --__max; | |
278 | } | |
279 | } | |
280 | ||
281 | uselocale((locale_t)__old); | |
282 | ||
f92ab29f | 283 | return __ret; |
a98e4e62 JM |
284 | } |
285 | #endif | |
286 | ||
287 | _GLIBCXX_END_NAMESPACE_VERSION | |
288 | } // namespace |