]>
Commit | Line | Data |
---|---|---|
33590f13 BK |
1 | // Locale support (codecvt) -*- C++ -*- |
2 | ||
6f48900c | 3 | // Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc. |
33590f13 BK |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 2, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // You should have received a copy of the GNU General Public License along | |
17 | // with this library; see the file COPYING. If not, write to the Free | |
18 | // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
19 | // USA. | |
20 | ||
21 | // As a special exception, you may use this file as part of a free software | |
22 | // library without restriction. Specifically, if other files instantiate | |
23 | // templates or use macros or inline functions from this file, or you compile | |
24 | // this file and link it with other files to produce an executable, this | |
25 | // file does not by itself cause the resulting executable to be covered by | |
26 | // the GNU General Public License. This exception does not however | |
27 | // invalidate any other reasons why the executable file might be covered by | |
28 | // the GNU General Public License. | |
29 | ||
30 | // | |
31 | // ISO C++ 14882: 22.2.1.5 Template class codecvt | |
32 | // | |
33 | ||
34 | // Warning: this file is not meant for user inclusion. Use <locale>. | |
35 | ||
36 | // Written by Benjamin Kosnik <bkoz@cygnus.com> | |
37 | ||
6f48900c BK |
38 | // XXX |
39 | // Define this here so codecvt.cc can have the _S_max_size definition. | |
40 | #define _GLIBCPP_USE___ENC_TRAITS 1 | |
33590f13 BK |
41 | |
42 | #if _GLIBCPP_USE_SHADOW_HEADERS | |
43 | using _C_legacy::CODESET; | |
44 | #endif | |
45 | ||
6f48900c BK |
46 | // Extension to use iconv for dealing with character encodings,
47 | // including conversions and comparisons between various character | |
48 | // sets. This object encapsulates data that may need to be shared between | |
49 | // char_traits, codecvt and ctype. | |
33590f13 BK |
50 | class __enc_traits |
51 | { | |
52 | public: | |
53 | // Types: | |
54 | // NB: A conversion descriptor subsumes and enhances the | |
55 | // functionality of a simple state type such as mbstate_t. | |
56 | typedef iconv_t __desc_type; | |
57 | ||
58 | protected: | |
59 | // Data Members: | |
60 | // Max size of charset encoding name | |
61 | static const int _S_max_size = 32; | |
62 | // Name of internal character set encoding. | |
63 | char _M_int_enc[_S_max_size]; | |
64 | // Name of external character set encoding. | |
65 | char _M_ext_enc[_S_max_size]; | |
66 | ||
67 | // Conversion descriptor between external encoding to internal encoding. | |
68 | __desc_type _M_in_desc; | |
69 | // Conversion descriptor between internal encoding to external encoding. | |
70 | __desc_type _M_out_desc; | |
71 | ||
72 | // Details the byte-order marker for the external encoding, if necessary. | |
73 | int _M_ext_bom; | |
74 | ||
75 | // Details the byte-order marker for the internal encoding, if necessary. | |
76 | int _M_int_bom; | |
77 | ||
78 | public: | |
6f48900c BK |
79 | explicit __enc_traits() |
80 | : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0) | |
81 | { | |
82 | memset(_M_int_enc, 0, _S_max_size); | |
83 | memset(_M_ext_enc, 0, _S_max_size); | |
84 | } | |
85 | ||
86 | explicit __enc_traits(const locale& __loc) | |
33590f13 BK |
87 | : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0) |
88 | { | |
89 | // __intc_end = whatever we are using internally, which is | |
1a808622 | 90 | // UCS4 (linux, solaris) |
33590f13 BK |
91 | // UCS2 == UNICODE (microsoft, java, aix, whatever...) |
92 | // XXX Currently don't know how to get this data from target system... | |
93 | strcpy(_M_int_enc, "UCS4"); | |
94 | ||
95 | // __extc_end = external codeset in current locale | |
96 | // XXX There has got to be a better way to do this. | |
97 | __c_locale __cloc; | |
98 | locale::facet::_S_create_c_locale(__cloc, __loc.name().c_str()); | |
99 | strcpy(_M_ext_enc, __nl_langinfo_l(CODESET, __cloc)); | |
100 | locale::facet::_S_destroy_c_locale(__cloc); | |
101 | } | |
102 | ||
6f48900c BK |
103 | explicit __enc_traits(const char* __int, const char* __ext, |
104 | int __ibom = 0, int __ebom = 0) | |
33590f13 BK |
105 | : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0) |
106 | { | |
107 | strncpy(_M_int_enc, __int, _S_max_size); | |
108 | strncpy(_M_ext_enc, __ext, _S_max_size); | |
109 | } | |
110 | ||
111 | // 21.1.2 traits typedefs | |
112 | // p4 | |
113 | // typedef STATE_T state_type | |
114 | // requires: state_type shall meet the requirements of | |
115 | // CopyConstructible types (20.1.3) | |
6f48900c | 116 | __enc_traits(const __enc_traits& __obj): _M_in_desc(0), _M_out_desc(0) |
33590f13 BK |
117 | { |
118 | strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size); | |
119 | strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size); | |
120 | _M_ext_bom = __obj._M_ext_bom; | |
121 | _M_int_bom = __obj._M_int_bom; | |
122 | } | |
123 | ||
6f48900c BK |
124 | // Need assignment operator as well. |
125 | __enc_traits& | |
126 | operator=(const __enc_traits& __obj) | |
127 | { | |
128 | strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size); | |
129 | strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size); | |
130 | _M_in_desc = 0; | |
131 | _M_out_desc = 0; | |
132 | _M_ext_bom = __obj._M_ext_bom; | |
133 | _M_int_bom = __obj._M_int_bom; | |
134 | } | |
135 | ||
33590f13 BK |
136 | ~__enc_traits() |
137 | { | |
1a808622 BK |
138 | __desc_type __err = reinterpret_cast<iconv_t>(-1); |
139 | if (_M_in_desc && _M_in_desc != __err) | |
140 | iconv_close(_M_in_desc); | |
141 | if (_M_out_desc && _M_out_desc != __err) | |
142 | iconv_close(_M_out_desc); | |
33590f13 BK |
143 | } |
144 | ||
33590f13 BK |
145 | void |
146 | _M_init() | |
147 | { | |
6f48900c BK |
148 | const __desc_type __err = reinterpret_cast<iconv_t>(-1); |
149 | if (!_M_in_desc) | |
150 | { | |
151 | _M_in_desc = iconv_open(_M_int_enc, _M_ext_enc); | |
152 | if (_M_in_desc == __err) | |
153 | __throw_runtime_error("creating iconv input descriptor failed."); | |
154 | } | |
155 | if (!_M_out_desc) | |
156 | { | |
157 | _M_out_desc = iconv_open(_M_ext_enc, _M_int_enc); | |
158 | if (_M_out_desc == __err) | |
159 | __throw_runtime_error("creating iconv output descriptor failed."); | |
160 | } | |
33590f13 BK |
161 | } |
162 | ||
163 | bool | |
164 | _M_good() | |
165 | { | |
6f48900c | 166 | const __desc_type __err = reinterpret_cast<iconv_t>(-1); |
1a808622 BK |
167 | bool __test = _M_in_desc && _M_in_desc != __err; |
168 | __test &= _M_out_desc && _M_out_desc != __err; | |
169 | return __test; | |
33590f13 BK |
170 | } |
171 | ||
172 | const __desc_type* | |
173 | _M_get_in_descriptor() | |
174 | { return &_M_in_desc; } | |
175 | ||
176 | const __desc_type* | |
177 | _M_get_out_descriptor() | |
178 | { return &_M_out_desc; } | |
179 | ||
33590f13 BK |
180 | int |
181 | _M_get_external_bom() | |
182 | { return _M_ext_bom; } | |
183 | ||
184 | int | |
185 | _M_get_internal_bom() | |
186 | { return _M_int_bom; } | |
6f48900c BK |
187 | |
188 | const char* | |
189 | _M_get_internal_enc() | |
190 | { return _M_int_enc; } | |
191 | ||
192 | const char* | |
193 | _M_get_external_enc() | |
194 | { return _M_ext_enc; } | |
33590f13 BK |
195 | }; |
196 | ||
197 | // Partial specialization | |
198 | // This specialization takes advantage of iconv to provide code | |
199 | // conversions between a large number of character encodings. | |
200 | template<typename _InternT, typename _ExternT> | |
201 | class codecvt<_InternT, _ExternT, __enc_traits> | |
202 | : public __codecvt_abstract_base<_InternT, _ExternT, __enc_traits> | |
203 | { | |
204 | public: | |
205 | // Types: | |
206 | typedef codecvt_base::result result; | |
207 | typedef _InternT intern_type; | |
208 | typedef _ExternT extern_type; | |
209 | typedef __enc_traits state_type; | |
210 | typedef __enc_traits::__desc_type __desc_type; | |
211 | typedef __enc_traits __enc_type; | |
212 | ||
213 | // Data Members: | |
214 | static locale::id id; | |
215 | ||
216 | explicit | |
217 | codecvt(size_t __refs = 0) | |
218 | : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) | |
219 | { } | |
220 | ||
221 | explicit | |
222 | codecvt(__enc_type* __enc, size_t __refs = 0) | |
223 | : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) | |
224 | { } | |
225 | ||
226 | protected: | |
227 | virtual | |
228 | ~codecvt() { } | |
229 | ||
230 | virtual result | |
231 | do_out(state_type& __state, const intern_type* __from, | |
232 | const intern_type* __from_end, const intern_type*& __from_next, | |
233 | extern_type* __to, extern_type* __to_end, | |
234 | extern_type*& __to_next) const; | |
235 | ||
236 | virtual result | |
237 | do_unshift(state_type& __state, extern_type* __to, | |
238 | extern_type* __to_end, extern_type*& __to_next) const; | |
239 | ||
240 | virtual result | |
241 | do_in(state_type& __state, const extern_type* __from, | |
242 | const extern_type* __from_end, const extern_type*& __from_next, | |
243 | intern_type* __to, intern_type* __to_end, | |
244 | intern_type*& __to_next) const; | |
245 | ||
246 | virtual int | |
247 | do_encoding() const throw(); | |
248 | ||
249 | virtual bool | |
250 | do_always_noconv() const throw(); | |
251 | ||
252 | virtual int | |
253 | do_length(const state_type&, const extern_type* __from, | |
254 | const extern_type* __end, size_t __max) const; | |
255 | ||
256 | virtual int | |
257 | do_max_length() const throw(); | |
258 | }; | |
259 | ||
260 | template<typename _InternT, typename _ExternT> | |
261 | locale::id | |
262 | codecvt<_InternT, _ExternT, __enc_traits>::id; | |
263 | ||
264 | // This adaptor works around the signature problems of the second | |
265 | // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 | |
07814743 BK |
266 | // uses 'char**', which matches the POSIX 1003.1-2001 standard. |
267 | // Using this adaptor, g++ will do the work for us. | |
33590f13 BK |
// Forward a call to an iconv-like function whose second parameter type
// (_Tp) is deduced from the function pointer itself, so the same call
// site works whether that parameter is 'char**' or 'const char**'.
template<typename _Tp>
  inline size_t
  __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                  iconv_t __cd, char** __inbuf, size_t* __inbytes,
                  char** __outbuf, size_t* __outbytes)
  {
    // A C-style cast is used on purpose: depending on _Tp this is either
    // an identity conversion or one that adds const at the inner level.
    _Tp __in = (_Tp)__inbuf;
    return __func(__cd, __in, __inbytes, __outbuf, __outbytes);
  }
33590f13 BK |
274 | |
275 | template<typename _InternT, typename _ExternT> | |
276 | codecvt_base::result | |
277 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
278 | do_out(state_type& __state, const intern_type* __from, | |
279 | const intern_type* __from_end, const intern_type*& __from_next, | |
280 | extern_type* __to, extern_type* __to_end, | |
281 | extern_type*& __to_next) const | |
282 | { | |
283 | result __ret = error; | |
284 | if (__state._M_good()) | |
285 | { | |
286 | typedef state_type::__desc_type __desc_type; | |
287 | const __desc_type* __desc = __state._M_get_out_descriptor(); | |
288 | const size_t __fmultiple = sizeof(intern_type) / sizeof(char); | |
07814743 | 289 | size_t __fbytes = __fmultiple * (__from_end - __from); |
33590f13 | 290 | const size_t __tmultiple = sizeof(extern_type) / sizeof(char); |
07814743 | 291 | size_t __tbytes = __tmultiple * (__to_end - __to); |
33590f13 BK |
292 | |
293 | // Argument list for iconv specifies a byte sequence. Thus, | |
294 | // all to/from arrays must be brutally casted to char*. | |
295 | char* __cto = reinterpret_cast<char*>(__to); | |
296 | char* __cfrom; | |
297 | size_t __conv; | |
298 | ||
299 | // Some encodings need a byte order marker as the first item | |
300 | // in the byte stream, to designate endian-ness. The default | |
301 | // value for the byte order marker is NULL, so if this is | |
302 | // the case, it's not necessary and we can just go on our | |
303 | // merry way. | |
304 | int __int_bom = __state._M_get_internal_bom(); | |
305 | if (__int_bom) | |
306 | { | |
307 | size_t __size = __from_end - __from; | |
308 | intern_type* __cfixed = static_cast<intern_type*>(__builtin_alloca(sizeof(intern_type) * (__size + 1))); | |
309 | __cfixed[0] = static_cast<intern_type>(__int_bom); | |
310 | char_traits<intern_type>::copy(__cfixed + 1, __from, __size); | |
311 | __cfrom = reinterpret_cast<char*>(__cfixed); | |
312 | __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, | |
07814743 | 313 | &__fbytes, &__cto, &__tbytes); |
33590f13 BK |
314 | } |
315 | else | |
316 | { | |
317 | intern_type* __cfixed = const_cast<intern_type*>(__from); | |
318 | __cfrom = reinterpret_cast<char*>(__cfixed); | |
07814743 BK |
319 | __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, &__fbytes, |
320 | &__cto, &__tbytes); | |
33590f13 BK |
321 | } |
322 | ||
323 | if (__conv != size_t(-1)) | |
324 | { | |
325 | __from_next = reinterpret_cast<const intern_type*>(__cfrom); | |
326 | __to_next = reinterpret_cast<extern_type*>(__cto); | |
327 | __ret = ok; | |
328 | } | |
329 | else | |
330 | { | |
07814743 | 331 | if (__fbytes < __fmultiple * (__from_end - __from)) |
33590f13 BK |
332 | { |
333 | __from_next = reinterpret_cast<const intern_type*>(__cfrom); | |
334 | __to_next = reinterpret_cast<extern_type*>(__cto); | |
335 | __ret = partial; | |
336 | } | |
337 | else | |
338 | __ret = error; | |
339 | } | |
340 | } | |
341 | return __ret; | |
342 | } | |
343 | ||
344 | template<typename _InternT, typename _ExternT> | |
345 | codecvt_base::result | |
346 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
347 | do_unshift(state_type& __state, extern_type* __to, | |
348 | extern_type* __to_end, extern_type*& __to_next) const | |
349 | { | |
350 | result __ret = error; | |
351 | if (__state._M_good()) | |
352 | { | |
353 | typedef state_type::__desc_type __desc_type; | |
354 | const __desc_type* __desc = __state._M_get_in_descriptor(); | |
355 | const size_t __tmultiple = sizeof(intern_type) / sizeof(char); | |
356 | size_t __tlen = __tmultiple * (__to_end - __to); | |
357 | ||
358 | // Argument list for iconv specifies a byte sequence. Thus, | |
359 | // all to/from arrays must be brutally casted to char*. | |
360 | char* __cto = reinterpret_cast<char*>(__to); | |
361 | size_t __conv = __iconv_adaptor(iconv,*__desc, NULL, NULL, | |
362 | &__cto, &__tlen); | |
363 | ||
364 | if (__conv != size_t(-1)) | |
365 | { | |
366 | __to_next = reinterpret_cast<extern_type*>(__cto); | |
367 | if (__tlen == __tmultiple * (__to_end - __to)) | |
368 | __ret = noconv; | |
369 | else if (__tlen == 0) | |
370 | __ret = ok; | |
371 | else | |
372 | __ret = partial; | |
373 | } | |
374 | else | |
375 | __ret = error; | |
376 | } | |
377 | return __ret; | |
378 | } | |
379 | ||
380 | template<typename _InternT, typename _ExternT> | |
381 | codecvt_base::result | |
382 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
383 | do_in(state_type& __state, const extern_type* __from, | |
384 | const extern_type* __from_end, const extern_type*& __from_next, | |
385 | intern_type* __to, intern_type* __to_end, | |
386 | intern_type*& __to_next) const | |
387 | { | |
388 | result __ret = error; | |
389 | if (__state._M_good()) | |
390 | { | |
391 | typedef state_type::__desc_type __desc_type; | |
392 | const __desc_type* __desc = __state._M_get_in_descriptor(); | |
393 | const size_t __fmultiple = sizeof(extern_type) / sizeof(char); | |
394 | size_t __flen = __fmultiple * (__from_end - __from); | |
395 | const size_t __tmultiple = sizeof(intern_type) / sizeof(char); | |
396 | size_t __tlen = __tmultiple * (__to_end - __to); | |
397 | ||
398 | // Argument list for iconv specifies a byte sequence. Thus, | |
399 | // all to/from arrays must be brutally casted to char*. | |
400 | char* __cto = reinterpret_cast<char*>(__to); | |
401 | char* __cfrom; | |
402 | size_t __conv; | |
403 | ||
404 | // Some encodings need a byte order marker as the first item | |
405 | // in the byte stream, to designate endian-ness. The default | |
406 | // value for the byte order marker is NULL, so if this is | |
407 | // the case, it's not necessary and we can just go on our | |
408 | // merry way. | |
409 | int __ext_bom = __state._M_get_external_bom(); | |
410 | if (__ext_bom) | |
411 | { | |
412 | size_t __size = __from_end - __from; | |
413 | extern_type* __cfixed = static_cast<extern_type*>(__builtin_alloca(sizeof(extern_type) * (__size + 1))); | |
414 | __cfixed[0] = static_cast<extern_type>(__ext_bom); | |
415 | char_traits<extern_type>::copy(__cfixed + 1, __from, __size); | |
416 | __cfrom = reinterpret_cast<char*>(__cfixed); | |
417 | __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, | |
418 | &__flen, &__cto, &__tlen); | |
419 | } | |
420 | else | |
421 | { | |
422 | extern_type* __cfixed = const_cast<extern_type*>(__from); | |
423 | __cfrom = reinterpret_cast<char*>(__cfixed); | |
424 | __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, | |
425 | &__flen, &__cto, &__tlen); | |
426 | } | |
427 | ||
428 | ||
429 | if (__conv != size_t(-1)) | |
430 | { | |
431 | __from_next = reinterpret_cast<const extern_type*>(__cfrom); | |
432 | __to_next = reinterpret_cast<intern_type*>(__cto); | |
433 | __ret = ok; | |
434 | } | |
435 | else | |
436 | { | |
437 | if (__flen < static_cast<size_t>(__from_end - __from)) | |
438 | { | |
439 | __from_next = reinterpret_cast<const extern_type*>(__cfrom); | |
440 | __to_next = reinterpret_cast<intern_type*>(__cto); | |
441 | __ret = partial; | |
442 | } | |
443 | else | |
444 | __ret = error; | |
445 | } | |
446 | } | |
447 | return __ret; | |
448 | } | |
449 | ||
450 | template<typename _InternT, typename _ExternT> | |
451 | int | |
452 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
453 | do_encoding() const throw() | |
07814743 BK |
454 | { |
455 | int __ret = 0; | |
456 | if (sizeof(_ExternT) <= sizeof(_InternT)) | |
457 | __ret = sizeof(_InternT)/sizeof(_ExternT); | |
458 | return __ret; | |
459 | } | |
33590f13 BK |
460 | |
461 | template<typename _InternT, typename _ExternT> | |
462 | bool | |
463 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
464 | do_always_noconv() const throw() | |
465 | { return false; } | |
466 | ||
467 | template<typename _InternT, typename _ExternT> | |
468 | int | |
469 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
470 | do_length(const state_type&, const extern_type* __from, | |
471 | const extern_type* __end, size_t __max) const | |
472 | { return min(__max, static_cast<size_t>(__end - __from)); } | |
473 | ||
#ifdef _GLIBCPP_RESOLVE_LIB_DEFECTS
// 74.  Garbled text for codecvt::do_max_length
// This definition is compiled only when _GLIBCPP_RESOLVE_LIB_DEFECTS is
// defined; it always reports a maximum length of 1.
template<typename _InternT, typename _ExternT>
  int
  codecvt<_InternT, _ExternT, __enc_traits>::
  do_max_length() const throw()
  {
    return 1;
  }
#endif