]>
Commit | Line | Data |
---|---|---|
33590f13 BK |
1 | // Locale support (codecvt) -*- C++ -*- |
2 | ||
6f48900c | 3 | // Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc. |
33590f13 BK |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 2, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // You should have received a copy of the GNU General Public License along | |
17 | // with this library; see the file COPYING. If not, write to the Free | |
18 | // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
19 | // USA. | |
20 | ||
21 | // As a special exception, you may use this file as part of a free software | |
22 | // library without restriction. Specifically, if other files instantiate | |
23 | // templates or use macros or inline functions from this file, or you compile | |
24 | // this file and link it with other files to produce an executable, this | |
25 | // file does not by itself cause the resulting executable to be covered by | |
26 | // the GNU General Public License. This exception does not however | |
27 | // invalidate any other reasons why the executable file might be covered by | |
28 | // the GNU General Public License. | |
29 | ||
30 | // | |
31 | // ISO C++ 14882: 22.2.1.5 Template class codecvt | |
32 | // | |
33 | ||
34 | // Warning: this file is not meant for user inclusion. Use <locale>. | |
35 | ||
36 | // Written by Benjamin Kosnik <bkoz@cygnus.com> | |
37 | ||
6f48900c BK |
38 | // XXX |
39 | // Define this here so codecvt.cc can have the _S_max_size definition. | |
40 | #define _GLIBCPP_USE___ENC_TRAITS 1 | |
33590f13 | 41 | |
6f48900c BK |
42 | // Extension to use iconv for dealing with character encodings,
43 | // including conversions and comparisons between various character | |
44 | // sets. This object encapsulates data that may need to be shared between | |
45 | // char_traits, codecvt and ctype. | |
33590f13 BK |
46 | class __enc_traits |
47 | { | |
48 | public: | |
49 | // Types: | |
50 | // NB: A conversion descriptor subsumes and enhances the | |
51 | // functionality of a simple state type such as mbstate_t. | |
52 | typedef iconv_t __desc_type; | |
53 | ||
54 | protected: | |
55 | // Data Members: | |
56 | // Max size of charset encoding name | |
57 | static const int _S_max_size = 32; | |
58 | // Name of internal character set encoding. | |
59 | char _M_int_enc[_S_max_size]; | |
60 | // Name of external character set encoding. | |
61 | char _M_ext_enc[_S_max_size]; | |
62 | ||
63 | // Conversion descriptor between external encoding to internal encoding. | |
64 | __desc_type _M_in_desc; | |
65 | // Conversion descriptor between internal encoding to external encoding. | |
66 | __desc_type _M_out_desc; | |
67 | ||
68 | // Details the byte-order marker for the external encoding, if necessary. | |
69 | int _M_ext_bom; | |
70 | ||
71 | // Details the byte-order marker for the internal encoding, if necessary. | |
72 | int _M_int_bom; | |
73 | ||
74 | public: | |
6f48900c BK |
75 | explicit __enc_traits() |
76 | : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0) | |
77 | { | |
78 | memset(_M_int_enc, 0, _S_max_size); | |
79 | memset(_M_ext_enc, 0, _S_max_size); | |
80 | } | |
81 | ||
6f48900c BK |
82 | explicit __enc_traits(const char* __int, const char* __ext, |
83 | int __ibom = 0, int __ebom = 0) | |
33590f13 BK |
84 | : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0) |
85 | { | |
86 | strncpy(_M_int_enc, __int, _S_max_size); | |
87 | strncpy(_M_ext_enc, __ext, _S_max_size); | |
88 | } | |
89 | ||
90 | // 21.1.2 traits typedefs | |
91 | // p4 | |
92 | // typedef STATE_T state_type | |
93 | // requires: state_type shall meet the requirements of | |
94 | // CopyConstructible types (20.1.3) | |
6f48900c | 95 | __enc_traits(const __enc_traits& __obj): _M_in_desc(0), _M_out_desc(0) |
33590f13 BK |
96 | { |
97 | strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size); | |
98 | strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size); | |
99 | _M_ext_bom = __obj._M_ext_bom; | |
100 | _M_int_bom = __obj._M_int_bom; | |
101 | } | |
102 | ||
6f48900c BK |
103 | // Need assignment operator as well. |
104 | __enc_traits& | |
105 | operator=(const __enc_traits& __obj) | |
106 | { | |
107 | strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size); | |
108 | strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size); | |
109 | _M_in_desc = 0; | |
110 | _M_out_desc = 0; | |
111 | _M_ext_bom = __obj._M_ext_bom; | |
112 | _M_int_bom = __obj._M_int_bom; | |
7aacf989 | 113 | return *this; |
6f48900c BK |
114 | } |
115 | ||
33590f13 BK |
116 | ~__enc_traits() |
117 | { | |
1a808622 BK |
118 | __desc_type __err = reinterpret_cast<iconv_t>(-1); |
119 | if (_M_in_desc && _M_in_desc != __err) | |
120 | iconv_close(_M_in_desc); | |
121 | if (_M_out_desc && _M_out_desc != __err) | |
122 | iconv_close(_M_out_desc); | |
33590f13 BK |
123 | } |
124 | ||
33590f13 BK |
125 | void |
126 | _M_init() | |
127 | { | |
6f48900c BK |
128 | const __desc_type __err = reinterpret_cast<iconv_t>(-1); |
129 | if (!_M_in_desc) | |
130 | { | |
131 | _M_in_desc = iconv_open(_M_int_enc, _M_ext_enc); | |
132 | if (_M_in_desc == __err) | |
133 | __throw_runtime_error("creating iconv input descriptor failed."); | |
134 | } | |
135 | if (!_M_out_desc) | |
136 | { | |
137 | _M_out_desc = iconv_open(_M_ext_enc, _M_int_enc); | |
138 | if (_M_out_desc == __err) | |
139 | __throw_runtime_error("creating iconv output descriptor failed."); | |
140 | } | |
33590f13 BK |
141 | } |
142 | ||
143 | bool | |
144 | _M_good() | |
145 | { | |
6f48900c | 146 | const __desc_type __err = reinterpret_cast<iconv_t>(-1); |
1a808622 BK |
147 | bool __test = _M_in_desc && _M_in_desc != __err; |
148 | __test &= _M_out_desc && _M_out_desc != __err; | |
149 | return __test; | |
33590f13 BK |
150 | } |
151 | ||
152 | const __desc_type* | |
153 | _M_get_in_descriptor() | |
154 | { return &_M_in_desc; } | |
155 | ||
156 | const __desc_type* | |
157 | _M_get_out_descriptor() | |
158 | { return &_M_out_desc; } | |
159 | ||
33590f13 BK |
160 | int |
161 | _M_get_external_bom() | |
162 | { return _M_ext_bom; } | |
163 | ||
164 | int | |
165 | _M_get_internal_bom() | |
166 | { return _M_int_bom; } | |
6f48900c BK |
167 | |
168 | const char* | |
169 | _M_get_internal_enc() | |
170 | { return _M_int_enc; } | |
171 | ||
172 | const char* | |
173 | _M_get_external_enc() | |
174 | { return _M_ext_enc; } | |
33590f13 BK |
175 | }; |
176 | ||
177 | // Partial specialization | |
178 | // This specialization takes advantage of iconv to provide code | |
179 | // conversions between a large number of character encodings. | |
180 | template<typename _InternT, typename _ExternT> | |
181 | class codecvt<_InternT, _ExternT, __enc_traits> | |
182 | : public __codecvt_abstract_base<_InternT, _ExternT, __enc_traits> | |
183 | { | |
184 | public: | |
185 | // Types: | |
186 | typedef codecvt_base::result result; | |
187 | typedef _InternT intern_type; | |
188 | typedef _ExternT extern_type; | |
189 | typedef __enc_traits state_type; | |
190 | typedef __enc_traits::__desc_type __desc_type; | |
191 | typedef __enc_traits __enc_type; | |
192 | ||
193 | // Data Members: | |
194 | static locale::id id; | |
195 | ||
196 | explicit | |
197 | codecvt(size_t __refs = 0) | |
198 | : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) | |
199 | { } | |
200 | ||
201 | explicit | |
202 | codecvt(__enc_type* __enc, size_t __refs = 0) | |
203 | : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) | |
204 | { } | |
205 | ||
206 | protected: | |
207 | virtual | |
208 | ~codecvt() { } | |
209 | ||
210 | virtual result | |
211 | do_out(state_type& __state, const intern_type* __from, | |
212 | const intern_type* __from_end, const intern_type*& __from_next, | |
213 | extern_type* __to, extern_type* __to_end, | |
214 | extern_type*& __to_next) const; | |
215 | ||
216 | virtual result | |
217 | do_unshift(state_type& __state, extern_type* __to, | |
218 | extern_type* __to_end, extern_type*& __to_next) const; | |
219 | ||
220 | virtual result | |
221 | do_in(state_type& __state, const extern_type* __from, | |
222 | const extern_type* __from_end, const extern_type*& __from_next, | |
223 | intern_type* __to, intern_type* __to_end, | |
224 | intern_type*& __to_next) const; | |
225 | ||
226 | virtual int | |
227 | do_encoding() const throw(); | |
228 | ||
229 | virtual bool | |
230 | do_always_noconv() const throw(); | |
231 | ||
232 | virtual int | |
233 | do_length(const state_type&, const extern_type* __from, | |
234 | const extern_type* __end, size_t __max) const; | |
235 | ||
236 | virtual int | |
237 | do_max_length() const throw(); | |
238 | }; | |
239 | ||
240 | template<typename _InternT, typename _ExternT> | |
241 | locale::id | |
242 | codecvt<_InternT, _ExternT, __enc_traits>::id; | |
243 | ||
244 | // This adaptor works around the signature problems of the second | |
245 | // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 | |
07814743 BK |
246 | // uses 'char**', which matches the POSIX 1003.1-2001 standard. |
247 | // Using this adaptor, g++ will do the work for us. | |
33590f13 BK |
// Calls __func (normally iconv) with __inbuf converted to whatever
// type the function declares for its second parameter.  _Tp is deduced
// from __func, so one call site works with both the 'const char**' and
// the 'char**' flavours of iconv.  The C-style cast is deliberate: it
// is the identity for 'char**' and only adds const for 'const char**'.
// The parameter is named _Tp rather than _T because _T is a macro on
// some platforms.
template<typename _Tp>
  inline size_t
  __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                  iconv_t __cd, char** __inbuf, size_t* __inbytes,
                  char** __outbuf, size_t* __outbytes)
  { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); }
33590f13 BK |
254 | |
255 | template<typename _InternT, typename _ExternT> | |
256 | codecvt_base::result | |
257 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
258 | do_out(state_type& __state, const intern_type* __from, | |
259 | const intern_type* __from_end, const intern_type*& __from_next, | |
260 | extern_type* __to, extern_type* __to_end, | |
261 | extern_type*& __to_next) const | |
262 | { | |
263 | result __ret = error; | |
264 | if (__state._M_good()) | |
265 | { | |
266 | typedef state_type::__desc_type __desc_type; | |
267 | const __desc_type* __desc = __state._M_get_out_descriptor(); | |
99b629fa | 268 | const size_t __fmultiple = sizeof(intern_type); |
07814743 | 269 | size_t __fbytes = __fmultiple * (__from_end - __from); |
99b629fa | 270 | const size_t __tmultiple = sizeof(extern_type); |
07814743 | 271 | size_t __tbytes = __tmultiple * (__to_end - __to); |
33590f13 BK |
272 | |
273 | // Argument list for iconv specifies a byte sequence. Thus, | |
274 | // all to/from arrays must be brutally casted to char*. | |
275 | char* __cto = reinterpret_cast<char*>(__to); | |
276 | char* __cfrom; | |
277 | size_t __conv; | |
278 | ||
279 | // Some encodings need a byte order marker as the first item | |
280 | // in the byte stream, to designate endian-ness. The default | |
281 | // value for the byte order marker is NULL, so if this is | |
282 | // the case, it's not necessary and we can just go on our | |
283 | // merry way. | |
284 | int __int_bom = __state._M_get_internal_bom(); | |
285 | if (__int_bom) | |
286 | { | |
287 | size_t __size = __from_end - __from; | |
288 | intern_type* __cfixed = static_cast<intern_type*>(__builtin_alloca(sizeof(intern_type) * (__size + 1))); | |
289 | __cfixed[0] = static_cast<intern_type>(__int_bom); | |
290 | char_traits<intern_type>::copy(__cfixed + 1, __from, __size); | |
291 | __cfrom = reinterpret_cast<char*>(__cfixed); | |
292 | __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, | |
07814743 | 293 | &__fbytes, &__cto, &__tbytes); |
33590f13 BK |
294 | } |
295 | else | |
296 | { | |
297 | intern_type* __cfixed = const_cast<intern_type*>(__from); | |
298 | __cfrom = reinterpret_cast<char*>(__cfixed); | |
07814743 BK |
299 | __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, &__fbytes, |
300 | &__cto, &__tbytes); | |
33590f13 BK |
301 | } |
302 | ||
303 | if (__conv != size_t(-1)) | |
304 | { | |
305 | __from_next = reinterpret_cast<const intern_type*>(__cfrom); | |
306 | __to_next = reinterpret_cast<extern_type*>(__cto); | |
307 | __ret = ok; | |
308 | } | |
309 | else | |
310 | { | |
07814743 | 311 | if (__fbytes < __fmultiple * (__from_end - __from)) |
33590f13 BK |
312 | { |
313 | __from_next = reinterpret_cast<const intern_type*>(__cfrom); | |
314 | __to_next = reinterpret_cast<extern_type*>(__cto); | |
315 | __ret = partial; | |
316 | } | |
317 | else | |
318 | __ret = error; | |
319 | } | |
320 | } | |
321 | return __ret; | |
322 | } | |
323 | ||
324 | template<typename _InternT, typename _ExternT> | |
325 | codecvt_base::result | |
326 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
327 | do_unshift(state_type& __state, extern_type* __to, | |
328 | extern_type* __to_end, extern_type*& __to_next) const | |
329 | { | |
330 | result __ret = error; | |
331 | if (__state._M_good()) | |
332 | { | |
333 | typedef state_type::__desc_type __desc_type; | |
334 | const __desc_type* __desc = __state._M_get_in_descriptor(); | |
99b629fa | 335 | const size_t __tmultiple = sizeof(intern_type); |
33590f13 BK |
336 | size_t __tlen = __tmultiple * (__to_end - __to); |
337 | ||
338 | // Argument list for iconv specifies a byte sequence. Thus, | |
339 | // all to/from arrays must be brutally casted to char*. | |
340 | char* __cto = reinterpret_cast<char*>(__to); | |
341 | size_t __conv = __iconv_adaptor(iconv,*__desc, NULL, NULL, | |
342 | &__cto, &__tlen); | |
343 | ||
344 | if (__conv != size_t(-1)) | |
345 | { | |
346 | __to_next = reinterpret_cast<extern_type*>(__cto); | |
347 | if (__tlen == __tmultiple * (__to_end - __to)) | |
348 | __ret = noconv; | |
349 | else if (__tlen == 0) | |
350 | __ret = ok; | |
351 | else | |
352 | __ret = partial; | |
353 | } | |
354 | else | |
355 | __ret = error; | |
356 | } | |
357 | return __ret; | |
358 | } | |
359 | ||
360 | template<typename _InternT, typename _ExternT> | |
361 | codecvt_base::result | |
362 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
363 | do_in(state_type& __state, const extern_type* __from, | |
364 | const extern_type* __from_end, const extern_type*& __from_next, | |
365 | intern_type* __to, intern_type* __to_end, | |
366 | intern_type*& __to_next) const | |
367 | { | |
368 | result __ret = error; | |
369 | if (__state._M_good()) | |
370 | { | |
371 | typedef state_type::__desc_type __desc_type; | |
372 | const __desc_type* __desc = __state._M_get_in_descriptor(); | |
99b629fa | 373 | const size_t __fmultiple = sizeof(extern_type); |
33590f13 | 374 | size_t __flen = __fmultiple * (__from_end - __from); |
99b629fa | 375 | const size_t __tmultiple = sizeof(intern_type); |
33590f13 BK |
376 | size_t __tlen = __tmultiple * (__to_end - __to); |
377 | ||
378 | // Argument list for iconv specifies a byte sequence. Thus, | |
379 | // all to/from arrays must be brutally casted to char*. | |
380 | char* __cto = reinterpret_cast<char*>(__to); | |
381 | char* __cfrom; | |
382 | size_t __conv; | |
383 | ||
384 | // Some encodings need a byte order marker as the first item | |
385 | // in the byte stream, to designate endian-ness. The default | |
386 | // value for the byte order marker is NULL, so if this is | |
387 | // the case, it's not necessary and we can just go on our | |
388 | // merry way. | |
389 | int __ext_bom = __state._M_get_external_bom(); | |
390 | if (__ext_bom) | |
391 | { | |
392 | size_t __size = __from_end - __from; | |
393 | extern_type* __cfixed = static_cast<extern_type*>(__builtin_alloca(sizeof(extern_type) * (__size + 1))); | |
394 | __cfixed[0] = static_cast<extern_type>(__ext_bom); | |
395 | char_traits<extern_type>::copy(__cfixed + 1, __from, __size); | |
396 | __cfrom = reinterpret_cast<char*>(__cfixed); | |
397 | __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, | |
398 | &__flen, &__cto, &__tlen); | |
399 | } | |
400 | else | |
401 | { | |
402 | extern_type* __cfixed = const_cast<extern_type*>(__from); | |
403 | __cfrom = reinterpret_cast<char*>(__cfixed); | |
404 | __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, | |
405 | &__flen, &__cto, &__tlen); | |
406 | } | |
407 | ||
408 | ||
409 | if (__conv != size_t(-1)) | |
410 | { | |
411 | __from_next = reinterpret_cast<const extern_type*>(__cfrom); | |
412 | __to_next = reinterpret_cast<intern_type*>(__cto); | |
413 | __ret = ok; | |
414 | } | |
415 | else | |
416 | { | |
417 | if (__flen < static_cast<size_t>(__from_end - __from)) | |
418 | { | |
419 | __from_next = reinterpret_cast<const extern_type*>(__cfrom); | |
420 | __to_next = reinterpret_cast<intern_type*>(__cto); | |
421 | __ret = partial; | |
422 | } | |
423 | else | |
424 | __ret = error; | |
425 | } | |
426 | } | |
427 | return __ret; | |
428 | } | |
429 | ||
430 | template<typename _InternT, typename _ExternT> | |
431 | int | |
432 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
433 | do_encoding() const throw() | |
07814743 BK |
434 | { |
435 | int __ret = 0; | |
436 | if (sizeof(_ExternT) <= sizeof(_InternT)) | |
437 | __ret = sizeof(_InternT)/sizeof(_ExternT); | |
438 | return __ret; | |
439 | } | |
33590f13 BK |
440 | |
441 | template<typename _InternT, typename _ExternT> | |
442 | bool | |
443 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
444 | do_always_noconv() const throw() | |
445 | { return false; } | |
446 | ||
447 | template<typename _InternT, typename _ExternT> | |
448 | int | |
449 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
450 | do_length(const state_type&, const extern_type* __from, | |
451 | const extern_type* __end, size_t __max) const | |
4977bab6 | 452 | { return std::min(__max, static_cast<size_t>(__end - __from)); } |
33590f13 BK |
453 | |
454 | #ifdef _GLIBCPP_RESOLVE_LIB_DEFECTS | |
455 | // 74. Garbled text for codecvt::do_max_length | |
456 | template<typename _InternT, typename _ExternT> | |
457 | int | |
458 | codecvt<_InternT, _ExternT, __enc_traits>:: | |
459 | do_max_length() const throw() | |
460 | { return 1; } | |
461 | #endif |