]> git.ipfire.org Git - thirdparty/gcc.git/blame - libstdc++-v3/include/ext/codecvt_specializations.h
Daily bump.
[thirdparty/gcc.git] / libstdc++-v3 / include / ext / codecvt_specializations.h
CommitLineData
33590f13
BK
1// Locale support (codecvt) -*- C++ -*-
2
6ab63942 3// Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
825bd0e1 4// Free Software Foundation, Inc.
33590f13
BK
5//
6// This file is part of the GNU ISO C++ Library. This library is free
7// software; you can redistribute it and/or modify it under the
8// terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 2, or (at your option)
10// any later version.
11
12// This library is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15// GNU General Public License for more details.
16
17// You should have received a copy of the GNU General Public License along
18// with this library; see the file COPYING. If not, write to the Free
83f51799 19// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
33590f13
BK
20// USA.
21
22// As a special exception, you may use this file as part of a free software
23// library without restriction. Specifically, if other files instantiate
24// templates or use macros or inline functions from this file, or you compile
25// this file and link it with other files to produce an executable, this
26// file does not by itself cause the resulting executable to be covered by
27// the GNU General Public License. This exception does not however
28// invalidate any other reasons why the executable file might be covered by
29// the GNU General Public License.
30
31//
32// ISO C++ 14882: 22.2.1.5 Template class codecvt
33//
34
825bd0e1 35// Written by Benjamin Kosnik <bkoz@redhat.com>
33590f13 36
6ab63942 37/** @file ext/codecvt_specializations.h
825bd0e1 38 * This file is a GNU extension to the Standard C++ Library.
00aca6e8
BK
39 */
40
6ab63942
PC
41#ifndef _EXT_CODECVT_SPECIALIZATIONS_H
42#define _EXT_CODECVT_SPECIALIZATIONS_H 1
43
44#ifdef _GLIBCXX_USE_ICONV
45
46#include <bits/c++config.h>
47#include <locale>
48#include <iconv.h>
49
6f48900c 50 // XXX
691d0896 51 // Define this here so codecvt.cc can have _S_max_size definition.
825bd0e1 52#define _GLIBCXX_USE_ENCODING_STATE 1
33590f13 53
3cbc7af0
BK
54_GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx)
55
6309eefc
BK
/// @brief  Extension to use iconv for dealing with character encodings.
// This includes conversions and comparisons between various character
// sets.  This object encapsulates data that may need to be shared between
// char_traits, codecvt and ctype.
//
// The object owns two iconv conversion descriptors (external->internal
// and internal->external).  They are opened from the stored encoding
// names and closed again by the destructor.
class encoding_state
{
public:
  // Types:
  // NB: A conversion descriptor subsumes and enhances the
  // functionality of a simple state type such as mbstate_t.
  typedef iconv_t	descriptor_type;

protected:
  // Name of internal character set encoding.
  std::string		_M_int_enc;

  // Name of external character set encoding.
  std::string		_M_ext_enc;

  // Conversion descriptor between external encoding to internal encoding.
  descriptor_type	_M_in_desc;

  // Conversion descriptor between internal encoding to external encoding.
  descriptor_type	_M_out_desc;

  // The byte-order marker for the external encoding, if necessary.
  int			_M_ext_bom;

  // The byte-order marker for the internal encoding, if necessary.
  int			_M_int_bom;

  // Number of external bytes needed to construct one complete
  // character in the internal encoding.
  // NB: -1 indicates variable, or stateful, encodings.
  int			_M_bytes;

public:
  // Creates a state with no encodings and no open descriptors;
  // good() is false for such an object.
  explicit
  encoding_state()
  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
  { }

  // Creates a state for the given internal/external encoding names and
  // opens both descriptors.  Throws (via __throw_runtime_error) if
  // either descriptor cannot be opened.
  explicit
  encoding_state(const char* __int, const char* __ext,
		 int __ibom = 0, int __ebom = 0, int __bytes = 1)
  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
    _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
  { init(); }

  // 21.1.2 traits typedefs
  // p4
  // typedef STATE_T state_type
  // requires: state_type shall meet the requirements of
  // CopyConstructible types (20.1.3)
  // NB: This does not preserve the actual state of the conversion
  // descriptor member, but it does duplicate the encoding
  // information.
  encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
  { construct(__obj); }

  // Need assignment operator as well.
  encoding_state&
  operator=(const encoding_state& __obj)
  {
    construct(__obj);
    return *this;
  }

  ~encoding_state()
  { destroy(); }

  // True only if both descriptors are open, i.e. neither null nor the
  // iconv error value (iconv_t)-1.
  bool
  good() const throw()
  {
    const descriptor_type __err = reinterpret_cast<iconv_t>(-1);
    bool __test = _M_in_desc && _M_in_desc != __err;
    __test &= _M_out_desc && _M_out_desc != __err;
    return __test;
  }

  int
  character_ratio() const
  { return _M_bytes; }

  const std::string
  internal_encoding() const
  { return _M_int_enc; }

  int
  internal_bom() const
  { return _M_int_bom; }

  const std::string
  external_encoding() const
  { return _M_ext_enc; }

  int
  external_bom() const
  { return _M_ext_bom; }

  const descriptor_type&
  in_descriptor() const
  { return _M_in_desc; }

  const descriptor_type&
  out_descriptor() const
  { return _M_out_desc; }

protected:
  // Opens whichever descriptor is still null, provided both encoding
  // names are non-empty.  On failure every descriptor this object holds
  // is released before throwing: when init() runs from a constructor
  // the destructor will not run, so anything left open here would leak.
  void
  init()
  {
    const descriptor_type __err = reinterpret_cast<iconv_t>(-1);
    const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
    if (!_M_in_desc && __have_encodings)
      {
	_M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
	if (_M_in_desc == __err)
	  {
	    destroy();
	    std::__throw_runtime_error(__N("encoding_state::_M_init "
				    "creating iconv input descriptor failed"));
	  }
      }
    if (!_M_out_desc && __have_encodings)
      {
	_M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
	if (_M_out_desc == __err)
	  {
	    destroy();
	    std::__throw_runtime_error(__N("encoding_state::_M_init "
				  "creating iconv output descriptor failed"));
	  }
      }
  }

  // Discards the current descriptors, copies the encoding names, BOMs
  // and byte ratio from __obj, then opens fresh descriptors.
  void
  construct(const encoding_state& __obj)
  {
    destroy();
    _M_int_enc = __obj._M_int_enc;
    _M_ext_enc = __obj._M_ext_enc;
    _M_ext_bom = __obj._M_ext_bom;
    _M_int_bom = __obj._M_int_bom;
    _M_bytes = __obj._M_bytes;
    init();
  }

  // Closes any open descriptor and resets both members to null.
  // The reset is unconditional: a descriptor left at the error value
  // would be non-null, so init() would skip it and the object could
  // never become good() again.
  void
  destroy() throw()
  {
    const descriptor_type __err = reinterpret_cast<iconv_t>(-1);
    if (_M_in_desc && _M_in_desc != __err)
      iconv_close(_M_in_desc);
    _M_in_desc = 0;
    if (_M_out_desc && _M_out_desc != __err)
      iconv_close(_M_out_desc);
    _M_out_desc = 0;
  }
};
a5f105b5 214
825bd0e1
BK
215 /// @brief encoding_char_traits.
216 // Custom traits type with encoding_state for the state type, and the
217 // associated fpos<encoding_state> for the position type, all other
218 // bits equivalent to the required char_traits instantiations.
219 template<typename _CharT>
220 struct encoding_char_traits : public std::char_traits<_CharT>
221 {
222 typedef encoding_state state_type;
223 typedef typename std::fpos<state_type> pos_type;
224 };
6f48900c 225
3cbc7af0
BK
226_GLIBCXX_END_NAMESPACE
227
228
229_GLIBCXX_BEGIN_NAMESPACE(std)
230
825bd0e1 231 using __gnu_cxx::encoding_state;
33590f13 232
825bd0e1 233 /// @brief codecvt<InternT, _ExternT, encoding_state> specialization.
00aca6e8
BK
234 // This partial specialization takes advantage of iconv to provide
235 // code conversions between a large number of character encodings.
33590f13 236 template<typename _InternT, typename _ExternT>
825bd0e1
BK
237 class codecvt<_InternT, _ExternT, encoding_state>
238 : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
33590f13
BK
239 {
240 public:
241 // Types:
242 typedef codecvt_base::result result;
243 typedef _InternT intern_type;
244 typedef _ExternT extern_type;
825bd0e1
BK
245 typedef __gnu_cxx::encoding_state state_type;
246 typedef state_type::descriptor_type descriptor_type;
33590f13
BK
247
248 // Data Members:
249 static locale::id id;
250
251 explicit
252 codecvt(size_t __refs = 0)
253 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
254 { }
255
256 explicit
825bd0e1 257 codecvt(state_type& __enc, size_t __refs = 0)
33590f13
BK
258 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
259 { }
260
825bd0e1 261 protected:
33590f13
BK
262 virtual
263 ~codecvt() { }
264
265 virtual result
266 do_out(state_type& __state, const intern_type* __from,
267 const intern_type* __from_end, const intern_type*& __from_next,
268 extern_type* __to, extern_type* __to_end,
269 extern_type*& __to_next) const;
270
271 virtual result
272 do_unshift(state_type& __state, extern_type* __to,
273 extern_type* __to_end, extern_type*& __to_next) const;
274
275 virtual result
276 do_in(state_type& __state, const extern_type* __from,
277 const extern_type* __from_end, const extern_type*& __from_next,
278 intern_type* __to, intern_type* __to_end,
279 intern_type*& __to_next) const;
280
281 virtual int
282 do_encoding() const throw();
283
284 virtual bool
285 do_always_noconv() const throw();
286
287 virtual int
e61c8e23 288 do_length(state_type&, const extern_type* __from,
33590f13
BK
289 const extern_type* __end, size_t __max) const;
290
291 virtual int
292 do_max_length() const throw();
293 };
294
295 template<typename _InternT, typename _ExternT>
296 locale::id
825bd0e1 297 codecvt<_InternT, _ExternT, encoding_state>::id;
33590f13
BK
298
// The second parameter of iconv() is declared differently across
// platforms: SUSv2 and others use 'const char**', whereas glibc 2.2
// uses 'char**', which matches the POSIX 1003.1-2001 standard.
// This adaptor deduces the parameter type (_T) from the function
// pointer it is handed and casts the input buffer argument to it, so
// g++ does the work for us.
template<typename _T>
  inline size_t
  __iconv_adaptor(size_t(*__func)(iconv_t, _T, size_t*, char**, size_t*),
		  iconv_t __cd, char** __inbuf, size_t* __inbytes,
		  char** __outbuf, size_t* __outbytes)
  {
    // A C-style cast is required here: depending on _T this is either
    // a no-op or a qualification change that no single named cast
    // expresses for both cases.
    _T __src = (_T)__inbuf;
    return __func(__cd, __src, __inbytes, __outbuf, __outbytes);
  }
33590f13
BK
309
310 template<typename _InternT, typename _ExternT>
311 codecvt_base::result
825bd0e1 312 codecvt<_InternT, _ExternT, encoding_state>::
33590f13
BK
313 do_out(state_type& __state, const intern_type* __from,
314 const intern_type* __from_end, const intern_type*& __from_next,
315 extern_type* __to, extern_type* __to_end,
316 extern_type*& __to_next) const
317 {
8fbc5ae7 318 result __ret = codecvt_base::error;
825bd0e1 319 if (__state.good())
33590f13 320 {
825bd0e1 321 const descriptor_type& __desc = __state.out_descriptor();
99b629fa 322 const size_t __fmultiple = sizeof(intern_type);
07814743 323 size_t __fbytes = __fmultiple * (__from_end - __from);
99b629fa 324 const size_t __tmultiple = sizeof(extern_type);
07814743 325 size_t __tbytes = __tmultiple * (__to_end - __to);
33590f13
BK
326
327 // Argument list for iconv specifies a byte sequence. Thus,
328 // all to/from arrays must be brutally casted to char*.
329 char* __cto = reinterpret_cast<char*>(__to);
330 char* __cfrom;
331 size_t __conv;
332
333 // Some encodings need a byte order marker as the first item
334 // in the byte stream, to designate endian-ness. The default
335 // value for the byte order marker is NULL, so if this is
336 // the case, it's not necessary and we can just go on our
337 // merry way.
825bd0e1 338 int __int_bom = __state.internal_bom();
33590f13
BK
339 if (__int_bom)
340 {
341 size_t __size = __from_end - __from;
e76382b5
VR
342 intern_type* __cfixed = static_cast<intern_type*>
343 (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
33590f13
BK
344 __cfixed[0] = static_cast<intern_type>(__int_bom);
345 char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
346 __cfrom = reinterpret_cast<char*>(__cfixed);
825bd0e1 347 __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
07814743 348 &__fbytes, &__cto, &__tbytes);
33590f13
BK
349 }
350 else
351 {
352 intern_type* __cfixed = const_cast<intern_type*>(__from);
353 __cfrom = reinterpret_cast<char*>(__cfixed);
825bd0e1 354 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
07814743 355 &__cto, &__tbytes);
33590f13
BK
356 }
357
358 if (__conv != size_t(-1))
359 {
360 __from_next = reinterpret_cast<const intern_type*>(__cfrom);
361 __to_next = reinterpret_cast<extern_type*>(__cto);
8fbc5ae7 362 __ret = codecvt_base::ok;
33590f13
BK
363 }
364 else
365 {
07814743 366 if (__fbytes < __fmultiple * (__from_end - __from))
33590f13
BK
367 {
368 __from_next = reinterpret_cast<const intern_type*>(__cfrom);
369 __to_next = reinterpret_cast<extern_type*>(__cto);
8fbc5ae7 370 __ret = codecvt_base::partial;
33590f13
BK
371 }
372 else
8fbc5ae7 373 __ret = codecvt_base::error;
33590f13
BK
374 }
375 }
376 return __ret;
377 }
378
379 template<typename _InternT, typename _ExternT>
380 codecvt_base::result
825bd0e1 381 codecvt<_InternT, _ExternT, encoding_state>::
33590f13
BK
382 do_unshift(state_type& __state, extern_type* __to,
383 extern_type* __to_end, extern_type*& __to_next) const
384 {
8fbc5ae7 385 result __ret = codecvt_base::error;
825bd0e1 386 if (__state.good())
33590f13 387 {
825bd0e1 388 const descriptor_type& __desc = __state.in_descriptor();
99b629fa 389 const size_t __tmultiple = sizeof(intern_type);
33590f13
BK
390 size_t __tlen = __tmultiple * (__to_end - __to);
391
392 // Argument list for iconv specifies a byte sequence. Thus,
393 // all to/from arrays must be brutally casted to char*.
394 char* __cto = reinterpret_cast<char*>(__to);
825bd0e1 395 size_t __conv = __iconv_adaptor(iconv,__desc, NULL, NULL,
33590f13
BK
396 &__cto, &__tlen);
397
398 if (__conv != size_t(-1))
399 {
400 __to_next = reinterpret_cast<extern_type*>(__cto);
401 if (__tlen == __tmultiple * (__to_end - __to))
8fbc5ae7 402 __ret = codecvt_base::noconv;
33590f13 403 else if (__tlen == 0)
8fbc5ae7 404 __ret = codecvt_base::ok;
33590f13 405 else
8fbc5ae7 406 __ret = codecvt_base::partial;
33590f13
BK
407 }
408 else
8fbc5ae7 409 __ret = codecvt_base::error;
33590f13
BK
410 }
411 return __ret;
412 }
413
414 template<typename _InternT, typename _ExternT>
415 codecvt_base::result
825bd0e1 416 codecvt<_InternT, _ExternT, encoding_state>::
33590f13
BK
417 do_in(state_type& __state, const extern_type* __from,
418 const extern_type* __from_end, const extern_type*& __from_next,
419 intern_type* __to, intern_type* __to_end,
420 intern_type*& __to_next) const
421 {
8fbc5ae7 422 result __ret = codecvt_base::error;
825bd0e1 423 if (__state.good())
33590f13 424 {
825bd0e1 425 const descriptor_type& __desc = __state.in_descriptor();
99b629fa 426 const size_t __fmultiple = sizeof(extern_type);
33590f13 427 size_t __flen = __fmultiple * (__from_end - __from);
99b629fa 428 const size_t __tmultiple = sizeof(intern_type);
33590f13
BK
429 size_t __tlen = __tmultiple * (__to_end - __to);
430
431 // Argument list for iconv specifies a byte sequence. Thus,
432 // all to/from arrays must be brutally casted to char*.
433 char* __cto = reinterpret_cast<char*>(__to);
434 char* __cfrom;
435 size_t __conv;
436
437 // Some encodings need a byte order marker as the first item
438 // in the byte stream, to designate endian-ness. The default
439 // value for the byte order marker is NULL, so if this is
440 // the case, it's not necessary and we can just go on our
441 // merry way.
825bd0e1 442 int __ext_bom = __state.external_bom();
33590f13
BK
443 if (__ext_bom)
444 {
445 size_t __size = __from_end - __from;
e76382b5
VR
446 extern_type* __cfixed = static_cast<extern_type*>
447 (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
33590f13
BK
448 __cfixed[0] = static_cast<extern_type>(__ext_bom);
449 char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
450 __cfrom = reinterpret_cast<char*>(__cfixed);
825bd0e1 451 __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
33590f13
BK
452 &__flen, &__cto, &__tlen);
453 }
454 else
455 {
456 extern_type* __cfixed = const_cast<extern_type*>(__from);
457 __cfrom = reinterpret_cast<char*>(__cfixed);
825bd0e1 458 __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
33590f13
BK
459 &__flen, &__cto, &__tlen);
460 }
461
462
463 if (__conv != size_t(-1))
464 {
465 __from_next = reinterpret_cast<const extern_type*>(__cfrom);
466 __to_next = reinterpret_cast<intern_type*>(__cto);
8fbc5ae7 467 __ret = codecvt_base::ok;
33590f13
BK
468 }
469 else
470 {
471 if (__flen < static_cast<size_t>(__from_end - __from))
472 {
473 __from_next = reinterpret_cast<const extern_type*>(__cfrom);
474 __to_next = reinterpret_cast<intern_type*>(__cto);
8fbc5ae7 475 __ret = codecvt_base::partial;
33590f13
BK
476 }
477 else
8fbc5ae7 478 __ret = codecvt_base::error;
33590f13
BK
479 }
480 }
481 return __ret;
482 }
483
484 template<typename _InternT, typename _ExternT>
485 int
825bd0e1 486 codecvt<_InternT, _ExternT, encoding_state>::
33590f13 487 do_encoding() const throw()
07814743
BK
488 {
489 int __ret = 0;
490 if (sizeof(_ExternT) <= sizeof(_InternT))
825bd0e1 491 __ret = sizeof(_InternT) / sizeof(_ExternT);
07814743
BK
492 return __ret;
493 }
33590f13
BK
494
495 template<typename _InternT, typename _ExternT>
496 bool
825bd0e1 497 codecvt<_InternT, _ExternT, encoding_state>::
33590f13
BK
498 do_always_noconv() const throw()
499 { return false; }
500
501 template<typename _InternT, typename _ExternT>
502 int
825bd0e1 503 codecvt<_InternT, _ExternT, encoding_state>::
e61c8e23 504 do_length(state_type&, const extern_type* __from,
33590f13 505 const extern_type* __end, size_t __max) const
4977bab6 506 { return std::min(__max, static_cast<size_t>(__end - __from)); }
33590f13 507
f5677b15
PC
508 // _GLIBCXX_RESOLVE_LIB_DEFECTS
509 // 74. Garbled text for codecvt::do_max_length
33590f13
BK
510 template<typename _InternT, typename _ExternT>
511 int
825bd0e1 512 codecvt<_InternT, _ExternT, encoding_state>::
33590f13
BK
513 do_max_length() const throw()
514 { return 1; }
3cbc7af0
BK
515
516_GLIBCXX_END_NAMESPACE
f5677b15 517
6ab63942
PC
518#endif
519
520#endif