]>
git.ipfire.org Git - thirdparty/gcc.git/blob - libstdc++-v3/testsuite/22_locale/codecvt/unicode/char.cc
1 // 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
3 // Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 2, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License along
17 // with this library; see the file COPYING. If not, write to the Free
18 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
21 // 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
23 #include <testsuite_hooks.h>
25 #ifdef _GLIBCXX_USE___ENC_TRAITS
27 // Need some char_traits specializations for this to work.
28 typedef unsigned short unicode_t
;
33 struct char_traits
<unicode_t
>
35 typedef unicode_t char_type
;
36 // Unsigned as wint_t is unsigned.
37 typedef unsigned long int_type
;
38 typedef streampos pos_type
;
39 typedef streamoff off_type
;
40 typedef mbstate_t state_type
;
43 assign(char_type
& __c1
, const char_type
& __c2
);
46 eq(const char_type
& __c1
, const char_type
& __c2
);
49 lt(const char_type
& __c1
, const char_type
& __c2
);
52 compare(const char_type
* __s1
, const char_type
* __s2
, size_t __n
)
53 { return memcmp(__s1
, __s2
, __n
); }
56 length(const char_type
* __s
);
58 static const char_type
*
59 find(const char_type
* __s
, size_t __n
, const char_type
& __a
);
62 move(char_type
* __s1
, const char_type
* __s2
, size_t __n
);
65 copy(char_type
* __s1
, const char_type
* __s2
, size_t __n
)
66 { return static_cast<char_type
*>(memcpy(__s1
, __s2
, __n
)); }
69 assign(char_type
* __s
, size_t __n
, char_type __a
);
72 to_char_type(const int_type
& __c
);
75 to_int_type(const char_type
& __c
);
78 eq_int_type(const int_type
& __c1
, const int_type
& __c2
);
84 not_eof(const int_type
& __c
);
89 > how do I check that these conversions are correct?
90 Very easy. Since all the characters are from ASCII you simply
91 zero-extend the values.
93 drepper$ echo 'black pearl jasmine tea' | od -t x1
94 0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
95 0000020 69 6e 65 20 74 65 61 0a
97 So the UCS-2 string is
99 0x0062, 0x006c, 0x0061, ...
101 You get the idea. With iconv() you have to take care of the
102 byte-order, though. UCS-2 can mean little- or big endian. Looking at
107 it shows that the other byte-order is used (25856 == 0x6500).
112 initialize_state(std::__enc_traits
& state
)
115 // Partial specialization using __enc_traits.
116 // codecvt<unicode_t, char, __enc_traits>
117 // UNICODE - UCS2 (big endian)
121 typedef codecvt_base::result result
;
122 typedef unicode_t int_type
;
123 typedef char ext_type
;
124 typedef __enc_traits enc_type
;
125 typedef codecvt
<int_type
, ext_type
, enc_type
> unicode_codecvt
;
126 typedef char_traits
<int_type
> int_traits
;
127 typedef char_traits
<ext_type
> ext_traits
;
129 bool test
__attribute__((unused
)) = true;
130 const ext_type
* e_lit
= "black pearl jasmine tea";
131 int size
= strlen(e_lit
);
133 char i_lit_base
[50] __attribute__((aligned(__alignof__(int_type
)))) =
135 0x00, 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20,
136 0x00, 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20,
137 0x00, 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e,
138 0x00, 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0
140 const int_type
* i_lit
= reinterpret_cast<int_type
*>(i_lit_base
);
142 const ext_type
* efrom_next
;
143 const int_type
* ifrom_next
;
144 ext_type
* e_arr
= new ext_type
[size
+ 1];
146 int_type
* i_arr
= new int_type
[size
+ 1];
149 // construct a locale object with the specialized facet.
150 locale
loc(locale::classic(), new unicode_codecvt
);
151 // sanity check the constructed locale has the specialized facet.
152 VERIFY( has_facet
<unicode_codecvt
>(loc
) );
153 const unicode_codecvt
& cvt
= use_facet
<unicode_codecvt
>(loc
);
156 // unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
157 unicode_codecvt::state_type
state01("UCS-2BE", "ISO-8859-15", 0, 0);
158 initialize_state(state01
);
159 // internal encoding is bigger because of bom
160 result r1
= cvt
.in(state01
, e_lit
, e_lit
+ size
, efrom_next
,
161 i_arr
, i_arr
+ size
+ 1, ito_next
);
162 VERIFY( r1
== codecvt_base::ok
);
163 VERIFY( !int_traits::compare(i_arr
, i_lit
, size
) );
164 VERIFY( efrom_next
== e_lit
+ size
);
165 VERIFY( ito_next
== i_arr
+ size
);
168 unicode_codecvt::state_type
state02("UCS-2BE", "ISO-8859-15", 0, 0);
169 initialize_state(state02
);
170 result r2
= cvt
.out(state02
, i_lit
, i_lit
+ size
, ifrom_next
,
171 e_arr
, e_arr
+ size
, eto_next
);
172 VERIFY( r2
== codecvt_base::ok
);
173 VERIFY( !ext_traits::compare(e_arr
, e_lit
, size
) );
174 VERIFY( ifrom_next
== i_lit
+ size
);
175 VERIFY( eto_next
== e_arr
+ size
);
178 ext_traits::copy(e_arr
, e_lit
, size
);
179 unicode_codecvt::state_type
state03("UCS-2BE", "ISO-8859-15", 0, 0);
180 initialize_state(state03
);
181 result r3
= cvt
.unshift(state03
, e_arr
, e_arr
+ size
, eto_next
);
182 VERIFY( r3
== codecvt_base::noconv
);
183 VERIFY( !ext_traits::compare(e_arr
, e_lit
, size
) );
184 VERIFY( eto_next
== e_arr
);
186 int i
= cvt
.encoding();
187 VERIFY( i
== 2 ); // Target-dependent.
189 VERIFY( !cvt
.always_noconv() );
191 unicode_codecvt::state_type
state04("UCS-2BE", "ISO-8859-15", 0, 0);
192 initialize_state(state04
);
193 int j
= cvt
.length(state03
, e_lit
, e_lit
+ size
, 5);
196 int k
= cvt
.max_length();
203 // Partial specialization using __enc_traits.
204 // codecvt<unicode_t, char, __enc_traits>
205 // UNICODE - UCS2 (little endian)
209 typedef codecvt_base::result result
;
210 typedef unsigned short unicode_t
;
211 typedef unicode_t int_type
;
212 typedef char ext_type
;
213 typedef __enc_traits enc_type
;
214 typedef codecvt
<int_type
, ext_type
, enc_type
> unicode_codecvt
;
215 typedef char_traits
<int_type
> int_traits
;
216 typedef char_traits
<ext_type
> ext_traits
;
218 bool test
__attribute__((unused
)) = true;
219 const ext_type
* e_lit
= "black pearl jasmine tea";
220 int size
= strlen(e_lit
);
222 char i_lit_base
[50] __attribute__((aligned(__alignof__(int_type
)))) =
224 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20, 0x00,
225 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20, 0x00,
226 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e, 0x00,
227 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0, 0x00
229 const int_type
* i_lit
= reinterpret_cast<int_type
*>(i_lit_base
);
231 const ext_type
* efrom_next
;
232 const int_type
* ifrom_next
;
233 ext_type
* e_arr
= new ext_type
[size
+ 1];
235 int_type
* i_arr
= new int_type
[size
+ 1];
238 // construct a locale object with the specialized facet.
239 locale
loc(locale::classic(), new unicode_codecvt
);
240 // sanity check the constructed locale has the specialized facet.
241 VERIFY( has_facet
<unicode_codecvt
>(loc
) );
242 const unicode_codecvt
& cvt
= use_facet
<unicode_codecvt
>(loc
);
245 unicode_codecvt::state_type
state01("UCS-2LE", "ISO-8859-15", 0, 0);
246 initialize_state(state01
);
247 // internal encoding is bigger because of bom
248 result r1
= cvt
.in(state01
, e_lit
, e_lit
+ size
, efrom_next
,
249 i_arr
, i_arr
+ size
+ 1, ito_next
);
250 VERIFY( r1
== codecvt_base::ok
);
251 VERIFY( !int_traits::compare(i_arr
, i_lit
, size
) );
252 VERIFY( efrom_next
== e_lit
+ size
);
253 VERIFY( ito_next
== i_arr
+ size
);
256 unicode_codecvt::state_type
state02("UCS-2LE", "ISO-8859-15", 0, 0);
257 initialize_state(state02
);
258 result r2
= cvt
.out(state02
, i_lit
, i_lit
+ size
, ifrom_next
,
259 e_arr
, e_arr
+ size
, eto_next
);
260 VERIFY( r2
== codecvt_base::ok
);
261 VERIFY( !ext_traits::compare(e_arr
, e_lit
, size
) );
262 VERIFY( ifrom_next
== i_lit
+ size
);
263 VERIFY( eto_next
== e_arr
+ size
);
266 ext_traits::copy(e_arr
, e_lit
, size
);
267 unicode_codecvt::state_type
state03("UCS-2LE", "ISO-8859-15", 0, 0);
268 initialize_state(state03
);
269 result r3
= cvt
.unshift(state03
, e_arr
, e_arr
+ size
, eto_next
);
270 VERIFY( r3
== codecvt_base::noconv
);
271 VERIFY( !ext_traits::compare(e_arr
, e_lit
, size
) );
272 VERIFY( eto_next
== e_arr
);
274 int i
= cvt
.encoding();
275 VERIFY( i
== 2 ); // Target-dependent.
277 VERIFY( !cvt
.always_noconv() );
279 unicode_codecvt::state_type
state04("UCS-2LE", "ISO-8859-15", 0, 0);
280 initialize_state(state04
);
281 int j
= cvt
.length(state03
, e_lit
, e_lit
+ size
, 5);
284 int k
= cvt
.max_length();
291 #endif // _GLIBCXX_USE___ENC_TRAITS
295 #if _GLIBCXX_USE___ENC_TRAITS