]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconvdata/tst-table-from.c
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / iconvdata / tst-table-from.c
CommitLineData
04277e02 1/* Copyright (C) 2000-2019 Free Software Foundation, Inc.
b79f74cd
UD
2 This file is part of the GNU C Library.
3 Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
4
5 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
b79f74cd
UD
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 13 Lesser General Public License for more details.
b79f74cd 14
41bdb6e2 15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
b79f74cd
UD
18
19/* Create a table from CHARSET to Unicode.
20 This is a good test for CHARSET's iconv() module, in particular the
21 FROM_LOOP BODY macro. */
22
23#include <stddef.h>
24#include <stdio.h>
25#include <stdlib.h>
5f7d74ab 26#include <string.h>
b79f74cd
UD
27#include <iconv.h>
28#include <errno.h>
29
601d2942
UD
30/* If nonzero, ignore conversions outside Unicode plane 0: utf8_decode
   then returns NULL as soon as one decoded value formats to more than
   the six characters of "0xXXXX".  main sets this flag when the charset
   under test is UTF-8 or GB18030, so that the output file does not get
   too big.  */
31static int bmp_only;
32
b79f74cd
UD
/* Converts a byte buffer to a hexadecimal string.

   BUF holds the bytes to format and BUFLEN (1 to 4) is the number of
   bytes to use.  The result is "0x" followed by two uppercase hex digits
   per byte, e.g. "0x0AFF" for the two bytes 0x0A 0xFF.

   The returned pointer refers to a static buffer that is overwritten by
   the next call.  Aborts if BUFLEN is not in the range 1..4.  */
static const char *
hexbuf (const unsigned char buf[], unsigned int buflen)
{
  static char msg[50];
  size_t used;
  unsigned int i;

  if (buflen < 1 || buflen > 4)
    abort ();

  /* Every write is bounded by the space remaining in MSG.  */
  used = (size_t) snprintf (msg, sizeof msg, "0x");
  for (i = 0; i < buflen; i++)
    used += (size_t) snprintf (msg + used, sizeof msg - used, "%02X",
			       (unsigned int) buf[i]);

  return msg;
}
58
/* Attempts to convert the BUFLEN bytes at BUF into OUT (room for 12
   bytes) using the conversion descriptor CD.

   CD is first reset with an all-NULL iconv call.  If the conversion
   itself succeeds, a second call with a NULL input is made so that any
   output iconv still owes is stored in OUT as well.

   Returns the number of bytes written to OUT, or 0 if the input is
   ambiguous (iconv failed with EINVAL), or -1 if it is invalid (iconv
   failed with EILSEQ).  Any other iconv failure, or a successful
   conversion that left input bytes unconsumed, prints a diagnostic to
   stderr and exits with status 1.  */
static int
try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
{
  enum { out_size = 12 };
  const char *src = (const char *) buf;
  size_t src_left = buflen;
  char *dst = (char *) out;
  size_t dst_left = out_size;
  size_t rc;

  iconv (cd, NULL, NULL, NULL, NULL);
  rc = iconv (cd, (char **) &src, &src_left, &dst, &dst_left);
  if (rc != (size_t) -1)
    rc = iconv (cd, NULL, NULL, &dst, &dst_left);

  if (rc == (size_t) -1)
    {
      /* Capture errno before any stdio call can disturb it.  */
      int err = errno;

      if (err == EILSEQ)
	return -1;
      if (err == EINVAL)
	return 0;

      fprintf (stderr, "%s: iconv error: ", hexbuf (buf, buflen));
      errno = err;
      perror ("");
      exit (1);
    }

  if (src_left != 0)
    {
      fprintf (stderr, "%s: inbytes = %ld, outbytes = %ld\n",
	       hexbuf (buf, buflen),
	       (long) (buflen - src_left),
	       (long) (out_size - dst_left));
      exit (1);
    }

  return out_size - dst_left;
}
108
2373b30e
UD
109/* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
110static const char *
b79f74cd
UD
111utf8_decode (const unsigned char *out, unsigned int outlen)
112{
fa00744e 113 static char hexbuf[84];
2373b30e
UD
114 char *p = hexbuf;
115
116 while (outlen > 0)
117 {
118 if (p > hexbuf)
119 *p++ = ' ';
120
121 if (out[0] < 0x80)
122 {
123 sprintf (p, "0x%04X", out[0]);
124 out += 1; outlen -= 1;
125 }
126 else if (out[0] >= 0xc0 && out[0] < 0xe0 && outlen >= 2)
127 {
128 sprintf (p, "0x%04X", ((out[0] & 0x1f) << 6) + (out[1] & 0x3f));
129 out += 2; outlen -= 2;
130 }
131 else if (out[0] >= 0xe0 && out[0] < 0xf0 && outlen >= 3)
132 {
133 sprintf (p, "0x%04X", ((out[0] & 0x0f) << 12)
134 + ((out[1] & 0x3f) << 6) + (out[2] & 0x3f));
135 out += 3; outlen -= 3;
136 }
137 else if (out[0] >= 0xf0 && out[0] < 0xf8 && outlen >= 4)
138 {
139 sprintf (p, "0x%04X", ((out[0] & 0x07) << 18)
140 + ((out[1] & 0x3f) << 12)
141 + ((out[2] & 0x3f) << 6) + (out[3] & 0x3f));
142 out += 4; outlen -= 4;
143 }
144 else if (out[0] >= 0xf8 && out[0] < 0xfc && outlen >= 5)
145 {
146 sprintf (p, "0x%04X", ((out[0] & 0x03) << 24)
147 + ((out[1] & 0x3f) << 18)
148 + ((out[2] & 0x3f) << 12)
149 + ((out[3] & 0x3f) << 6) + (out[4] & 0x3f));
150 out += 5; outlen -= 5;
151 }
152 else if (out[0] >= 0xfc && out[0] < 0xfe && outlen >= 6)
153 {
154 sprintf (p, "0x%04X", ((out[0] & 0x01) << 30)
155 + ((out[1] & 0x3f) << 24)
156 + ((out[2] & 0x3f) << 18)
157 + ((out[3] & 0x3f) << 12)
158 + ((out[4] & 0x3f) << 6) + (out[5] & 0x3f));
159 out += 6; outlen -= 6;
160 }
161 else
162 {
163 sprintf (p, "0x????");
164 out += 1; outlen -= 1;
165 }
166
601d2942
UD
167 if (bmp_only && strlen (p) > 6)
168 /* Ignore conversions outside Unicode plane 0. */
169 return NULL;
170
2373b30e
UD
171 p += strlen (p);
172 }
173
174 return hexbuf;
b79f74cd
UD
175}
176
177int
178main (int argc, char *argv[])
179{
180 const char *charset;
181 iconv_t cd;
601d2942 182 int search_depth;
b79f74cd
UD
183
184 if (argc != 2)
185 {
e2806aaf 186 fprintf (stderr, "Usage: tst-table-from charset\n");
b79f74cd
UD
187 exit (1);
188 }
189 charset = argv[1];
190
191 cd = iconv_open ("UTF-8", charset);
192 if (cd == (iconv_t)(-1))
193 {
194 perror ("iconv_open");
195 exit (1);
196 }
197
601d2942
UD
198 /* When testing UTF-8 or GB18030, stop at 0x10000, otherwise the output
199 file gets too big. */
200 bmp_only = (strcmp (charset, "UTF-8") == 0
201 || strcmp (charset, "GB18030") == 0);
202 search_depth = (strcmp (charset, "UTF-8") == 0 ? 3 : 4);
203
b79f74cd 204 {
fa00744e 205 unsigned char out[12];
b79f74cd
UD
206 unsigned char buf[4];
207 unsigned int i0, i1, i2, i3;
208 int result;
209
210 for (i0 = 0; i0 < 0x100; i0++)
211 {
212 buf[0] = i0;
213 result = try (cd, buf, 1, out);
214 if (result < 0)
215 {
216 }
217 else if (result > 0)
218 {
601d2942
UD
219 const char *unicode = utf8_decode (out, result);
220 if (unicode != NULL)
221 printf ("0x%02X\t%s\n", i0, unicode);
b79f74cd
UD
222 }
223 else
224 {
225 for (i1 = 0; i1 < 0x100; i1++)
226 {
227 buf[1] = i1;
228 result = try (cd, buf, 2, out);
229 if (result < 0)
230 {
231 }
232 else if (result > 0)
233 {
601d2942
UD
234 const char *unicode = utf8_decode (out, result);
235 if (unicode != NULL)
236 printf ("0x%02X%02X\t%s\n", i0, i1, unicode);
b79f74cd
UD
237 }
238 else
239 {
240 for (i2 = 0; i2 < 0x100; i2++)
241 {
242 buf[2] = i2;
243 result = try (cd, buf, 3, out);
244 if (result < 0)
245 {
246 }
247 else if (result > 0)
248 {
601d2942
UD
249 const char *unicode = utf8_decode (out, result);
250 if (unicode != NULL)
251 printf ("0x%02X%02X%02X\t%s\n",
252 i0, i1, i2, unicode);
b79f74cd 253 }
601d2942 254 else if (search_depth > 3)
b79f74cd
UD
255 {
256 for (i3 = 0; i3 < 0x100; i3++)
257 {
258 buf[3] = i3;
259 result = try (cd, buf, 4, out);
260 if (result < 0)
261 {
262 }
263 else if (result > 0)
264 {
601d2942
UD
265 const char *unicode =
266 utf8_decode (out, result);
267 if (unicode != NULL)
268 printf ("0x%02X%02X%02X%02X\t%s\n",
269 i0, i1, i2, i3, unicode);
b79f74cd
UD
270 }
271 else
272 {
273 fprintf (stderr,
274 "%s: incomplete byte sequence\n",
275 hexbuf (buf, 4));
276 exit (1);
277 }
278 }
279 }
280 }
281 }
282 }
283 }
284 }
285 }
286
287 if (iconv_close (cd) < 0)
288 {
289 perror ("iconv_close");
290 exit (1);
291 }
292
755104ed 293 if (ferror (stdin) || fflush (stdout) || ferror (stdout))
b79f74cd
UD
294 {
295 fprintf (stderr, "I/O error\n");
296 exit (1);
297 }
298
bf4de8f3 299 return 0;
b79f74cd 300}