/* Mapping tables for JOHAB handling.
   Copyright (C) 1998-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
   and Ulrich Drepper <drepper@cygnus.com>, 1998.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <dlfcn.h>
#include <stdint.h>
#include <ksc5601.h>

/* The table for Bit pattern to Hangul Jamo
   5 bits each are used to encode
   leading consonants(19 + 1 filler), medial vowels(21 + 1 filler)
   and trailing consonants(27 + 1 filler).

   KS C 5601-1992 Annex 3 Table 2
   0 : Filler, -1: invalid, >= 1 : valid

 */

/* Leading-consonant field (5 bits) -> consonant number.  All 32 slots
   are spelled out so that no slot is zero-initialised by accident: a
   zero would be read as "filler" rather than "invalid".  */
static const int init[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,
   7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1
};
/* Medial-vowel field (5 bits) -> vowel number.
   0: filler, -1: invalid bit pattern, 1..21: valid vowel.  */
static const int mid[32] =
{
  -1, -1,  0,  1,  2,  3,  4,  5,
  -1, -1,  6,  7,  8,  9, 10, 11,
  -1, -1, 12, 13, 14, 15, 16, 17,
  -1, -1, 18, 19, 20, 21, -1, -1
};
/* Trailing-consonant field (5 bits) -> consonant number.
   0: filler, -1: invalid bit pattern, 1..27: valid consonant.  */
static const int final[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,
   7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, -1, 17, 18, 19, 20, 21,
  22, 23, 24, 25, 26, 27, -1, -1
};

/*
   Hangul Jamo in Johab to Unicode 2.0 : Unicode 2.0
   defines 51 Hangul Compatibility Jamos in the block [0x3131,0x314e]

   It's to be considered later which Jamo block to use, Compatibility
   block [0x3131,0x314e] or Hangul Conjoining Jamo block, [0x1100,0x11ff]

 */

/* Leading consonant number (1-based, stored at index number - 1) ->
   Hangul Compatibility Jamo code point.  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
  0x314c, 0x314d, 0x314e
};

/* Trailing consonant number (1-based, stored at index number - 1) ->
   Hangul Compatibility Jamo code point.  Only the consonant clusters
   have an entry here; every other slot is 0, which the decoder treats
   as an illegal character.  */
static const uint32_t final_to_ucs[31] =
{
  0,      0,      0x3133, 0,      0x3135, 0x3136, 0,      0,
  0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
  0x3140, 0,      0,      0x3144, 0,      0,      0,      0,
  0,      0,      0,      0,      0,      0,      0,      0,      0
};

/* The following three arrays are used to convert
   precomposed Hangul syllables in [0xac00,0xd???]
   to Jamo bit patterns for Johab encoding

   cf. : KS C 5601-1992, Annex3 Table 2

   Arrays are used to speed up things although it's possible
   to get the same result arithmetically.

 */

/* Leading consonant index (0..18) -> its contribution to the 16-bit
   Johab code.  The entries step by 0x0400 starting at 0x8800.  */
static const int init_to_bit[19] =
{
  0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00,
  0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400,
  0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00,
  0xd000
};

/* Medial vowel index (0..20) -> its contribution to the 16-bit Johab
   code (the bits selected by mask 0x03e0).  */
static const int mid_to_bit[21] =
{
          0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  0x0340, 0x0360, 0x0380, 0x03a0
};

/* Trailing consonant index (0..27, 0 meaning "no trailing consonant")
   -> its contribution to the 16-bit Johab code (the low five bits).
   The value 0x12 is skipped.  */
static const int final_to_bit[28] =
{
  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
  0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
  0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};

/* The conversion table from
   UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
   to Johab

   cf. 1. KS C 5601-1992 Annex 3 Table 2
   2. Unicode 2.0 manual

   The table is indexed by (code point - 0x3131).

 */
static const uint16_t jamo_from_ucs_table[51] =
{
  0x8841, 0x8c41,
  0x8444,
  0x9041,
  0x8446, 0x8447,
  0x9441, 0x9841, 0x9c41,
  0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  0xa041, 0xa441, 0xa841,
  0x8454,
  0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  0x8741, 0x8761, 0x8781, 0x87a1
};


aae30307 134static uint32_t
8619129f 135johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
a44d2393
UD
136{
137 if (idx <= 0xdefe)
8619129f
UD
138 return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139 - (c2 > 0x90 ? 0x43 : 0x31)];
a44d2393 140 else
8619129f
UD
141 return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142 - (c2 > 0x90 ? 0x43 : 0x31)];
a44d2393 143}
/* Definitions used in the body of the `gconv' function.  */
#define CHARSET_NAME		"JOHAB//"
#define FROM_LOOP		from_johab
#define TO_LOOP			to_johab
#define DEFINE_INIT		1
#define DEFINE_FINI		1
/* A JOHAB character takes one or two bytes; the other side of the
   conversion uses four bytes per character.  */
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		2
#define MIN_NEEDED_TO		4


/* First define the conversion function from JOHAB to UCS4.

   BODY converts one character per expansion:
     - bytes up to 0x7f are passed through, except that 0x5c becomes
       U+20A9 (WON SIGN);
     - first bytes 0x84..0xd3 start a two-byte Hangul code, decoded
       through the init/mid/final bit-field tables;
     - first bytes 0xd9..0xde and 0xe0..0xf9 start a two-byte symbol or
       Hanja code, decoded by johab_sym_hanja_to_ucs;
     - every other first byte is rejected.
   A table result of 0 is reported as an illegal character.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
 \
    if (ch <= 0x7f) \
      { \
        /* Plain ISO646-KR.  */ \
        if (ch == 0x5c) \
          ch = 0x20a9; /* half-width Korean Currency WON sign  */ \
        ++inptr; \
      } \
    /* Johab : 1. Hangul \
       1st byte : 0x84-0xd3 \
       2nd byte : 0x41-0x7e, 0x81-0xfe \
       2. Hanja & Symbol  : \
       1st byte : 0xd8-0xde, 0xe0-0xf9 \
       2nd byte : 0x31-0x7e, 0x91-0xfe \
       0xd831-0xd87e and 0xd891-0xd8fe are user-defined area \
       (first byte 0xd8 is rejected below).  */ \
    else \
      { \
        if (__builtin_expect (ch > 0xf9, 0) \
            || __builtin_expect (ch == 0xdf, 0) \
            || (__builtin_expect (ch > 0x7e, 0) && ch < 0x84) \
            || (__builtin_expect (ch > 0xd3, 0) && ch < 0xd9)) \
          { \
            /* These are illegal.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        else \
          { \
            /* Two-byte character.  First test whether the next \
               character is also available.  */ \
            uint32_t ch2; \
            uint_fast32_t idx; \
 \
            if (__builtin_expect (inptr + 1 >= inend, 0)) \
              { \
                /* The second character is not available.  Store the \
                   intermediate result.  */ \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
 \
            ch2 = inptr[1]; \
            idx = ch * 256 + ch2; \
            if (__builtin_expect (ch <= 0xd3, 1)) \
              { \
                /* Hangul: split the 16-bit code into three 5-bit \
                   fields and translate each one.  */ \
                int_fast32_t i, m, f; \
 \
                i = init[(idx & 0x7c00) >> 10]; \
                m = mid[(idx & 0x03e0) >> 5]; \
                f = final[idx & 0x001f]; \
 \
                if (__builtin_expect (i == -1, 0) \
                    || __builtin_expect (m == -1, 0) \
                    || __builtin_expect (f == -1, 0)) \
                  { \
                    /* This is illegal.  */ \
                    STANDARD_FROM_LOOP_ERR_HANDLER (1); \
                  } \
                else if (i > 0 && m > 0) \
                  /* Leading consonant and vowel present: precomposed \
                     syllable, f == 0 meaning no trailing consonant.  */ \
                  ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00; \
                else if (i > 0 && m == 0 && f == 0) \
                  /* Lone leading consonant.  */ \
                  ch = init_to_ucs[i - 1]; \
                else if (i == 0 && m > 0 && f == 0) \
                  /* Lone vowel.  */ \
                  ch = 0x314e + m; /* 0x314f + m - 1 */ \
                else if (__builtin_expect ((i | m) == 0, 1) \
                         && __builtin_expect (f > 0, 1)) \
                  /* Lone trailing consonant; the table yields 0 for \
                     the ones without a mapping.  */ \
                  ch = final_to_ucs[f - 1]; /* round trip?? */ \
                else \
                  { \
                    /* This is illegal.  */ \
                    STANDARD_FROM_LOOP_ERR_HANDLER (1); \
                  } \
              } \
            else \
              { \
                /* Symbol or Hanja: validate the second byte and the \
                   per-row limits before the table lookup.  */ \
                if (__builtin_expect (ch2 < 0x31, 0) \
                    || (__builtin_expect (ch2 > 0x7e, 0) && ch2 < 0x91) \
                    || __builtin_expect (ch2 == 0xff, 0) \
                    || (__builtin_expect (ch == 0xd9, 0) && ch2 > 0xe8) \
                    || (__builtin_expect (ch == 0xda, 0) \
                        && ch2 > 0xa0 && ch2 < 0xd4) \
                    || (__builtin_expect (ch == 0xde, 0) && ch2 > 0xf1)) \
                  { \
                    /* This is illegal.  */ \
                    STANDARD_FROM_LOOP_ERR_HANDLER (1); \
                  } \
                else \
                  { \
                    ch = johab_sym_hanja_to_ucs (idx, ch, ch2); \
                  } \
              } \
          } \
 \
        if (__builtin_expect (ch == 0, 0)) \
          { \
            /* This is an illegal character.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (2); \
          } \
 \
        inptr += 2; \
      } \
 \
    put32 (outptr, ch); \
    outptr += 4; \
  }
55985355 277#define LOOP_NEED_FLAGS
f9ad060c
UD
278#define ONEBYTE_BODY \
279 { \
280 if (c <= 0x7f) \
281 return (c == 0x5c ? 0x20a9 : c); \
282 else \
283 return WEOF; \
284 }
8619129f
UD
285#include <iconv/loop.c>
286
287
/* Next, define the other direction.

   BODY converts one UCS4 character per expansion:
     - values up to 0x7f other than 0x5c are emitted as a single byte;
     - U+AC00..U+D7A3 (Hangul syllables) are composed arithmetically
       from init_to_bit, mid_to_bit and final_to_bit;
     - U+3131..U+3163 (compatibility Jamo) use jamo_from_ucs_table;
     - U+4E00..U+9FA5 and U+F900..U+FA0B go through
       ucs4_to_ksc5601_hanja and are then re-packed into JOHAB rows;
     - U+20A9 (WON SIGN) becomes the single byte 0x5c;
     - everything else goes through ucs4_to_ksc5601_sym.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
#define BODY \
  { \
    uint32_t ch = get32 (inptr); \
 \
    if (ch <= 0x7f && ch != 0x5c) \
      *outptr++ = ch; \
    else \
      { \
        if (ch >= 0xac00 && ch <= 0xd7a3) \
          { \
            if (__builtin_expect (outptr + 2 > outend, 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
 \
            ch -= 0xac00; \
 \
            ch = (init_to_bit[ch / 588]      /* 21 * 28 = 588 */ \
                  + mid_to_bit[(ch / 28) % 21] /* (ch % (21 * 28)) / 28 */ \
                  + final_to_bit[ch % 28]);  /* (ch % (21 * 28)) % 28 */ \
 \
            *outptr++ = ch / 256; \
            *outptr++ = ch % 256; \
          } \
        /* KS C 5601-1992 Annex 3 regards 0xA4DA(Hangul Filler : U3164) \
           as symbol */ \
        else if (ch >= 0x3131 && ch <= 0x3163) \
          { \
            if (__builtin_expect (outptr + 2 > outend, 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
 \
            ch = jamo_from_ucs_table[ch - 0x3131]; \
 \
            *outptr++ = ch / 256; \
            *outptr++ = ch % 256; \
          } \
        else if ((ch >= 0x4e00 && ch <= 0x9fa5) \
                 || (ch >= 0xf900 && ch <= 0xfa0b)) \
          { \
            size_t written; \
            uint32_t temp; \
 \
            written = ucs4_to_ksc5601_hanja (ch, outptr, outend - outptr); \
            if (__builtin_expect (written == 0, 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            if (__builtin_expect (written == __UNKNOWN_10646_CHAR, 0)) \
              { \
                STANDARD_TO_LOOP_ERR_HANDLER (4); \
              } \
 \
            /* Rebase the two bytes left at outptr, fold them into a \
               linear index with 94 columns per row, and split that \
               index again into rows of 188 columns starting at first \
               byte 0xe0.  */ \
            outptr[0] -= 0x4a; \
            outptr[1] -= 0x21; \
 \
            temp = outptr[0] * 94 + outptr[1]; \
 \
            outptr[0] = 0xe0 + temp / 188; \
            outptr[1] = temp % 188; \
            outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31; \
 \
            outptr += 2; \
          } \
        else if (ch == 0x20a9) \
          *outptr++ = 0x5c; \
        else \
          { \
            size_t written; \
            uint32_t temp; \
 \
            written = ucs4_to_ksc5601_sym (ch, outptr, outend - outptr); \
            if (__builtin_expect (written == 0, 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            if (__builtin_expect (written == __UNKNOWN_10646_CHAR, 0) \
                || (outptr[0] == 0x22 && outptr[1] > 0x68)) \
              { \
                UNICODE_TAG_HANDLER (ch, 4); \
                STANDARD_TO_LOOP_ERR_HANDLER (4); \
              } \
 \
            /* Two source rows share one JOHAB first byte: the parity \
               of temp selects the upper or lower half of the second \
               byte range.  */ \
            temp = (outptr[0] < 0x4a ? outptr[0] + 0x191 : outptr[0] + 0x176); \
            outptr[1] += (temp % 2 ? 0x5e : 0); \
            outptr[1] += (outptr[1] < 0x6f ? 0x10 : 0x22); \
            outptr[0] = temp / 2; \
 \
            outptr += 2; \
          } \
      } \
 \
    inptr += 4; \
  }
55985355 405#define LOOP_NEED_FLAGS
8619129f
UD
406#include <iconv/loop.c>
407
408
409/* Now define the toplevel functions. */
410#include <iconv/skeleton.c>