]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconvdata/johab.c
Update.
[thirdparty/glibc.git] / iconvdata / johab.c
CommitLineData
a44d2393
UD
1/* Mapping tables for JOHAB handling.
2 Copyright (C) 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
8619129f
UD
4 Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
5 and Ulrich Drepper <drepper@cygnus.com>, 1998.
a44d2393
UD
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
16
17 You should have received a copy of the GNU Library General Public
18 License along with the GNU C Library; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
2aea1d79 22#include <stdint.h>
a44d2393
UD
23#include <ksc5601.h>
24
a44d2393
UD
/* Tables mapping the 5-bit fields of a two-byte Johab Hangul code to
   Hangul Jamo ordinals.  Five bits each encode the leading consonant
   (19 + 1 filler), the medial vowel (21 + 1 filler) and the trailing
   consonant (27 + 1 filler).

   KS C 5601-1992 Annex 3 Table 2
   0 : filler, -1 : invalid, >= 1 : valid Jamo ordinal.  */

/* Leading consonant, indexed by bits 14..10 of the two-byte code.  All 32
   slots are spelled out so that no pattern silently defaults to 0, which
   would mean "filler" rather than "invalid".  */
static const int init[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/* Medial vowel, indexed by bits 9..5 of the two-byte Johab code.
   0 : filler, -1 : invalid, 1..21 : vowel ordinal.  The first two
   patterns of every group of eight are invalid.  */
static const int mid[32] =
{
  -1, -1,  0,  1,  2,  3,  4,  5,
  -1, -1,  6,  7,  8,  9, 10, 11,
  -1, -1, 12, 13, 14, 15, 16, 17,
  -1, -1, 18, 19, 20, 21, -1, -1
};
/* Trailing consonant, indexed by bits 4..0 of the two-byte Johab code.
   0 : filler, -1 : invalid, 1..27 : consonant ordinal.  Pattern 0x12 is
   an invalid hole in the middle of the valid range.  */
static const int final[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, -1, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, -1, -1
};
51
/* Hangul Jamo in Johab to Unicode 2.0.

   Unicode 2.0 defines 51 Hangul Compatibility Jamos in the block
   [0x3131,0x3163]; the consonants occupy [0x3131,0x314e].

   It's to be considered later which Jamo block to use, the Compatibility
   block [0x3131,0x3163] or the Hangul Conjoining Jamo block
   [0x1100,0x11ff].  */

/* UCS4 value of a stand-alone leading consonant, indexed by the leading
   consonant ordinal minus one.  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
  0x314c, 0x314d, 0x314e
};
66
/* UCS4 value of a stand-alone trailing consonant, indexed by the trailing
   consonant ordinal minus one.  Only the compound consonants that have no
   leading-consonant form carry a value here; every other slot is 0, which
   the from-JOHAB loop reports as illegal input.  */
static const uint32_t final_to_ucs[27] =
{
  0,      0,      0x3133, 0,      0x3135, 0x3136, 0,      0,
  0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
  0x3140, 0,      0,      0x3144, 0,      0,      0,
  0,      0,      0,      0,      0,      0
};
74
/* The following three arrays are used to convert precomposed Hangul
   syllables in [0xac00,0xd7a3] to Jamo bit patterns for Johab encoding.

   cf. : KS C 5601-1992, Annex 3 Table 2

   Arrays are used to speed things up although it's possible to get the
   same result arithmetically.  */

/* Bit pattern contributed by the leading consonant, indexed by its
   zero-based position.  The value includes the always-set top bit of a
   Johab Hangul code.  */
static const int init_to_bit[19] =
{
  0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00,
  0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400,
  0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00,
  0xd000
};
92
/* Bit pattern contributed by the medial vowel of a precomposed syllable,
   indexed by its zero-based position.  The gaps between the rows skip the
   5-bit patterns that the decoding table marks as invalid.  */
static const int mid_to_bit[21] =
{
  0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  0x0340, 0x0360, 0x0380, 0x03a0
};
100
/* Bit pattern contributed by the trailing consonant of a precomposed
   syllable, indexed by the syllable's trailing index (0 means "no trailing
   consonant" and maps to the fill code 1).  Pattern 0x12 is skipped.  */
static const int final_to_bit[28] =
{
  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
  0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
  0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};
106
/* The conversion table from UCS4 Hangul Compatibility Jamo in
   [0x3131,0x3163] to Johab, indexed by (UCS4 value - 0x3131).

   cf. 1. KS C 5601-1992 Annex 3 Table 2
       2. Unicode 2.0 manual

   Consonants that exist as a leading consonant are encoded as
   "leading consonant + fill + fill" (0x..41).  Compound consonants that
   only exist as a trailing consonant are encoded as "fill + fill +
   trailing consonant" (0x844x/0x845x).  Vowels are encoded as
   "fill + vowel + fill".  */
static const uint16_t jamo_from_ucs_table[51] =
{
  /* U+3131 .. U+3140 */
  0x8841, 0x8c41,
  0x8444,
  0x9041,
  0x8446, 0x8447,
  0x9441, 0x9841, 0x9c41,
  0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  /* U+3141 .. U+314e */
  0xa041, 0xa441, 0xa841,
  0x8454,
  0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  /* U+314f .. U+3163 (vowels) */
  0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  0x8741, 0x8761, 0x8781, 0x87a1
};
132
133
8619129f
UD
134static inline uint32_t
135johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
a44d2393
UD
136{
137 if (idx <= 0xdefe)
8619129f
UD
138 return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139 - (c2 > 0x90 ? 0x43 : 0x31)];
a44d2393 140 else
8619129f
UD
141 return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142 - (c2 > 0x90 ? 0x43 : 0x31)];
a44d2393
UD
143}
144
/* Convert the UCS4 Hanja character CH to its two-byte Johab code.
   Returns 0 if ucs4_to_ksc5601_hanja reports failure (a zero return is
   taken to mean "no mapping").  */
static uint16_t
johab_hanja_from_ucs (uint32_t ch)
{
  uint16_t idx;
  int row, col;

  if (!ucs4_to_ksc5601_hanja (ch, &idx))
    return 0;

  /* Hanja begins at the 42nd row.  42 = 0x2a : 0x2a + 0x20 = 0x4a.  */
  row = idx / 256 - 0x4a;
  col = idx % 256 + 0x80;

  /* Two KS C 5601 rows share one Johab lead byte.  An odd row keeps its
     column as the trail byte.  An even row is moved down: columns up to
     0xee land in 0x31..0x7e, the remaining ones in 0x91..0xa0.  */
  return ((row / 2) * 256 + 0xe000 + col
          + (row % 2 ? 0 : (col > 0xee ? 0x43 : 0x31) - 0xa1));
}
162
/* Convert the UCS4 symbol CH to its two-byte Johab code.  Returns 0 if
   ucs4_to_ksc5601_sym reports failure (a zero return is taken to mean
   "no mapping").  */
static uint16_t
johab_sym_from_ucs (uint32_t ch)
{
  uint16_t idx;
  int row, col;

  if (!ucs4_to_ksc5601_sym (ch, &idx))
    return 0;

  /* Symbol rows are counted from 0x21 and start at Johab lead byte 0xd9.  */
  row = idx / 256 - 0x21;
  col = idx % 256 + 0x80;

  /* Two KS C 5601 rows share one Johab lead byte.  An odd row keeps its
     column as the trail byte.  An even row is moved down: columns up to
     0xee land in 0x31..0x7e, the remaining ones in 0x91..0xa0.  */
  return ((row / 2) * 256 + 0xd900 + col
          + (row % 2 ? 0 : (col > 0xee ? 0x43 : 0x31) - 0xa1));
}
180
181
a44d2393 182static inline void
8619129f 183johab_from_ucs4 (uint32_t ch, unsigned char *cp)
a44d2393
UD
184{
185 if (ch >= 0x7f)
186 {
187 int idx;
188
189 if (ch >= 0xac00 && ch <= 0xd7a3)
190 {
191 ch -= 0xac00;
192 idx = init_to_bit[ch / 588]; /* 21*28 = 588 */
193 idx += mid_to_bit[(ch / 28) % 21]; /* (ch % (21 * 28)) / 28 */
194 idx += final_to_bit[ch % 28]; /* (ch % (21 * 28)) % 28 */
195 }
196 /* KS C 5601-1992 Annex 3 regards 0xA4DA(Hangul Filler : U3164)
197 as symbol */
198 else if (ch >= 0x3131 && ch <= 0x3163)
199 idx = jamo_from_ucs_table[ch - 0x3131];
200 else if (ch >= 0x4e00 && ch <= 0x9fa5
201 || ch >= 0xf900 && ch <= 0xfa0b)
202 idx = johab_hanja_from_ucs (ch);
203 /* Half-width Korean Currency Won Sign
204 else if ( ch == 0x20a9 )
205 idx = 0x5c00;
206 */
207 else
208 idx = johab_sym_from_ucs (ch);
209
8619129f
UD
210 cp[0] = (unsigned char) (idx / 256);
211 cp[1] = (unsigned char) (idx & 0xff);
a44d2393
UD
212
213 }
214 else
215 {
8619129f
UD
216 cp[0] = (unsigned char) ch;
217 cp[1] = 0;
a44d2393 218 }
a44d2393
UD
219}
220
221
/* Definitions used in the body of the `gconv' function.  */
#define CHARSET_NAME            "JOHAB//"
#define FROM_LOOP               from_johab
#define TO_LOOP                 to_johab
/* NOTE(review): presumably these ask the skeleton to supply the default
   init and fini functions; confirm against iconv/skeleton.c.  */
#define DEFINE_INIT             1
#define DEFINE_FINI             1
/* A Johab character takes one or two bytes, a UCS4 character four.  */
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
231
232
/* First define the conversion function from JOHAB to UCS4.

   BODY converts exactly one character per pass.  It reads from `inptr',
   stores one 32-bit value at `outptr' and advances both.  On an error it
   sets `result' and leaves the loop with `break' before either pointer
   has moved.  `inptr', `inend', `outptr', `result' and NEED_LENGTH_TEST
   are provided by the including loop template.  */
#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    /* Bytes below 0x7f are passed through unchanged.  In particular 0x5c \
       is not mapped to U+20A9 (half-width Korean currency Won sign).  */ \
    if (ch < 0x7f) \
      /* Plain ASCII.  */ \
      ++inptr; \
    else \
      { \
        /* Johab : 1. Hangul \
                      1st byte : 0x84-0xd3 \
                      2nd byte : 0x41-0x7e, 0x81-0xfe \
                   2. Hanja & Symbol : \
                      1st byte : 0xd8-0xde, 0xe0-0xf9 \
                      2nd byte : 0x31-0x7e, 0x91-0xfe \
           0xd831-0xd87e and 0xd891-0xd8fe are the user-defined area and \
           are rejected here.  */ \
        uint32_t ch2; \
        uint_fast32_t idx; \
        \
        if (ch > 0xf9 || ch == 0xdf || (ch > 0x7e && ch < 0x84) \
            || (ch > 0xd3 && ch < 0xd9)) \
          { \
            /* These lead bytes are illegal.  */ \
            result = GCONV_ILLEGAL_INPUT; \
            break; \
          } \
        \
        /* Two-byte character.  First test whether the next byte is also \
           available.  */ \
        if (NEED_LENGTH_TEST && inptr + 1 >= inend) \
          { \
            /* The second byte is not available.  */ \
            result = GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        ch2 = inptr[1]; \
        idx = ch * 256 + ch2; \
        if (ch <= 0xd3) \
          { \
            /* Hangul: split the code into its three 5-bit fields.  The \
               tables yield -1 for invalid patterns, 0 for the fill code \
               and the Jamo ordinal otherwise.  */ \
            const int i = init[(idx & 0x7c00) >> 10]; \
            const int m = mid[(idx & 0x03e0) >> 5]; \
            const int f = final[idx & 0x001f]; \
            \
            if (i == -1 || m == -1 || f == -1) \
              { \
                /* This is illegal.  */ \
                result = GCONV_ILLEGAL_INPUT; \
                break; \
              } \
            else if (i > 0 && m > 0) \
              /* Complete syllable, with or without trailing consonant.  */ \
              ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00; \
            else if (i > 0 && m == 0 && f == 0) \
              /* Leading consonant on its own.  */ \
              ch = init_to_ucs[i - 1]; \
            else if (i == 0 && m > 0 && f == 0) \
              /* Vowel on its own: 0x314f + m - 1.  */ \
              ch = 0x314e + m; \
            else if (i == 0 && m == 0 && f > 0) \
              /* Trailing consonant on its own; may yield 0, which is \
                 rejected below.  */ \
              ch = final_to_ucs[f - 1]; \
            else \
              { \
                /* Any other combination of fill codes is illegal.  */ \
                result = GCONV_ILLEGAL_INPUT; \
                break; \
              } \
          } \
        else if (ch2 < 0x31 || (ch2 > 0x7e && ch2 < 0x91) || ch2 == 0xff) \
          { \
            /* Illegal trail byte for a symbol or Hanja.  */ \
            result = GCONV_ILLEGAL_INPUT; \
            break; \
          } \
        else if (ch == 0xda && ch2 > 0xa0 && ch2 < 0xd4) \
          { \
            /* This is illegal.  Modern Hangul Jaso is defined elsewhere \
               in Johab.  */ \
            result = GCONV_ILLEGAL_INPUT; \
            break; \
          } \
        else \
          ch = johab_sym_hanja_to_ucs (idx, ch, ch2); \
        \
        if (ch == 0) \
          { \
            /* The tables have no mapping for this code.  */ \
            result = GCONV_ILLEGAL_INPUT; \
            break; \
          } \
        \
        inptr += 2; \
      } \
    \
    /* Store the UCS4 value and step over it.  Written as two statements \
       because incrementing the result of a cast is not valid C.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
357#include <iconv/loop.c>
358
359
/* Next, define the other direction.

   BODY converts exactly one UCS4 character per pass.  It reads four bytes
   from `inptr', writes one or two bytes at `outptr' and advances both.
   On an error it sets `result' and leaves the loop with `break' before
   either pointer has moved.  `inptr', `outptr', `outend', `result' and
   NEED_LENGTH_TEST are provided by the including loop template.  */
#define MIN_NEEDED_INPUT        MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
#define LOOPFCT                 TO_LOOP
#define BODY \
  { \
    uint32_t ch = *((uint32_t *) inptr); \
    unsigned char cp[2]; \
    \
    johab_from_ucs4 (ch, cp); \
    \
    /* A zero lead byte for anything but U+0000 means "no mapping".  */ \
    if (cp[0] == '\0' && ch != 0) \
      { \
        /* Illegal character.  */ \
        result = GCONV_ILLEGAL_INPUT; \
        break; \
      } \
    \
    /* A non-zero second byte marks a two-byte code.  Make sure both \
       bytes fit before writing anything, so that nothing has to be \
       rolled back.  */ \
    if (cp[1] != '\0' && NEED_LENGTH_TEST && outptr + 1 >= outend) \
      { \
        /* The result does not fit into the buffer.  */ \
        result = GCONV_FULL_OUTPUT; \
        break; \
      } \
    \
    *outptr++ = cp[0]; \
    if (cp[1] != '\0') \
      *outptr++ = cp[1]; \
    \
    inptr += 4; \
  }
407#include <iconv/loop.c>
408
409
410/* Now define the toplevel functions. */
411#include <iconv/skeleton.c>