]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconvdata/johab.c
CVE-2014-6040: Crashes on invalid input in IBM gconv modules [BZ #17325]
[thirdparty/glibc.git] / iconvdata / johab.c
CommitLineData
/* Mapping tables for JOHAB handling.
   Copyright (C) 1998-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
   and Ulrich Drepper <drepper@cygnus.com>, 1998.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

55985355 21#include <dlfcn.h>
2aea1d79 22#include <stdint.h>
a44d2393
UD
23#include <ksc5601.h>
24
/* The table for Bit pattern to Hangul Jamo
   5 bits each are used to encode
   leading consonants(19 + 1 filler), medial vowels(21 + 1 filler)
   and trailing consonants(27 + 1 filler).

   KS C 5601-1992 Annex 3 Table 2
   0 : Filler, -1: invalid, >= 1 : valid

   init[] is indexed by the 5-bit leading-consonant field of a JOHAB
   code, i.e. (code & 0x7c00) >> 10.  All 32 slots are spelled out so
   that the last one is explicitly "invalid" rather than being
   zero-initialised, which would read as "filler".  */
static const int init[32] =
{
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
  19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/* Medial-vowel field of a JOHAB code, indexed by (code & 0x03e0) >> 5.
   0 is the filler, 1..21 are the vowels, -1 marks an invalid pattern.  */
static const int mid[32] =
{
  -1, -1, 0, 1, 2, 3, 4, 5,
  -1, -1, 6, 7, 8, 9, 10, 11,
  -1, -1, 12, 13, 14, 15, 16, 17,
  -1, -1, 18, 19, 20, 21, -1, -1
};
/* Trailing-consonant field of a JOHAB code, indexed by code & 0x001f.
   0 is the filler, 1..27 are the consonants, -1 marks an invalid
   pattern.  */
static const int final[32] =
{
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
  -1, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, -1, -1
};

/*
   Hangul Jamo in Johab to Unicode 2.0 : Unicode 2.0
   defines 51 Hangul Compatibility Jamos in the block [0x3131,0x3163]

   It's to be considered later which Jamo block to use, Compatibility
   block [0x3131,0x3163] or Hangul Conjoining Jamo block, [0x1100,0x11ff]

   init_to_ucs[] maps a leading consonant (value from init[] minus 1)
   that stands alone to its Compatibility Jamo code point.
 */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
  0x314c, 0x314d, 0x314e
};

/* Maps a trailing consonant (value from final[] minus 1) that stands
   alone to its Compatibility Jamo code point.  A zero entry means there
   is no mapping; the from-direction loop reports a resulting zero as an
   illegal character.  */
static const uint32_t final_to_ucs[31] =
{
  L'\0', L'\0', 0x3133, L'\0', 0x3135, 0x3136, L'\0', L'\0',
  0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
  0x3140, L'\0', L'\0', 0x3144, L'\0', L'\0', L'\0', L'\0',
  L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'
};

/* The following three arrays are used to convert
   precomposed Hangul syllables in [0xac00,0xd???]
   to Jamo bit patterns for Johab encoding

   cf. : KS C 5601-1992, Annex3 Table 2

   Arrays are used to speed up things although it's possible
   to get the same result arithmetically.

   init_to_bit[] holds the leading-consonant bits (including the high
   marker bit 0x8000) for each of the 19 leading consonants.
 */
static const int init_to_bit[19] =
{
  0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00,
  0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400,
  0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00,
  0xd000
};

/* Medial-vowel bits (the 0x03e0 field of a JOHAB code) for each of the
   21 vowels of a precomposed Hangul syllable.  */
static const int mid_to_bit[21] =
{
  0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  0x0340, 0x0360, 0x0380, 0x03a0
};

/* Trailing-consonant bits (the 0x001f field of a JOHAB code) for each of
   the 28 trailing positions of a precomposed Hangul syllable; entry 0 is
   the "no trailing consonant" filler.  The value 0x12 is skipped, which
   matches the -1 at index 0x12 in final[].  */
static const int final_to_bit[28] =
{
  1, 2, 3, 4, 5, 6, 7, 8, 9, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
  0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};

/* The conversion table from
   UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
   to Johab

   cf. 1. KS C 5601-1992 Annex 3 Table 2
       2. Unicode 2.0 manual

   The table is indexed by (ucs - 0x3131) and holds the two-byte JOHAB
   code as a 16-bit value (high byte first).
 */
static const uint16_t jamo_from_ucs_table[51] =
{
  0x8841, 0x8c41,
  0x8444,
  0x9041,
  0x8446, 0x8447,
  0x9441, 0x9841, 0x9c41,
  0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  0xa041, 0xa441, 0xa841,
  0x8454,
  0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  0x8741, 0x8761, 0x8781, 0x87a1
};


aae30307 134static uint32_t
8619129f 135johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
a44d2393
UD
136{
137 if (idx <= 0xdefe)
8619129f
UD
138 return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139 - (c2 > 0x90 ? 0x43 : 0x31)];
a44d2393 140 else
8619129f
UD
141 return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142 - (c2 > 0x90 ? 0x43 : 0x31)];
a44d2393 143}
/* Definitions used in the body of the `gconv' function.
   A JOHAB character occupies 1 or 2 bytes (MIN/MAX_NEEDED_FROM); the
   other side of the conversion uses 4 bytes per character
   (MIN_NEEDED_TO).  */
#define CHARSET_NAME		"JOHAB//"
#define FROM_LOOP		from_johab
#define TO_LOOP			to_johab
#define DEFINE_INIT		1
#define DEFINE_FINI		1
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define ONE_DIRECTION		0


/* First define the conversion function from JOHAB to UCS4.

   BODY converts one JOHAB character per expansion and is consumed by the
   <iconv/loop.c> template included after these definitions.  It relies
   on names that are not declared in this file (inptr, inend, outptr,
   result, put32 and the STANDARD_FROM_LOOP_ERR_HANDLER macro); those are
   expected to come from the loop template.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
#define BODY \
  {									      \
    uint32_t ch = *inptr;						      \
									      \
    if (ch <= 0x7f)							      \
      {									      \
	/* Plain ISO646-KR.  */						      \
	if (ch == 0x5c)							      \
	  ch = 0x20a9; /* half-width Korean Currency WON sign */	      \
	++inptr;							      \
      }									      \
    /* Johab : 1. Hangul						      \
       1st byte : 0x84-0xd3						      \
       2nd byte : 0x41-0x7e, 0x81-0xfe					      \
       2. Hanja & Symbol :						      \
       1st byte : 0xd8-0xde, 0xe0-0xf9					      \
       2nd byte : 0x31-0x7e, 0x91-0xfe					      \
       0xd831-0xd87e and 0xd891-0xd8fe are user-defined area */	      \
    else								      \
      {									      \
	/* Reject lead bytes outside 0x84-0xd3, 0xd9-0xde and 0xe0-0xf9.     \
	   Note that the user-defined lead byte 0xd8 is rejected too.  */     \
	if (__builtin_expect (ch > 0xf9, 0)				      \
	    || __builtin_expect (ch == 0xdf, 0)				      \
	    || (__builtin_expect (ch > 0x7e, 0) && ch < 0x84)		      \
	    || (__builtin_expect (ch > 0xd3, 0) && ch < 0xd9))		      \
	  {								      \
	    /* These are illegal.  */					      \
	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
	  }								      \
	else								      \
	  {								      \
	    /* Two-byte character.  First test whether the next	      \
	       character is also available.  */				      \
	    uint32_t ch2;						      \
	    uint_fast32_t idx;						      \
									      \
	    if (__glibc_unlikely (inptr + 1 >= inend))			      \
	      {								      \
		/* The second character is not available.  Store the	      \
		   intermediate result.  */				      \
		result = __GCONV_INCOMPLETE_INPUT;			      \
		break;							      \
	      }								      \
									      \
	    ch2 = inptr[1];						      \
	    idx = ch * 256 + ch2;					      \
	    if (__glibc_likely (ch <= 0xd3))				      \
	      {								      \
		/* Hangul: split the 16-bit code into its three 5-bit	      \
		   jamo fields and translate each through its table.  */     \
		int_fast32_t i, m, f;					      \
									      \
		i = init[(idx & 0x7c00) >> 10];				      \
		m = mid[(idx & 0x03e0) >> 5];				      \
		f = final[idx & 0x001f];				      \
									      \
		if (__builtin_expect (i == -1, 0)			      \
		    || __builtin_expect (m == -1, 0)			      \
		    || __builtin_expect (f == -1, 0))			      \
		  {							      \
		    /* This is illegal.  */				      \
		    STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
		  }							      \
		else if (i > 0 && m > 0)				      \
		  /* Full syllable (trailing consonant optional).  */	      \
		  ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00;	      \
		else if (i > 0 && m == 0 && f == 0)			      \
		  /* Leading consonant on its own.  */			      \
		  ch = init_to_ucs[i - 1];				      \
		else if (i == 0 && m > 0 && f == 0)			      \
		  /* Vowel on its own.  */				      \
		  ch = 0x314e + m;	/* 0x314f + m - 1 */		      \
		else if (__builtin_expect ((i | m) == 0, 1)		      \
			 && __builtin_expect (f > 0, 1))		      \
		  /* Trailing consonant on its own; the table yields 0	      \
		     where no mapping exists.  */			      \
		  ch = final_to_ucs[f - 1];	/* round trip?? */	      \
		else							      \
		  {							      \
		    /* This is illegal.  */				      \
		    STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
		  }							      \
	      }								      \
	    else							      \
	      {								      \
		/* Symbol or Hanja: validate the trail byte and the	      \
		   unassigned tails of rows 0xd9, 0xda and 0xde before	      \
		   indexing the KS C 5601 tables.  */			      \
		if (__builtin_expect (ch2 < 0x31, 0)			      \
		    || (__builtin_expect (ch2 > 0x7e, 0) && ch2 < 0x91)	      \
		    || __builtin_expect (ch2, 0) == 0xff		      \
		    || (__builtin_expect (ch, 0) == 0xd9 && ch2 > 0xe8)	      \
		    || (__builtin_expect (ch, 0) == 0xda		      \
			&& ch2 > 0xa0 && ch2 < 0xd4)			      \
		    || (__builtin_expect (ch, 0) == 0xde && ch2 > 0xf1))      \
		  {							      \
		    /* This is illegal.  */				      \
		    STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
		  }							      \
		else							      \
		  {							      \
		    ch = johab_sym_hanja_to_ucs (idx, ch, ch2);		      \
		  }							      \
	      }								      \
	  }								      \
									      \
	if (__glibc_unlikely (ch == 0))					      \
	  {								      \
	    /* This is an illegal character.  */			      \
	    STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \
	  }								      \
									      \
	inptr += 2;							      \
      }									      \
									      \
    put32 (outptr, ch);							      \
    outptr += 4;							      \
  }
#define LOOP_NEED_FLAGS
/* Single-byte fast path: only the ISO646-KR range converts on its own,
   with 0x5c mapped to the WON sign.  */
#define ONEBYTE_BODY \
  {									      \
    if (c <= 0x7f)							      \
      return (c == 0x5c ? 0x20a9 : c);					      \
    else								      \
      return WEOF;							      \
  }
8619129f
UD
286#include <iconv/loop.c>
287
288
/* Next, define the other direction.

   BODY converts one 4-byte UCS4 character to JOHAB per expansion and is
   consumed by the <iconv/loop.c> template included after these
   definitions.  It relies on names that are not declared in this file
   (inptr, outptr, outend, result, get32, UNICODE_TAG_HANDLER and
   STANDARD_TO_LOOP_ERR_HANDLER); those are expected to come from the
   loop template.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
#define BODY \
  {									      \
    uint32_t ch = get32 (inptr);					      \
									      \
    /* 0x5c is excluded here: in JOHAB that byte is the WON sign, which     \
       is produced from U+20A9 further down.  */			      \
    if (ch <= 0x7f && ch != 0x5c)					      \
      *outptr++ = ch;							      \
    else								      \
      {									      \
	if (ch >= 0xac00 && ch <= 0xd7a3)				      \
	  {								      \
	    /* Precomposed Hangul syllable.  */				      \
	    if (__glibc_unlikely (outptr + 2 > outend))			      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
									      \
	    ch -= 0xac00;						      \
									      \
	    ch = (init_to_bit[ch / 588]	  /* 21 * 28 = 588 */		      \
		  + mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */    \
		  + final_to_bit[ch %  28]);  /* (ch % (21 * 28)) % 28 */    \
									      \
	    *outptr++ = ch / 256;					      \
	    *outptr++ = ch % 256;					      \
	  }								      \
	/* KS C 5601-1992 Annex 3 regards  0xA4DA(Hangul Filler : U3164)     \
	   as symbol */							      \
	else if (ch >= 0x3131 && ch <= 0x3163)				      \
	  {								      \
	    /* Hangul Compatibility Jamo.  */				      \
	    ch = jamo_from_ucs_table[ch - 0x3131];			      \
									      \
	    if (__glibc_unlikely (outptr + 2 > outend))			      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
									      \
	    *outptr++ = ch / 256;					      \
	    *outptr++ = ch % 256;					      \
	  }								      \
	else if ((ch >= 0x4e00 && ch <= 0x9fa5)				      \
		 || (ch >= 0xf900 && ch <= 0xfa0b))			      \
	  {								      \
	    /* Hanja: convert via KS C 5601, then re-base the two bytes      \
	       found at outptr into JOHAB's layout of 188 trail bytes	      \
	       per lead byte starting at lead byte 0xe0.  */		      \
	    size_t written;						      \
	    uint32_t temp;						      \
									      \
	    written = ucs4_to_ksc5601_hanja (ch, outptr, outend - outptr);   \
	    if (__builtin_expect (written, 1) == 0)			      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    if (__glibc_unlikely (written == __UNKNOWN_10646_CHAR))	      \
	      {								      \
		STANDARD_TO_LOOP_ERR_HANDLER (4);			      \
	      }								      \
									      \
	    outptr[0] -= 0x4a;						      \
	    outptr[1] -= 0x21;						      \
									      \
	    temp = outptr[0] * 94 + outptr[1];				      \
									      \
	    outptr[0] = 0xe0 + temp / 188;				      \
	    outptr[1] = temp % 188;					      \
	    outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31;			      \
									      \
	    outptr += 2;						      \
	  }								      \
	else if (ch == 0x20a9)						      \
	  *outptr++ = 0x5c;						      \
	else								      \
	  {								      \
	    /* Everything else is tried as a KS C 5601 symbol.  */	      \
	    size_t written;						      \
	    uint32_t temp;						      \
									      \
	    written = ucs4_to_ksc5601_sym (ch, outptr, outend - outptr);     \
	    if (__builtin_expect (written, 1) == 0)			      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    /* Besides unknown characters, symbol codes in row 0x22 with     \
	       a second byte above 0x68 are treated as unconvertible.  */    \
	    if (__builtin_expect (written == __UNKNOWN_10646_CHAR, 0)	      \
		|| (outptr[0] == 0x22 && outptr[1] > 0x68))		      \
	      {								      \
		UNICODE_TAG_HANDLER (ch, 4);				      \
		STANDARD_TO_LOOP_ERR_HANDLER (4);			      \
	      }								      \
									      \
	    /* Fold two 94-column KS C 5601 rows into one JOHAB lead	      \
	       byte; odd values of temp select the upper half of the	      \
	       trail-byte range.  */					      \
	    temp = (outptr[0] < 0x4a ? outptr[0] + 0x191 : outptr[0] + 0x176);\
	    outptr[1] += (temp % 2 ? 0x5e : 0);				      \
	    outptr[1] += (outptr[1] < 0x6f ? 0x10 : 0x22);		      \
	    outptr[0] = temp / 2;					      \
									      \
	    outptr += 2;						      \
	  }								      \
      }									      \
									      \
    inptr += 4;								      \
  }
#define LOOP_NEED_FLAGS
8619129f
UD
407#include <iconv/loop.c>
408
409
410/* Now define the toplevel functions. */
411#include <iconv/skeleton.c>