]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconvdata/iso-2022-kr.c
Fix handling of conversion problem in CP932 module
[thirdparty/glibc.git] / iconvdata / iso-2022-kr.c
CommitLineData
8babd571 1/* Conversion module for ISO-2022-KR.
4b1b449d
UD
2 Copyright (C) 1998, 1999, 2000-2002, 2007, 2008
3 Free Software Foundation, Inc.
8babd571
UD
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6
7 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
8babd571
UD
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 15 Lesser General Public License for more details.
8babd571 16
41bdb6e2
AJ
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, write to the Free
19 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA. */
8babd571 21
55985355 22#include <dlfcn.h>
8babd571
UD
23#include <gconv.h>
24#include <stdint.h>
25#include <string.h>
26#include "ksc5601.h"
27
2fb2e75a
UD
28#include <assert.h>
29
/* This makes obvious what everybody knows: 0x1b is the Esc character.
   In this module it only matters as the first byte of the four-byte
   designator `ESC $ ) C'.  */
#define ESC 0x1b

/* The shift sequences for this charset (they do not use ESC).  SO selects
   the KS C 5601 set, SI selects ASCII again.  */
#define SI 0x0f
#define SO 0x0e
36
/* Definitions used in the body of the `gconv' function.

   The "FROM" side is ISO-2022-KR: the smallest unit is a single byte and
   the longest sequence handled here is the four-byte designator.  The "TO"
   side always uses four bytes per character (the loops below read and
   write it with get32/put32).  */
#define CHARSET_NAME "ISO-2022-KR//"
#define DEFINE_INIT 1
#define DEFINE_FINI 1
#define FROM_LOOP from_iso2022kr_loop
#define TO_LOOP to_iso2022kr_loop
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 4
#define MAX_NEEDED_TO 4
/* Set up the variables shared by both conversion loops.

   SAVE_SET is scratch space for SAVE_RESET_STATE.  SETP points at the
   `__count' member of the conversion state, which records the currently
   selected character set.

   When converting to ISO-2022-KR (not FROM_DIRECTION), not for internal
   use, and on the very first invocation, the designator `ESC $ ) C' is
   written to the output buffer.  If the buffer cannot hold all four bytes
   nothing is written and the enclosing function returns
   __GCONV_FULL_OUTPUT.  */
#define PREPARE_LOOP \
  int save_set; \
  int *setp = &data->__statep->__count; \
  if (!FROM_DIRECTION && !data->__internal_use \
      && data->__invocation_counter == 0) \
    { \
      /* Emit the designator sequence.  */ \
      if (outbuf + 4 > outend) \
        return __GCONV_FULL_OUTPUT; \
 \
      *outbuf++ = ESC; \
      *outbuf++ = '$'; \
      *outbuf++ = ')'; \
      *outbuf++ = 'C'; \
    }
/* The state pointer is handed to the loop functions as an extra argument
   (it matches EXTRA_LOOP_DECLS below).  */
#define EXTRA_LOOP_ARGS , setp
2fb2e75a 63
8babd571
UD
64
/* The COUNT element of the state keeps track of the currently selected
   character set.  The possible values are listed here.  KSC5601_set does
   not touch the low three bits of the state word; EMIT_SHIFT_TO_INIT
   preserves exactly those bits when it resets the input state.  */
enum
{
  ASCII_set = 0,
  KSC5601_set = 8
};
72
73
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   Nothing happens when the state already equals ASCII_set.  When decoding
   (FROM_DIRECTION) no output is needed: everything except the low three
   bits of the state word is cleared.  When encoding, one SI byte is
   appended to the output and the state becomes ASCII_set; if the output
   buffer is full, STATUS is set to __GCONV_FULL_OUTPUT instead and the
   state is left untouched.  */
#define EMIT_SHIFT_TO_INIT \
  if (data->__statep->__count != ASCII_set) \
    { \
      if (FROM_DIRECTION) \
        { \
          /* It's easy, we don't have to emit anything, we just reset the \
             state for the input.  */ \
          data->__statep->__count &= 7; \
          data->__statep->__count |= ASCII_set; \
        } \
      else \
        { \
          /* We are not in the initial state.  To switch back we have \
             to emit `SI'.  */ \
          if (__builtin_expect (outbuf == outend, 0)) \
            /* We don't have enough room in the output buffer.  */ \
            status = __GCONV_FULL_OUTPUT; \
          else \
            { \
              /* Write out the shift sequence.  */ \
              *outbuf++ = SI; \
              data->__statep->__count = ASCII_set; \
            } \
        } \
    }
102
103
/* Since we might have to reset the input pointer we must be able to save
   and restore the state.  With a nonzero argument the current character
   set (*SETP) is remembered in SAVE_SET; with a zero argument the
   remembered value is written back to *SETP.  Both variables are declared
   by PREPARE_LOOP.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_set = *setp; \
  else \
    *setp = save_set
8babd571
UD
111
112
/* First define the conversion function from ISO-2022-KR to UCS4.  */
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
/* One step of the decoding loop.  It looks at the byte at INPTR and

   - rejects bytes above 0x7f through the standard error handler;
   - skips a complete `ESC $ ) C' designator, or stops with
     __GCONV_INCOMPLETE_INPUT if the input ends in the middle of what could
     still become one; any other ESC is treated like an ordinary byte;
   - consumes SO/SI and switches SET to KSC5601_set/ASCII_set;
   - otherwise produces one character: the byte itself in ASCII mode, or
     the result of ksc5601_to_ucs4 in KS C 5601 mode (a return value of 0
     is reported as incomplete input, __UNKNOWN_10646_CHAR goes to the
     standard error handler);
   - stores the character as four bytes at OUTPTR with put32.

   `break' and `continue' act on the loop which <iconv/loop.c> wraps around
   this body.  */
#define BODY \
  { \
    uint32_t ch = *inptr; \
 \
    /* This is a 7bit character set, disallow all 8bit characters.  */ \
    if (__builtin_expect (ch > 0x7f, 0)) \
      STANDARD_FROM_LOOP_ERR_HANDLER (1); \
 \
    /* Recognize escape sequences.  */ \
    if (__builtin_expect (ch, 0) == ESC) \
      { \
        /* We don't really have to handle escape sequences since all the \
           switching is done using the SI and SO bytes.  But we have to \
           recognize `Esc $ ) C' since this is a kind of flag for this \
           encoding.  We simply ignore it.  */ \
        if (__builtin_expect (inptr + 2 > inend, 0) \
            || (inptr[1] == '$' \
                && (__builtin_expect (inptr + 3 > inend, 0) \
                    || (inptr[2] == ')' \
                        && __builtin_expect (inptr + 4 > inend, 0))))) \
          { \
            /* The buffer ends inside a possible designator.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        if (inptr[1] == '$' && inptr[2] == ')' && inptr[3] == 'C') \
          { \
            /* Yeah, yeah, we know this is ISO 2022-KR.  */ \
            inptr += 4; \
            continue; \
          } \
      } \
    else if (__builtin_expect (ch, 0) == SO) \
      { \
        /* Switch to use KSC.  */ \
        ++inptr; \
        set = KSC5601_set; \
        continue; \
      } \
    else if (__builtin_expect (ch, 0) == SI) \
      { \
        /* Switch to use ASCII.  */ \
        ++inptr; \
        set = ASCII_set; \
        continue; \
      } \
 \
    if (set == ASCII_set) \
      { \
        /* Almost done, just advance the input pointer.  */ \
        ++inptr; \
      } \
    else \
      { \
        assert (set == KSC5601_set); \
 \
        /* Use the KSC 5601 table.  */ \
        ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
 \
        if (__builtin_expect (ch == 0, 0)) \
          { \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        else if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0)) \
          { \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
 \
    put32 (outptr, ch); \
    outptr += 4; \
  }
#define LOOP_NEED_FLAGS
/* The loop function receives the state pointer as an extra parameter and
   works on a local copy SET of the selected character set; UPDATE_PARAMS
   writes that copy back through the pointer.  */
#define EXTRA_LOOP_DECLS , int *setp
#define INIT_PARAMS int set = *setp
#define UPDATE_PARAMS *setp = set
8babd571
UD
194#include <iconv/loop.c>
195
196
/* Next, define the other direction.  */
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
#define LOOPFCT TO_LOOP
/* One step of the encoding loop.  It reads a four-byte character from
   INPTR with get32 and

   - writes characters below 0x80 as a single byte, preceded by SI if the
     current set is not ASCII_set;
   - converts everything else with ucs4_to_ksc5601 and writes the two
     resulting bytes, preceded by SO if the current set is not KSC5601_set;
   - passes characters which ucs4_to_ksc5601 cannot convert to
     UNICODE_TAG_HANDLER and then to the standard error handler.

   A shift byte is stored before the remaining space is checked.  If that
   check then fails, the shift has already been written and SET already
   updated, RESULT becomes __GCONV_FULL_OUTPUT, and the input character is
   not consumed.  `break' acts on the loop which <iconv/loop.c> wraps
   around this body.  */
#define BODY \
  { \
    uint32_t ch = get32 (inptr); \
 \
    /* First see whether we can write the character using the currently \
       selected character set.  */ \
    if (ch < 0x80) \
      { \
        if (set != ASCII_set) \
          { \
            *outptr++ = SI; \
            set = ASCII_set; \
            if (__builtin_expect (outptr == outend, 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
          } \
 \
        *outptr++ = ch; \
      } \
    else \
      { \
        unsigned char buf[2]; \
        /* Fake initialization to keep gcc quiet.  */ \
        asm ("" : "=m" (buf)); \
 \
        size_t written = ucs4_to_ksc5601 (ch, buf, 2); \
        if (__builtin_expect (written, 0) == __UNKNOWN_10646_CHAR) \
          { \
            UNICODE_TAG_HANDLER (ch, 4); \
 \
            /* Illegal character.  */ \
            STANDARD_TO_LOOP_ERR_HANDLER (4); \
          } \
        else \
          { \
            assert (written == 2); \
 \
            /* We use KSC 5601.  */ \
            if (set != KSC5601_set) \
              { \
                *outptr++ = SO; \
                set = KSC5601_set; \
              } \
 \
            if (__builtin_expect (outptr + 2 > outend, 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
 \
            *outptr++ = buf[0]; \
            *outptr++ = buf[1]; \
          } \
      } \
 \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
/* The loop function receives the state pointer as an extra parameter and
   works on a local copy SET of the selected character set.  REINIT_PARAMS
   reloads that copy from the state, UPDATE_PARAMS writes it back.  */
#define EXTRA_LOOP_DECLS , int *setp
#define INIT_PARAMS int set = *setp
#define REINIT_PARAMS set = *setp
#define UPDATE_PARAMS *setp = set
8babd571
UD
267#include <iconv/loop.c>
268
269
270/* Now define the toplevel functions. */
271#include <iconv/skeleton.c>