]> git.ipfire.org Git - thirdparty/postgresql.git/blob
e65d755368fc81388af7fd1ac5dd72bd4e435feb
[thirdparty/postgresql.git] /
1 /*-------------------------------------------------------------------------
2 *
3 * EUC_JIS_2004, SHIFT_JIS_2004
4 *
5 * Copyright (c) 2007-2009, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/euc_jis_2004_and_shift_jis_2004.c,v 1.6 2009/01/29 19:23:39 tgl Exp $
9 *
10 *-------------------------------------------------------------------------
11 */
12
13 #include "postgres.h"
14 #include "fmgr.h"
15 #include "mb/pg_wchar.h"
16
17 PG_MODULE_MAGIC;
18
19 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
20 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
21
22 extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
23 extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
24
25 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
26 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
27
28 /* ----------
29 * conv_proc(
30 * INTEGER, -- source encoding id
31 * INTEGER, -- destination encoding id
32 * CSTRING, -- source string (null terminated C string)
33 * CSTRING, -- destination string (null terminated C string)
34 * INTEGER -- source string length
35 * ) returns VOID;
36 * ----------
37 */
38
39 Datum
40 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
41 {
42 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
43 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
44 int len = PG_GETARG_INT32(4);
45
46 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
47
48 euc_jis_20042shift_jis_2004(src, dest, len);
49
50 PG_RETURN_VOID();
51 }
52
53 Datum
54 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
55 {
56 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
57 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
58 int len = PG_GETARG_INT32(4);
59
60 CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
61
62 shift_jis_20042euc_jis_2004(src, dest, len);
63
64 PG_RETURN_VOID();
65 }
66
67 /*
68 * EUC_JIS_2004 -> SHIFT_JIS_2004
69 */
70 static void
71 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
72 {
73 int c1,
74 ku,
75 ten;
76 int l;
77
78 while (len > 0)
79 {
80 c1 = *euc;
81 if (!IS_HIGHBIT_SET(c1))
82 {
83 /* ASCII */
84 if (c1 == 0)
85 report_invalid_encoding(PG_EUC_JIS_2004,
86 (const char *) euc, len);
87 *p++ = c1;
88 euc++;
89 len--;
90 continue;
91 }
92
93 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
94
95 if (l < 0)
96 report_invalid_encoding(PG_EUC_JIS_2004,
97 (const char *) euc, len);
98
99 if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
100 {
101 *p++ = euc[1];
102 }
103 else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
104 {
105 ku = euc[1] - 0xa0;
106 ten = euc[2] - 0xa0;
107
108 switch (ku)
109 {
110 case 1:
111 case 3:
112 case 4:
113 case 5:
114 case 8:
115 case 12:
116 case 13:
117 case 14:
118 case 15:
119 *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
120 break;
121 default:
122 if (ku >= 78 && ku <= 94)
123 {
124 *p++ = (ku + 0x19b) >> 1;
125 }
126 else
127 report_invalid_encoding(PG_EUC_JIS_2004,
128 (const char *) euc, len);
129 }
130
131 if (ku % 2)
132 {
133 if (ten >= 1 && ten <= 63)
134 *p++ = ten + 0x3f;
135 else if (ten >= 64 && ten <= 94)
136 *p++ = ten + 0x40;
137 else
138 report_invalid_encoding(PG_EUC_JIS_2004,
139 (const char *) euc, len);
140 }
141 else
142 *p++ = ten + 0x9e;
143 }
144
145 else if (l == 2) /* JIS X 0213 plane 1? */
146 {
147 ku = c1 - 0xa0;
148 ten = euc[1] - 0xa0;
149
150 if (ku >= 1 && ku <= 62)
151 *p++ = (ku + 0x101) >> 1;
152 else if (ku >= 63 && ku <= 94)
153 *p++ = (ku + 0x181) >> 1;
154 else
155 report_invalid_encoding(PG_EUC_JIS_2004,
156 (const char *) euc, len);
157
158 if (ku % 2)
159 {
160 if (ten >= 1 && ten <= 63)
161 *p++ = ten + 0x3f;
162 else if (ten >= 64 && ten <= 94)
163 *p++ = ten + 0x40;
164 else
165 report_invalid_encoding(PG_EUC_JIS_2004,
166 (const char *) euc, len);
167 }
168 else
169 *p++ = ten + 0x9e;
170 }
171 else
172 report_invalid_encoding(PG_EUC_JIS_2004,
173 (const char *) euc, len);
174
175 euc += l;
176 len -= l;
177 }
178 *p = '\0';
179 }
180
/*
 * Decode the second byte of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" (cell number, 1..94) selected by byte b, or -1 if b is
 * not in any of the accepted second-byte ranges.
 *
 * *ku is set to the row parity implied by b:
 *	*ku = 0: "ku" = even (also stored when -1 is returned)
 *	*ku = 1: "ku" = odd
 */
static int
get_ten(int b, int *ku)
{
	/* Start from the even/error value; the odd ranges override it below */
	*ku = 0;

	/* 0x9f..0xfc: cells 1..94 of the even row */
	if (b >= 0x9f && b <= 0xfc)
		return b - 0x9e;

	/* 0x40..0x7e: cells 1..63 of the odd row */
	if (b >= 0x40 && b <= 0x7e)
	{
		*ku = 1;
		return b - 0x3f;
	}

	/* 0x80..0x9e: cells 64..94 of the odd row (0x7f is not accepted) */
	if (b >= 0x80 && b <= 0x9e)
	{
		*ku = 1;
		return b - 0x40;
	}

	return -1;					/* error */
}
213
214 /*
215 * SHIFT_JIS_2004 ---> EUC_JIS_2004
216 */
217
218 static void
219 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
220 {
221 int c1,
222 c2;
223 int ku,
224 ten,
225 kubun;
226 int plane;
227 int l;
228
229 while (len > 0)
230 {
231 c1 = *sjis;
232 c2 = sjis[1];
233
234 if (!IS_HIGHBIT_SET(c1))
235 {
236 /* ASCII */
237 if (c1 == 0)
238 report_invalid_encoding(PG_SHIFT_JIS_2004,
239 (const char *) sjis, len);
240 *p++ = c1;
241 sjis++;
242 len--;
243 continue;
244 }
245
246 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
247
248 if (l < 0)
249 report_invalid_encoding(PG_SHIFT_JIS_2004,
250 (const char *) sjis, len);
251
252 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
253 {
254 /* JIS X0201 (1 byte kana) */
255 *p++ = SS2;
256 *p++ = c1;
257 }
258 else if (l == 2)
259 {
260 plane = 1;
261 ku = 1;
262 ten = 1;
263
264 /*
265 * JIS X 0213
266 */
267 if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
268 {
269 ku = (c1 << 1) - 0x100;
270 ten = get_ten(c2, &kubun);
271 if (ten < 0)
272 report_invalid_encoding(PG_SHIFT_JIS_2004,
273 (const char *) sjis, len);
274 ku -= kubun;
275 }
276 else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
277 {
278 ku = (c1 << 1) - 0x180;
279 ten = get_ten(c2, &kubun);
280 if (ten < 0)
281 report_invalid_encoding(PG_SHIFT_JIS_2004,
282
283 (const char *) sjis, len);
284 ku -= kubun;
285 }
286 else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
287 * 1,3,4,5,8,12,13,14,15 ku */
288 {
289 plane = 2;
290 ten = get_ten(c2, &kubun);
291 if (ten < 0)
292 report_invalid_encoding(PG_SHIFT_JIS_2004,
293 (const char *) sjis, len);
294 switch (c1)
295 {
296 case 0xf0:
297 ku = kubun == 0 ? 8 : 1;
298 break;
299 case 0xf1:
300 ku = kubun == 0 ? 4 : 3;
301 break;
302 case 0xf2:
303 ku = kubun == 0 ? 12 : 5;
304 break;
305 default:
306 ku = kubun == 0 ? 14 : 13;
307 break;
308 }
309 }
310 else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
311 {
312 plane = 2;
313 ten = get_ten(c2, &kubun);
314 if (ten < 0)
315 report_invalid_encoding(PG_SHIFT_JIS_2004,
316 (const char *) sjis, len);
317 if (c1 == 0xf4 && kubun == 1)
318 ku = 15;
319 else
320 ku = (c1 << 1) - 0x19a - kubun;
321 }
322 else
323 report_invalid_encoding(PG_SHIFT_JIS_2004,
324 (const char *) sjis, len);
325
326 if (plane == 2)
327 *p++ = SS3;
328
329 *p++ = ku + 0xa0;
330 *p++ = ten + 0xa0;
331 }
332 sjis += l;
333 len -= l;
334 }
335 *p = '\0';
336 }