]> git.ipfire.org Git - thirdparty/postgresql.git/blob
5b1ceb49c20cc3d593bbb92498e9ce19e30e3fe2
[thirdparty/postgresql.git] /
1 /*-------------------------------------------------------------------------
2 *
3 * EUC_JIS_2004, SHIFT_JIS_2004
4 *
5 * Copyright (c) 2007, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/euc_jis_2004_and_shift_jis_2004.c,v 1.1 2007/03/25 11:56:02 ishii Exp $
9 *
10 *-------------------------------------------------------------------------
11 */
12
13 #include "postgres.h"
14 #include "fmgr.h"
15 #include "mb/pg_wchar.h"
16
17 PG_MODULE_MAGIC;
18
19 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
20 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
21
22 extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
23 extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
24
25 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
26 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
27
28 /* ----------
29 * conv_proc(
30 * INTEGER, -- source encoding id
31 * INTEGER, -- destination encoding id
32 * CSTRING, -- source string (null terminated C string)
33 * CSTRING, -- destination string (null terminated C string)
34 * INTEGER -- source string length
35 * ) returns VOID;
36 * ----------
37 */
38
39 Datum
40 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
41 {
42 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
43 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
44 int len = PG_GETARG_INT32(4);
45
46 Assert(PG_GETARG_INT32(0) == PG_EUC_JIS_2004);
47 Assert(PG_GETARG_INT32(1) == PG_SHIFT_JIS_2004);
48 Assert(len >= 0);
49
50 euc_jis_20042shift_jis_2004(src, dest, len);
51
52 PG_RETURN_VOID();
53 }
54
55 Datum
56 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
57 {
58 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
59 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
60 int len = PG_GETARG_INT32(4);
61
62 Assert(PG_GETARG_INT32(0) == PG_SHIFT_JIS_2004);
63 Assert(PG_GETARG_INT32(1) == PG_EUC_JIS_2004);
64 Assert(len >= 0);
65
66 shift_jis_20042euc_jis_2004(src, dest, len);
67
68 PG_RETURN_VOID();
69 }
70
71 /*
72 * EUC_JIS_2004 -> SHIFT_JIS_2004
73 */
74 static void
75 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
76 {
77 int c1,
78 ku,
79 ten;
80 int l;
81
82 while (len > 0)
83 {
84 c1 = *euc;
85 if (!IS_HIGHBIT_SET(c1))
86 {
87 /* ASCII */
88 if (c1 == 0)
89 report_invalid_encoding(PG_EUC_JIS_2004,
90 (const char *) euc, len);
91 *p++ = c1;
92 euc++;
93 len--;
94 continue;
95 }
96
97 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
98
99 if (l < 0)
100 report_invalid_encoding(PG_EUC_JIS_2004,
101 (const char *) euc, len);
102
103 if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
104 {
105 *p++ = euc[1];
106 }
107 else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
108 {
109 ku = euc[1] - 0xa0;
110 ten = euc[2] - 0xa0;
111
112 switch (ku)
113 {
114 case 1:
115 case 3:
116 case 4:
117 case 5:
118 case 8:
119 case 12:
120 case 13:
121 case 14:
122 case 15:
123 *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
124 break;
125 default:
126 if (ku >= 78 && ku <= 94)
127 {
128 *p++ = (ku + 0x19b) >> 1;
129 }
130 else
131 report_invalid_encoding(PG_EUC_JIS_2004,
132 (const char *) euc, len);
133 }
134
135 if (ku % 2)
136 {
137 if (ten >= 1 && ten <= 63)
138 *p++ = ten + 0x3f;
139 else if (ten >= 64 && ten <= 94)
140 *p++ = ten + 0x40;
141 else
142 report_invalid_encoding(PG_EUC_JIS_2004,
143 (const char *) euc, len);
144 }
145 else
146 *p++ = ten + 0x9e;
147 }
148
149 else if (l == 2) /* JIS X 0213 plane 1? */
150 {
151 ku = c1 - 0xa0;
152 ten = euc[1] - 0xa0;
153
154 if (ku >= 1 && ku <= 62)
155 *p++ = (ku + 0x101) >> 1;
156 else if (ku >= 63 && ku <= 94)
157 *p++ = (ku + 0x181) >> 1;
158 else
159 report_invalid_encoding(PG_EUC_JIS_2004,
160 (const char *) euc, len);
161
162 if (ku % 2)
163 {
164 if (ten >= 1 && ten <= 63)
165 *p++ = ten + 0x3f;
166 else if (ten >= 64 && ten <= 94)
167 *p++ = ten + 0x40;
168 else
169 report_invalid_encoding(PG_EUC_JIS_2004,
170 (const char *) euc, len);
171 }
172 else
173 *p++ = ten + 0x9e;
174 }
175 else
176 report_invalid_encoding(PG_EUC_JIS_2004,
177 (const char *) euc, len);
178
179 euc += l;
180 len -= l;
181 }
182 *p = '\0';
183 }
184
/*
 * Decode the second byte "b" of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" (cell) number, 1..94, or -1 if "b" is not a valid
 * second byte.  On success *ku reports the parity of the row ("ku") the
 * character belongs to:
 *		*ku = 0: "ku" is even
 *		*ku = 1: "ku" is odd
 * On failure *ku is left untouched.
 */
static int
get_ten(int b, int *ku)
{
	/* valid second bytes are 0x40..0xfc with a hole at 0x7f */
	if (b < 0x40 || b > 0xfc || b == 0x7f)
		return -1;

	if (b >= 0x9f)
	{
		/* upper range: cell of an even row */
		*ku = 0;
		return b - 0x9e;
	}

	/* lower ranges: cell of an odd row; skip over the 0x7f hole */
	*ku = 1;
	return (b <= 0x7e) ? (b - 0x3f) : (b - 0x40);
}
213
214 /*
215 * SHIFT_JIS_2004 ---> EUC_JIS_2004
216 */
217
218 static void
219 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
220 {
221 int c1,
222 c2;
223 int ku, ten, kubun;
224 int plane;
225 int l;
226
227 while (len > 0)
228 {
229 c1 = *sjis;
230 c2 = sjis[1];
231
232 if (!IS_HIGHBIT_SET(c1))
233 {
234 /* ASCII */
235 if (c1 == 0)
236 report_invalid_encoding(PG_SHIFT_JIS_2004,
237 (const char *) sjis, len);
238 *p++ = c1;
239 sjis++;
240 len--;
241 continue;
242 }
243
244 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
245
246 if (l < 0)
247 report_invalid_encoding(PG_SHIFT_JIS_2004,
248 (const char *) sjis, len);
249
250 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
251 {
252 /* JIS X0201 (1 byte kana) */
253 *p++ = SS2;
254 *p++ = c1;
255 }
256 else if (l == 2)
257 {
258 plane = 1;
259 ku = 1;
260 ten = 1;
261
262 /*
263 * JIS X 0213
264 */
265 if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
266 {
267 ku = (c1 << 1) - 0x100;
268 ten = get_ten(c2, &kubun);
269 if (ten < 0)
270 report_invalid_encoding(PG_SHIFT_JIS_2004,
271 (const char *) sjis, len);
272 ku -= kubun;
273 }
274 else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
275 {
276 ku = (c1 << 1) - 0x180;
277 ten = get_ten(c2, &kubun);
278 if (ten < 0)
279 report_invalid_encoding(PG_SHIFT_JIS_2004,
280
281 (const char *) sjis, len);
282 ku -= kubun;
283 }
284 else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2 1,3,4,5,8,12,13,14,15 ku */
285 {
286 plane = 2;
287 ten = get_ten(c2, &kubun);
288 if (ten < 0)
289 report_invalid_encoding(PG_SHIFT_JIS_2004,
290 (const char *) sjis, len);
291 switch (c1)
292 {
293 case 0xf0:
294 ku = kubun == 0? 8: 1;
295 break;
296 case 0xf1:
297 ku = kubun == 0? 4: 3;
298 break;
299 case 0xf2:
300 ku = kubun == 0? 12: 5;
301 break;
302 default:
303 ku = kubun == 0? 14: 13;
304 break;
305 }
306 }
307 else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
308 {
309 plane = 2;
310 ten = get_ten(c2, &kubun);
311 if (ten < 0)
312 report_invalid_encoding(PG_SHIFT_JIS_2004,
313 (const char *) sjis, len);
314 if (c1 == 0xf4 && kubun == 1)
315 ku = 15;
316 else
317 ku = (c1 << 1) - 0x19a - kubun;
318 }
319 else
320 report_invalid_encoding(PG_SHIFT_JIS_2004,
321 (const char *) sjis, len);
322
323 if (plane == 2)
324 *p++ = SS3;
325
326 *p++ = ku + 0xa0;
327 *p++ = ten + 0xa0;
328 }
329 sjis += l;
330 len -= l;
331 }
332 *p = '\0';
333 }