]> git.ipfire.org Git - thirdparty/postgresql.git/blob
ba10a56cdb85bd9a08fc58bc2b29deca7453850f
[thirdparty/postgresql.git] /
1 /*-------------------------------------------------------------------------
2 *
3 * EUC_JIS_2004, SHIFT_JIS_2004
4 *
5 * Copyright (c) 2007-2009, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/euc_jis_2004_and_shift_jis_2004.c,v 1.5 2009/01/01 17:23:51 momjian Exp $
9 *
10 *-------------------------------------------------------------------------
11 */
12
13 #include "postgres.h"
14 #include "fmgr.h"
15 #include "mb/pg_wchar.h"
16
/* Module magic block for this loadable conversion library. */
PG_MODULE_MAGIC;

/* Function-info records for the two SQL-callable conversion procedures. */
PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);

/* SQL-callable entry points; both are defined further down in this file. */
extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);

/*
 * File-local workers that do the actual byte conversion.  Each reads "len"
 * bytes from its first argument and writes the converted text, followed by
 * a terminating NUL, to "p".
 */
static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
27
28 /* ----------
29 * conv_proc(
30 * INTEGER, -- source encoding id
31 * INTEGER, -- destination encoding id
32 * CSTRING, -- source string (null terminated C string)
33 * CSTRING, -- destination string (null terminated C string)
34 * INTEGER -- source string length
35 * ) returns VOID;
36 * ----------
37 */
38
39 Datum
40 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
41 {
42 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
43 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
44 int len = PG_GETARG_INT32(4);
45
46 Assert(PG_GETARG_INT32(0) == PG_EUC_JIS_2004);
47 Assert(PG_GETARG_INT32(1) == PG_SHIFT_JIS_2004);
48 Assert(len >= 0);
49
50 euc_jis_20042shift_jis_2004(src, dest, len);
51
52 PG_RETURN_VOID();
53 }
54
55 Datum
56 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
57 {
58 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
59 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
60 int len = PG_GETARG_INT32(4);
61
62 Assert(PG_GETARG_INT32(0) == PG_SHIFT_JIS_2004);
63 Assert(PG_GETARG_INT32(1) == PG_EUC_JIS_2004);
64 Assert(len >= 0);
65
66 shift_jis_20042euc_jis_2004(src, dest, len);
67
68 PG_RETURN_VOID();
69 }
70
71 /*
72 * EUC_JIS_2004 -> SHIFT_JIS_2004
73 */
74 static void
75 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
76 {
77 int c1,
78 ku,
79 ten;
80 int l;
81
82 while (len > 0)
83 {
84 c1 = *euc;
85 if (!IS_HIGHBIT_SET(c1))
86 {
87 /* ASCII */
88 if (c1 == 0)
89 report_invalid_encoding(PG_EUC_JIS_2004,
90 (const char *) euc, len);
91 *p++ = c1;
92 euc++;
93 len--;
94 continue;
95 }
96
97 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
98
99 if (l < 0)
100 report_invalid_encoding(PG_EUC_JIS_2004,
101 (const char *) euc, len);
102
103 if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
104 {
105 *p++ = euc[1];
106 }
107 else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
108 {
109 ku = euc[1] - 0xa0;
110 ten = euc[2] - 0xa0;
111
112 switch (ku)
113 {
114 case 1:
115 case 3:
116 case 4:
117 case 5:
118 case 8:
119 case 12:
120 case 13:
121 case 14:
122 case 15:
123 *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
124 break;
125 default:
126 if (ku >= 78 && ku <= 94)
127 {
128 *p++ = (ku + 0x19b) >> 1;
129 }
130 else
131 report_invalid_encoding(PG_EUC_JIS_2004,
132 (const char *) euc, len);
133 }
134
135 if (ku % 2)
136 {
137 if (ten >= 1 && ten <= 63)
138 *p++ = ten + 0x3f;
139 else if (ten >= 64 && ten <= 94)
140 *p++ = ten + 0x40;
141 else
142 report_invalid_encoding(PG_EUC_JIS_2004,
143 (const char *) euc, len);
144 }
145 else
146 *p++ = ten + 0x9e;
147 }
148
149 else if (l == 2) /* JIS X 0213 plane 1? */
150 {
151 ku = c1 - 0xa0;
152 ten = euc[1] - 0xa0;
153
154 if (ku >= 1 && ku <= 62)
155 *p++ = (ku + 0x101) >> 1;
156 else if (ku >= 63 && ku <= 94)
157 *p++ = (ku + 0x181) >> 1;
158 else
159 report_invalid_encoding(PG_EUC_JIS_2004,
160 (const char *) euc, len);
161
162 if (ku % 2)
163 {
164 if (ten >= 1 && ten <= 63)
165 *p++ = ten + 0x3f;
166 else if (ten >= 64 && ten <= 94)
167 *p++ = ten + 0x40;
168 else
169 report_invalid_encoding(PG_EUC_JIS_2004,
170 (const char *) euc, len);
171 }
172 else
173 *p++ = ten + 0x9e;
174 }
175 else
176 report_invalid_encoding(PG_EUC_JIS_2004,
177 (const char *) euc, len);
178
179 euc += l;
180 len -= l;
181 }
182 *p = '\0';
183 }
184
/*
 * Decode the second byte of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" (cell) number encoded by "b", or -1 if "b" is not a
 * valid second byte.  *ku is set to the parity of the "ku" (row) that the
 * byte implies:
 *	*ku = 0: "ku" = even
 *	*ku = 1: "ku" = odd
 * On error *ku is set to 0.
 */
static int
get_ten(int b, int *ku)
{
	/* 0x40..0x7e: cells 1..63 of an odd row */
	if (b >= 0x40 && b <= 0x7e)
	{
		*ku = 1;
		return b - 0x3f;
	}

	/* 0x80..0x9e: cells 64..94 of an odd row (0x7f is skipped) */
	if (b >= 0x80 && b <= 0x9e)
	{
		*ku = 1;
		return b - 0x40;
	}

	/* 0x9f..0xfc: cells 1..94 of an even row */
	if (b >= 0x9f && b <= 0xfc)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* anything else is not a valid second byte */
	*ku = 0;					/* keep compiler quiet */
	return -1;
}
217
218 /*
219 * SHIFT_JIS_2004 ---> EUC_JIS_2004
220 */
221
222 static void
223 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
224 {
225 int c1,
226 c2;
227 int ku,
228 ten,
229 kubun;
230 int plane;
231 int l;
232
233 while (len > 0)
234 {
235 c1 = *sjis;
236 c2 = sjis[1];
237
238 if (!IS_HIGHBIT_SET(c1))
239 {
240 /* ASCII */
241 if (c1 == 0)
242 report_invalid_encoding(PG_SHIFT_JIS_2004,
243 (const char *) sjis, len);
244 *p++ = c1;
245 sjis++;
246 len--;
247 continue;
248 }
249
250 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
251
252 if (l < 0)
253 report_invalid_encoding(PG_SHIFT_JIS_2004,
254 (const char *) sjis, len);
255
256 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
257 {
258 /* JIS X0201 (1 byte kana) */
259 *p++ = SS2;
260 *p++ = c1;
261 }
262 else if (l == 2)
263 {
264 plane = 1;
265 ku = 1;
266 ten = 1;
267
268 /*
269 * JIS X 0213
270 */
271 if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
272 {
273 ku = (c1 << 1) - 0x100;
274 ten = get_ten(c2, &kubun);
275 if (ten < 0)
276 report_invalid_encoding(PG_SHIFT_JIS_2004,
277 (const char *) sjis, len);
278 ku -= kubun;
279 }
280 else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
281 {
282 ku = (c1 << 1) - 0x180;
283 ten = get_ten(c2, &kubun);
284 if (ten < 0)
285 report_invalid_encoding(PG_SHIFT_JIS_2004,
286
287 (const char *) sjis, len);
288 ku -= kubun;
289 }
290 else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
291 * 1,3,4,5,8,12,13,14,15 ku */
292 {
293 plane = 2;
294 ten = get_ten(c2, &kubun);
295 if (ten < 0)
296 report_invalid_encoding(PG_SHIFT_JIS_2004,
297 (const char *) sjis, len);
298 switch (c1)
299 {
300 case 0xf0:
301 ku = kubun == 0 ? 8 : 1;
302 break;
303 case 0xf1:
304 ku = kubun == 0 ? 4 : 3;
305 break;
306 case 0xf2:
307 ku = kubun == 0 ? 12 : 5;
308 break;
309 default:
310 ku = kubun == 0 ? 14 : 13;
311 break;
312 }
313 }
314 else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
315 {
316 plane = 2;
317 ten = get_ten(c2, &kubun);
318 if (ten < 0)
319 report_invalid_encoding(PG_SHIFT_JIS_2004,
320 (const char *) sjis, len);
321 if (c1 == 0xf4 && kubun == 1)
322 ku = 15;
323 else
324 ku = (c1 << 1) - 0x19a - kubun;
325 }
326 else
327 report_invalid_encoding(PG_SHIFT_JIS_2004,
328 (const char *) sjis, len);
329
330 if (plane == 2)
331 *p++ = SS3;
332
333 *p++ = ku + 0xa0;
334 *p++ = ten + 0xa0;
335 }
336 sjis += l;
337 len -= l;
338 }
339 *p = '\0';
340 }