]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | /* |
2 | * "$Id: transcode.c 4903 2006-01-10 20:02:46Z mike $" | |
3 | * | |
4 | * Transcoding support for the Common UNIX Printing System (CUPS). | |
5 | * | |
6 | * Copyright 1997-2006 by Easy Software Products. | |
7 | * | |
8 | * These coded instructions, statements, and computer programs are | |
9 | * the property of Easy Software Products and are protected by Federal | |
10 | * copyright law. Distribution and use rights are outlined in the | |
11 | * file "LICENSE.txt" which should have been included with this file. | |
12 | * If this file is missing or damaged please contact Easy Software | |
13 | * Products at: | |
14 | * | |
15 | * Attn: CUPS Licensing Information | |
16 | * Easy Software Products | |
17 | * 44141 Airport View Drive, Suite 204 | |
18 | * Hollywood, Maryland 20636 USA | |
19 | * | |
20 | * Voice: (301) 373-9600 | |
21 | * EMail: cups-info@cups.org | |
22 | * WWW: http://www.cups.org | |
23 | * | |
24 | * Contents: | |
25 | * | |
26 | * cupsCharmapGet() - Get a character set map. | |
27 | * cupsCharmapFree() - Free a character set map. | |
28 | * cupsCharmapFlush() - Flush all character set maps out of cache. | |
29 | * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set. | |
30 | * cupsCharsetToUTF8() - Convert legacy character set to UTF-8. | |
31 | * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16. | |
32 | * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8. | |
33 | * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32. | |
34 | * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8. | |
35 | * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32. | |
36 | * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16. | |
37 | * get_charmap_count() - Count lines in a charmap file. | |
38 | * get_sbcs_charmap() - Get SBCS Charmap. | |
39 | * get_vbcs_charmap() - Get DBCS/VBCS Charmap. | |
40 | * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS. | |
41 | * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS. | |
42 | * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8. | |
43 | * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8. | |
44 | * compare_wide() - Compare key for wide (VBCS) match. | |
45 | */ | |
46 | ||
47 | /* | |
48 | * Include necessary headers... | |
49 | */ | |
50 | ||
51 | #include "globals.h" | |
52 | #include <stdlib.h> | |
53 | #include <errno.h> | |
54 | #include <time.h> | |
55 | ||
56 | ||
57 | /* | |
58 | * Prototypes... | |
59 | */ | |
60 | ||
61 | static int get_charmap_count(const char *filename); | |
62 | static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding, | |
63 | const char *filename); | |
64 | static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding, | |
65 | const char *filename); | |
66 | ||
67 | static int conv_utf8_to_sbcs(char *dest, | |
68 | const cups_utf8_t *src, | |
69 | const int maxout, | |
70 | const cups_encoding_t encoding); | |
71 | static int conv_utf8_to_vbcs(char *dest, | |
72 | const cups_utf8_t *src, | |
73 | const int maxout, | |
74 | const cups_encoding_t encoding); | |
75 | ||
76 | static int conv_sbcs_to_utf8(cups_utf8_t *dest, | |
77 | const char *src, | |
78 | const int maxout, | |
79 | const cups_encoding_t encoding); | |
80 | static int conv_vbcs_to_utf8(cups_utf8_t *dest, | |
81 | const char *src, | |
82 | const int maxout, | |
83 | const cups_encoding_t encoding); | |
84 | ||
85 | static int compare_wide(const void *k1, const void *k2); | |
86 | ||
87 | /* | |
88 | * 'cupsCharmapGet()' - Get a character set map. | |
89 | * | |
90 | * This code handles single-byte (SBCS), double-byte (DBCS), and | |
91 | * variable-byte (VBCS) character sets _without_ charset escapes... | |
92 | * This code does not handle multiple-byte character sets (MBCS) | |
93 | * (such as ISO-2022-JP) with charset switching via escapes... | |
94 | */ | |
95 | ||
96 | void * /* O - Charset map pointer */ | |
97 | cupsCharmapGet( | |
98 | const cups_encoding_t encoding) /* I - Encoding */ | |
99 | { | |
100 | char mapname[80]; /* Name of charset map */ | |
101 | char filename[1024]; /* Filename for charset map file */ | |
102 | _cups_globals_t *cg = _cupsGlobals(); /* Global data */ | |
103 | ||
104 | ||
105 | /* | |
106 | * Check for valid arguments... | |
107 | */ | |
108 | ||
109 | if ((encoding < 0) || (encoding >= CUPS_ENCODING_VBCS_END)) | |
110 | return (NULL); | |
111 | ||
112 | /* | |
113 | * Get the data directory and charset map name... | |
114 | */ | |
115 | ||
116 | snprintf(mapname, sizeof(mapname), "%s.txt", _cupsEncodingName(encoding)); | |
117 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", | |
118 | cg->cups_datadir, mapname); | |
119 | ||
120 | /* | |
121 | * Read charset map input file into cache... | |
122 | */ | |
123 | ||
124 | if (encoding < CUPS_ENCODING_SBCS_END) | |
125 | return (get_sbcs_charmap(encoding, filename)); | |
126 | else if (encoding < CUPS_ENCODING_VBCS_END) | |
127 | return (get_vbcs_charmap(encoding, filename)); | |
128 | else | |
129 | return (NULL); | |
130 | } | |
131 | ||
132 | /* | |
133 | * 'cupsCharmapFree()' - Free a character set map. | |
134 | * | |
135 | * This does not actually free; use 'cupsCharmapFlush()' for that. | |
136 | */ | |
137 | void | |
138 | cupsCharmapFree(const cups_encoding_t encoding) | |
139 | /* I - Encoding */ | |
140 | { | |
141 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
142 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
143 | _cups_globals_t *cg = _cupsGlobals(); | |
144 | /* Pointer to library globals */ | |
145 | ||
146 | /* | |
147 | * See if we already have this SBCS charset map loaded... | |
148 | */ | |
149 | for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next) | |
150 | { | |
151 | if (cmap->encoding == encoding) | |
152 | { | |
153 | if (cmap->used > 0) | |
154 | cmap->used --; | |
155 | return; | |
156 | } | |
157 | } | |
158 | ||
159 | /* | |
160 | * See if we already have this DBCS/VBCS charset map loaded... | |
161 | */ | |
162 | for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next) | |
163 | { | |
164 | if (vmap->encoding == encoding) | |
165 | { | |
166 | if (vmap->used > 0) | |
167 | vmap->used --; | |
168 | return; | |
169 | } | |
170 | } | |
171 | return; | |
172 | } | |
173 | ||
174 | /* | |
175 | * 'cupsCharmapFlush()' - Flush all character set maps out of cache. | |
176 | */ | |
177 | void | |
178 | cupsCharmapFlush(void) | |
179 | { | |
180 | int i; /* Looping variable */ | |
181 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
182 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
183 | _cups_cmap_t *cnext; /* Next Legacy SBCS Charset Map */ | |
184 | _cups_vmap_t *vnext; /* Next Legacy VBCS Charset Map */ | |
185 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
186 | cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ | |
187 | cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ | |
188 | _cups_globals_t *cg = _cupsGlobals(); | |
189 | /* Pointer to library globals */ | |
190 | ||
191 | /* | |
192 | * Loop through SBCS charset map cache, free all memory... | |
193 | */ | |
194 | for (cmap = cg->cmap_cache; cmap != NULL; cmap = cnext) | |
195 | { | |
196 | for (i = 0; i < 256; i ++) | |
197 | { | |
198 | if ((srow = cmap->uni2char[i]) != NULL) | |
199 | free(srow); | |
200 | } | |
201 | cnext = cmap->next; | |
202 | free(cmap); | |
203 | } | |
204 | cg->cmap_cache = NULL; | |
205 | ||
206 | /* | |
207 | * Loop through DBCS/VBCS charset map cache, free all memory... | |
208 | */ | |
209 | for (vmap = cg->vmap_cache; vmap != NULL; vmap = vnext) | |
210 | { | |
211 | for (i = 0; i < 256; i ++) | |
212 | { | |
213 | if ((crow = vmap->char2uni[i]) != NULL) | |
214 | free(crow); | |
215 | } | |
216 | for (i = 0; i < 256; i ++) | |
217 | { | |
218 | if ((vrow = vmap->uni2char[i]) != NULL) | |
219 | free(vrow); | |
220 | } | |
221 | if (vmap->wide2uni) | |
222 | free(vmap->wide2uni); | |
223 | vnext = vmap->next; | |
224 | free(vmap); | |
225 | } | |
226 | cg->vmap_cache = NULL; | |
227 | return; | |
228 | } | |
229 | ||
230 | /* | |
231 | * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set. | |
232 | * | |
233 | * This code handles single-byte (SBCS), double-byte (DBCS), and | |
234 | * variable-byte (VBCS) character sets _without_ charset escapes... | |
235 | * This code does not handle multiple-byte character sets (MBCS) | |
236 | * (such as ISO-2022-JP) with charset switching via escapes... | |
237 | */ | |
238 | int /* O - Count or -1 on error */ | |
239 | cupsUTF8ToCharset(char *dest, /* O - Target string */ | |
240 | const cups_utf8_t *src, /* I - Source string */ | |
241 | const int maxout, /* I - Max output */ | |
242 | const cups_encoding_t encoding) /* I - Encoding */ | |
243 | { | |
244 | /* | |
245 | * Check for valid arguments... | |
246 | */ | |
247 | ||
248 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
249 | return (-1); | |
250 | ||
251 | /* | |
252 | * Handle identity conversions... | |
253 | */ | |
254 | ||
255 | if (encoding == CUPS_UTF8 || | |
256 | encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) | |
257 | { | |
258 | strlcpy(dest, (char *)src, maxout); | |
259 | return (strlen(dest)); | |
260 | } | |
261 | ||
262 | /* | |
263 | * Convert input UTF-8 to legacy charset... | |
264 | */ | |
265 | if (encoding < CUPS_ENCODING_SBCS_END) | |
266 | return (conv_utf8_to_sbcs(dest, src, maxout, encoding)); | |
267 | else if (encoding < CUPS_ENCODING_VBCS_END) | |
268 | return (conv_utf8_to_vbcs(dest, src, maxout, encoding)); | |
269 | else | |
270 | return (-1); | |
271 | } | |
272 | ||
273 | /* | |
274 | * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8. | |
275 | * | |
276 | * This code handles single-byte (SBCS), double-byte (DBCS), and | |
277 | * variable-byte (VBCS) character sets _without_ charset escapes... | |
278 | * This code does not handle multiple-byte character sets (MBCS) | |
279 | * (such as ISO-2022-JP) with charset switching via escapes... | |
280 | */ | |
281 | int /* O - Count or -1 on error */ | |
282 | cupsCharsetToUTF8(cups_utf8_t *dest, /* O - Target string */ | |
283 | const char *src, /* I - Source string */ | |
284 | const int maxout, /* I - Max output */ | |
285 | const cups_encoding_t encoding) /* I - Encoding */ | |
286 | { | |
287 | /* | |
288 | * Check for valid arguments... | |
289 | */ | |
290 | ||
291 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
292 | return (-1); | |
293 | ||
294 | /* | |
295 | * Handle identity conversions... | |
296 | */ | |
297 | ||
298 | if (encoding == CUPS_UTF8 || | |
299 | encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) | |
300 | { | |
301 | strlcpy((char *)dest, src, maxout); | |
302 | return (strlen((char *)dest)); | |
303 | } | |
304 | ||
305 | /* | |
306 | * Convert input legacy charset to UTF-8... | |
307 | */ | |
308 | if (encoding < CUPS_ENCODING_SBCS_END) | |
309 | return (conv_sbcs_to_utf8(dest, src, maxout, encoding)); | |
310 | else if (encoding < CUPS_ENCODING_VBCS_END) | |
311 | return (conv_vbcs_to_utf8(dest, src, maxout, encoding)); | |
312 | else | |
313 | return (-1); | |
314 | } | |
315 | ||
316 | /* | |
317 | * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16. | |
318 | * | |
319 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
320 | */ | |
321 | int /* O - Count or -1 on error */ | |
322 | cupsUTF8ToUTF16(cups_utf16_t *dest, /* O - Target string */ | |
323 | const cups_utf8_t *src, /* I - Source string */ | |
324 | const int maxout) /* I - Max output */ | |
325 | { | |
326 | int worklen; /* Internal UCS-4 string length */ | |
327 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
328 | /* Internal UCS-4 string */ | |
329 | ||
330 | /* | |
331 | * Check for valid arguments and clear output... | |
332 | */ | |
333 | if ((dest == NULL) | |
334 | || (src == NULL) | |
335 | || (maxout < 1) | |
336 | || (maxout > CUPS_MAX_USTRING)) | |
337 | return (-1); | |
338 | *dest = 0; | |
339 | ||
340 | /* | |
341 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
342 | */ | |
343 | worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING); | |
344 | if (worklen < 0) | |
345 | return (-1); | |
346 | ||
347 | /* | |
348 | * Convert internal UCS-4 to output UTF-16... | |
349 | */ | |
350 | worklen = cupsUTF32ToUTF16(dest, work, maxout); | |
351 | return (worklen); | |
352 | } | |
353 | ||
354 | /* | |
355 | * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8. | |
356 | * | |
357 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
358 | */ | |
359 | int /* O - Count or -1 on error */ | |
360 | cupsUTF16ToUTF8(cups_utf8_t *dest, /* O - Target string */ | |
361 | const cups_utf16_t *src, /* I - Source string */ | |
362 | const int maxout) /* I - Max output */ | |
363 | { | |
364 | int worklen; /* Internal UCS-4 string length */ | |
365 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
366 | /* Internal UCS-4 string */ | |
367 | ||
368 | /* | |
369 | * Check for valid arguments and clear output... | |
370 | */ | |
371 | if ((dest == NULL) | |
372 | || (src == NULL) | |
373 | || (maxout < 1) | |
374 | || (maxout > CUPS_MAX_USTRING)) | |
375 | return (-1); | |
376 | *dest = 0; | |
377 | ||
378 | /* | |
379 | * Convert input UTF-16 to internal UCS-4 (and byte-swap)... | |
380 | */ | |
381 | worklen = cupsUTF16ToUTF32(work, src, CUPS_MAX_USTRING); | |
382 | if (worklen < 0) | |
383 | return (-1); | |
384 | ||
385 | /* | |
386 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
387 | */ | |
388 | worklen = cupsUTF32ToUTF8(dest, work, maxout); | |
389 | return (worklen); | |
390 | } | |
391 | ||
392 | /* | |
393 | * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32. | |
394 | * | |
395 | * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows... | |
396 | * | |
397 | * UTF-32 char UTF-8 char(s) | |
398 | * -------------------------------------------------- | |
399 | * 0 to 127 = 0xxxxxxx (US-ASCII) | |
400 | * 128 to 2047 = 110xxxxx 10yyyyyy | |
401 | * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz | |
402 | * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx | |
403 | * | |
404 | * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4, | |
405 | * which would convert to five- or six-octet UTF-8 sequences... | |
406 | * | |
407 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
408 | */ | |
409 | int /* O - Count or -1 on error */ | |
410 | cupsUTF8ToUTF32(cups_utf32_t *dest, /* O - Target string */ | |
411 | const cups_utf8_t *src, /* I - Source string */ | |
412 | const int maxout) /* I - Max output */ | |
413 | { | |
414 | cups_utf8_t *first = (cups_utf8_t *) src; | |
415 | size_t srclen; /* Source string length */ | |
416 | int i; /* Looping variable */ | |
417 | cups_utf32_t ch; /* Character value */ | |
418 | cups_utf32_t next; /* Next character value */ | |
419 | cups_utf32_t ch32; /* UTF-32 character value */ | |
420 | ||
421 | /* | |
422 | * Check for valid arguments and clear output... | |
423 | */ | |
424 | if ((dest == NULL) | |
425 | || (src == NULL) | |
426 | || (maxout < 1) | |
427 | || (maxout > CUPS_MAX_USTRING)) | |
428 | return (-1); | |
429 | *dest = 0; | |
430 | ||
431 | /* | |
432 | * Convert input UTF-8 to output UTF-32 (and insert BOM)... | |
433 | */ | |
434 | *dest = 0xfeff; | |
435 | dest ++; | |
436 | srclen = strlen((char *) src); | |
437 | for (i = 1; i < (maxout - 1); src ++, dest ++) | |
438 | { | |
439 | ch = (cups_utf32_t) *src; | |
440 | ch &= 0xff; | |
441 | if (ch == 0) | |
442 | break; | |
443 | i ++; | |
444 | ||
445 | /* | |
446 | * Convert UTF-8 character(s) to UTF-32 character... | |
447 | */ | |
448 | if ((ch & 0x7f) == ch) | |
449 | { | |
450 | /* | |
451 | * One-octet UTF-8 <= 127 (US-ASCII)... | |
452 | */ | |
453 | *dest = ch; | |
454 | } | |
455 | else if ((ch & 0xe0) == 0xc0) | |
456 | { | |
457 | /* | |
458 | * Two-octet UTF-8 <= 2047 (Latin-x)... | |
459 | */ | |
460 | src ++; | |
461 | next = (cups_utf32_t) *src; | |
462 | next &= 0xff; | |
463 | if (next == 0) | |
464 | return (-1); | |
465 | ch32 = ((ch & 0x1f) << 6) | (next & 0x3f); | |
466 | ||
467 | /* | |
468 | * Check for non-shortest form (invalid UTF-8)... | |
469 | */ | |
470 | if (ch32 <= 127) | |
471 | return (-1); | |
472 | *dest = ch32; | |
473 | } | |
474 | else if ((ch & 0xf0) == 0xe0) | |
475 | { | |
476 | /* | |
477 | * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)... | |
478 | */ | |
479 | src ++; | |
480 | next = (cups_utf32_t) *src; | |
481 | next &= 0xff; | |
482 | if (next == 0) | |
483 | return (-1); | |
484 | ch32 = ((ch & 0x1f) << 6) | (next & 0x3f); | |
485 | src ++; | |
486 | next = (cups_utf32_t) *src; | |
487 | next &= 0xff; | |
488 | if (next == 0) | |
489 | return (-1); | |
490 | ch32 = ((ch32 << 6) | (next & 0x3f)); | |
491 | ||
492 | /* | |
493 | * Check for non-shortest form (invalid UTF-8)... | |
494 | */ | |
495 | if (ch32 <= 2047) | |
496 | return (-1); | |
497 | *dest = ch32; | |
498 | } | |
499 | else if ((ch & 0xf8) == 0xf0) | |
500 | { | |
501 | /* | |
502 | * Four-octet UTF-8 to Replacement Character... | |
503 | */ | |
504 | if (((src - first) + 3) >= srclen) | |
505 | return (-1); | |
506 | src += 3; | |
507 | *dest = 0xfffd; | |
508 | } | |
509 | else if ((ch & 0xfc) == 0xf8) | |
510 | { | |
511 | /* | |
512 | * Five-octet UTF-8 (invalid strict UTF-32)... | |
513 | */ | |
514 | return (-1); | |
515 | } | |
516 | else if ((ch & 0xfe) == 0xfc) | |
517 | { | |
518 | /* | |
519 | * Six-octet UTF-8 (invalid strict UTF-32)... | |
520 | */ | |
521 | return (-1); | |
522 | } | |
523 | else | |
524 | { | |
525 | /* | |
526 | * More than six-octet (invalid UTF-8 sequence)... | |
527 | */ | |
528 | return (-1); | |
529 | } | |
530 | ||
531 | /* | |
532 | * Check for UTF-16 surrogate (illegal UTF-8)... | |
533 | */ | |
534 | if ((*dest >= 0xd800) && (*dest <= 0xdfff)) | |
535 | return (-1); | |
536 | ||
537 | /* | |
538 | * Check for beyond Plane 16 (invalid UTF-8)... | |
539 | */ | |
540 | if (*dest > 0x10ffff) | |
541 | return (-1); | |
542 | } | |
543 | *dest = 0; | |
544 | return (i); | |
545 | } | |
546 | ||
547 | /* | |
548 | * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8. | |
549 | * | |
550 | * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows... | |
551 | * | |
552 | * UTF-32 char UTF-8 char(s) | |
553 | * -------------------------------------------------- | |
554 | * 0 to 127 = 0xxxxxxx (US-ASCII) | |
555 | * 128 to 2047 = 110xxxxx 10yyyyyy | |
556 | * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz | |
557 | * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx | |
558 | * | |
559 | * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4, | |
560 | * which would convert to five- or six-octet UTF-8 sequences... | |
561 | * | |
562 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
563 | */ | |
564 | int /* O - Count or -1 on error */ | |
565 | cupsUTF32ToUTF8(cups_utf8_t *dest, /* O - Target string */ | |
566 | const cups_utf32_t *src, /* I - Source string */ | |
567 | const int maxout) /* I - Max output */ | |
568 | { | |
569 | cups_utf32_t *first = (cups_utf32_t *) src; | |
570 | /* First source char */ | |
571 | cups_utf8_t *start = dest; /* Start of destination string */ | |
572 | int i; /* Looping variable */ | |
573 | int swap = 0; /* Byte-swap input to output */ | |
574 | cups_utf32_t ch; /* Character value */ | |
575 | ||
576 | /* | |
577 | * Check for valid arguments and clear output... | |
578 | */ | |
579 | if ((dest == NULL) | |
580 | || (src == NULL) | |
581 | || (maxout < 1)) | |
582 | return (-1); | |
583 | *dest = '\0'; | |
584 | ||
585 | /* | |
586 | * Check for leading BOM in UTF-32 and inverted BOM... | |
587 | */ | |
588 | if (*src == 0xfffe0000) | |
589 | swap = 1; | |
590 | ||
591 | /* | |
592 | * Convert input UTF-32 to output UTF-8... | |
593 | */ | |
594 | for (i = 0; i < (maxout - 1); src ++) | |
595 | { | |
596 | ch = *src; | |
597 | if (ch == 0) | |
598 | break; | |
599 | ||
600 | /* | |
601 | * Byte swap input UTF-32, if necessary... | |
602 | */ | |
603 | if (swap) | |
604 | ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000)); | |
605 | ||
606 | /* | |
607 | * Check for leading BOM (and delete from output)... | |
608 | */ | |
609 | if ((src == first) && (ch == 0xfeff)) | |
610 | continue; | |
611 | ||
612 | /* | |
613 | * Check for beyond Plane 16 (invalid UTF-32)... | |
614 | */ | |
615 | if (ch > 0x10ffff) | |
616 | return (-1); | |
617 | ||
618 | /* | |
619 | * Convert beyond Plane 0 (BMP) to Replacement Character... | |
620 | */ | |
621 | if (ch > 0xffff) | |
622 | ch = 0xfffd; | |
623 | ||
624 | /* | |
625 | * Convert UTF-32 character to UTF-8 character(s)... | |
626 | */ | |
627 | if (ch <= 0x7f) | |
628 | { | |
629 | /* | |
630 | * One-octet UTF-8 <= 127 (US-ASCII)... | |
631 | */ | |
632 | *dest = (cups_utf8_t) ch; | |
633 | dest ++; | |
634 | i ++; | |
635 | } | |
636 | else if (ch <= 0x7ff) | |
637 | { | |
638 | /* | |
639 | * Two-octet UTF-8 <= 2047 (Latin-x)... | |
640 | */ | |
641 | if (i > (maxout - 2)) | |
642 | break; | |
643 | *dest = (cups_utf8_t) (0xc0 | ((ch >> 6) & 0x1f)); | |
644 | dest ++; | |
645 | i ++; | |
646 | *dest = (cups_utf8_t) (0x80 | (ch & 0x3f)); | |
647 | dest ++; | |
648 | i ++; | |
649 | } | |
650 | else | |
651 | { | |
652 | /* | |
653 | * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)... | |
654 | */ | |
655 | if (i > (maxout - 3)) | |
656 | break; | |
657 | *dest = (cups_utf8_t) (0xe0 | ((ch >> 12) & 0x0f)); | |
658 | dest ++; | |
659 | i ++; | |
660 | *dest = (cups_utf8_t) (0x80 | ((ch >> 6) & 0x3f)); | |
661 | dest ++; | |
662 | i ++; | |
663 | *dest = (cups_utf8_t) (0x80 | (ch & 0x3f)); | |
664 | dest ++; | |
665 | i ++; | |
666 | } | |
667 | } | |
668 | *dest = '\0'; | |
669 | i = (int) (dest - start); | |
670 | return (i); | |
671 | } | |
672 | ||
673 | /* | |
674 | * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32. | |
675 | * | |
676 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
677 | */ | |
678 | int /* O - Count or -1 on error */ | |
679 | cupsUTF16ToUTF32(cups_utf32_t *dest, /* O - Target string */ | |
680 | const cups_utf16_t *src, /* I - Source string */ | |
681 | const int maxout) /* I - Max output */ | |
682 | { | |
683 | int i; /* Looping variable */ | |
684 | int swap = 0; /* Byte-swap input to output */ | |
685 | int surrogate = 0; /* Expecting low-half surrogate */ | |
686 | cups_utf32_t ch; /* Character value */ | |
687 | ||
688 | /* | |
689 | * Check for valid arguments and clear output... | |
690 | */ | |
691 | if ((dest == NULL) | |
692 | || (src == NULL) | |
693 | || (maxout < 1) | |
694 | || (maxout > CUPS_MAX_USTRING)) | |
695 | return (-1); | |
696 | *dest = 0; | |
697 | ||
698 | /* | |
699 | * Check for leading BOM in UTF-16 and inverted BOM... | |
700 | */ | |
701 | if (*src == 0xfffe) | |
702 | swap = 1; | |
703 | ||
704 | /* | |
705 | * Convert input UTF-16 to output UTF-32... | |
706 | */ | |
707 | for (i = 0; i < (maxout - 1); src ++) | |
708 | { | |
709 | ch = (cups_utf32_t) (*src & 0xffff); | |
710 | if (ch == 0) | |
711 | break; | |
712 | i ++; | |
713 | ||
714 | /* | |
715 | * Byte swap input UTF-16, if necessary... | |
716 | */ | |
717 | if (swap) | |
718 | ch = (cups_utf32_t) ((ch << 8) | (ch >> 8)); | |
719 | ||
720 | /* | |
721 | * Discard expected UTF-16 low-half surrogate... | |
722 | */ | |
723 | if ((ch >= 0xdc00) && (ch <= 0xdfff)) | |
724 | { | |
725 | if (surrogate == 0) | |
726 | return (-1); | |
727 | surrogate = 0; | |
728 | continue; | |
729 | } | |
730 | ||
731 | /* | |
732 | * Convert UTF-16 high-half surrogate to Replacement Character... | |
733 | */ | |
734 | if ((ch >= 0xd800) && (ch <= 0xdbff)) | |
735 | { | |
736 | if (surrogate == 1) | |
737 | return (-1); | |
738 | surrogate = 1; | |
739 | ch = 0xfffd; | |
740 | } | |
741 | *dest = ch; | |
742 | dest ++; | |
743 | } | |
744 | *dest = 0; | |
745 | return (i); | |
746 | } | |
747 | ||
748 | /* | |
749 | * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16. | |
750 | * | |
751 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
752 | */ | |
753 | int /* O - Count or -1 on error */ | |
754 | cupsUTF32ToUTF16(cups_utf16_t *dest, /* O - Target string */ | |
755 | const cups_utf32_t *src, /* I - Source string */ | |
756 | const int maxout) /* I - Max output */ | |
757 | { | |
758 | int i; /* Looping variable */ | |
759 | int swap = 0; /* Byte-swap input to output */ | |
760 | cups_utf32_t ch; /* Character value */ | |
761 | ||
762 | /* | |
763 | * Check for valid arguments and clear output... | |
764 | */ | |
765 | if ((dest == NULL) | |
766 | || (src == NULL) | |
767 | || (maxout < 1) | |
768 | || (maxout > CUPS_MAX_USTRING)) | |
769 | return (-1); | |
770 | *dest = 0; | |
771 | ||
772 | /* | |
773 | * Check for leading BOM in UTF-32 and inverted BOM... | |
774 | */ | |
775 | if (*src == 0xfffe0000) | |
776 | swap = 1; | |
777 | ||
778 | /* | |
779 | * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)... | |
780 | */ | |
781 | for (i = 0; i < (maxout - 1); src ++, dest ++) | |
782 | { | |
783 | ch = *src; | |
784 | if (ch == 0) | |
785 | break; | |
786 | i ++; | |
787 | ||
788 | /* | |
789 | * Byte swap input UTF-32, if necessary... | |
790 | */ | |
791 | if (swap) | |
792 | ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000)); | |
793 | ||
794 | /* | |
795 | * Check for UTF-16 surrogate (illegal UTF-32)... | |
796 | */ | |
797 | if ((ch >= 0xd800) && (ch <= 0xdfff)) | |
798 | return (-1); | |
799 | ||
800 | /* | |
801 | * Check for beyond Plane 16 (invalid UTF-32)... | |
802 | */ | |
803 | if (ch > 0x10ffff) | |
804 | return (-1); | |
805 | ||
806 | /* | |
807 | * Convert beyond Plane 0 (BMP) to Replacement Character... | |
808 | */ | |
809 | if (ch > 0xffff) | |
810 | ch = 0xfffd; | |
811 | *dest = (cups_utf16_t) ch; | |
812 | } | |
813 | *dest = 0; | |
814 | return (i); | |
815 | } | |
816 | ||
817 | /* | |
818 | * 'get_charmap_count()' - Count lines in a charmap file. | |
819 | */ | |
820 | static int /* O - Count or -1 on error */ | |
821 | get_charmap_count(const char *filename) /* I - Charmap Filename */ | |
822 | { | |
823 | int i; /* Looping variable */ | |
824 | cups_file_t *fp; /* Map input file pointer */ | |
825 | char *s; /* Line parsing pointer */ | |
826 | char line[256]; /* Line from input map file */ | |
827 | cups_utf32_t unichar; /* Unicode character value */ | |
828 | ||
829 | /* | |
830 | * Open map input file... | |
831 | */ | |
832 | if ((filename == NULL) || (*filename == '\0')) | |
833 | return (-1); | |
834 | fp = cupsFileOpen(filename, "r"); | |
835 | if (fp == NULL) | |
836 | return (-1); | |
837 | ||
838 | /* | |
839 | * Count lines in map input file... | |
840 | */ | |
841 | for (i = 0; i < CUPS_MAX_CHARMAP_LINES;) | |
842 | { | |
843 | s = cupsFileGets(fp, line, sizeof(line)); | |
844 | if (s == NULL) | |
845 | break; | |
846 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
847 | continue; | |
848 | while ((*s != 0) && (*s != ' ') && (*s != '\t')) | |
849 | s ++; | |
850 | while ((*s == ' ') || (*s == '\t')) | |
851 | s ++; | |
852 | if (strncmp (s, "0x", 2) == 0) | |
853 | s += 2; | |
854 | if ((sscanf(s, "%lx", &unichar) != 1) | |
855 | || (unichar > 0xffff)) | |
856 | { | |
857 | cupsFileClose(fp); | |
858 | return (-1); | |
859 | } | |
860 | i ++; | |
861 | } | |
862 | if (i == 0) | |
863 | i = -1; | |
864 | ||
865 | /* | |
866 | * Close file and return charmap count (non-comment line count)... | |
867 | */ | |
868 | cupsFileClose(fp); | |
869 | return (i); | |
870 | } | |
871 | ||
872 | /* | |
873 | * 'get_sbcs_charmap()' - Get SBCS Charmap. | |
874 | */ | |
875 | static _cups_cmap_t * /* O - Charmap or 0 on error */ | |
876 | get_sbcs_charmap(const cups_encoding_t encoding, | |
877 | /* I - Charmap Encoding */ | |
878 | const char *filename) /* I - Charmap Filename */ | |
879 | { | |
880 | int i; /* Loop variable */ | |
881 | unsigned long legchar; /* Legacy character value */ | |
882 | cups_utf32_t unichar; /* Unicode character value */ | |
883 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
884 | cups_file_t *fp; /* Charset map file pointer */ | |
885 | char *s; /* Line parsing pointer */ | |
886 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
887 | cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ | |
888 | char line[256]; /* Line from charset map file */ | |
889 | _cups_globals_t *cg = _cupsGlobals(); | |
890 | /* Pointer to library globals */ | |
891 | ||
892 | /* | |
893 | * Check for valid arguments... | |
894 | */ | |
895 | if ((encoding < 0) || (filename == NULL)) | |
896 | return (NULL); | |
897 | ||
898 | /* | |
899 | * See if we already have this SBCS charset map loaded... | |
900 | */ | |
901 | for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next) | |
902 | { | |
903 | if (cmap->encoding == encoding) | |
904 | { | |
905 | cmap->used ++; | |
906 | return ((void *) cmap); | |
907 | } | |
908 | } | |
909 | ||
910 | /* | |
911 | * Open SBCS charset map input file... | |
912 | */ | |
913 | fp = cupsFileOpen(filename, "r"); | |
914 | if (fp == NULL) | |
915 | return (NULL); | |
916 | ||
917 | /* | |
918 | * Allocate memory for SBCS charset map and add to cache... | |
919 | */ | |
920 | cmap = (_cups_cmap_t *) calloc(1, sizeof(_cups_cmap_t)); | |
921 | if (cmap == NULL) | |
922 | { | |
923 | cupsFileClose(fp); | |
924 | return (NULL); | |
925 | } | |
926 | cmap->next = cg->cmap_cache; | |
927 | cg->cmap_cache = cmap; | |
928 | cmap->used ++; | |
929 | cmap->encoding = encoding; | |
930 | ||
931 | /* | |
932 | * Save SBCS charset map into memory for transcoding... | |
933 | */ | |
934 | for (i = 0; i < CUPS_MAX_CHARMAP_LINES;) | |
935 | { | |
936 | s = cupsFileGets(fp, line, sizeof(line)); | |
937 | if (s == NULL) | |
938 | break; | |
939 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
940 | continue; | |
941 | if (strncmp (s, "0x", 2) == 0) | |
942 | s += 2; | |
943 | if ((sscanf(s, "%lx", &legchar) != 1) | |
944 | || (legchar > 0xff)) | |
945 | { | |
946 | cupsFileClose(fp); | |
947 | cupsCharmapFlush(); | |
948 | return (NULL); | |
949 | } | |
950 | while ((*s != 0) && (*s != ' ') && (*s != '\t')) | |
951 | s ++; | |
952 | while ((*s == ' ') || (*s == '\t')) | |
953 | s ++; | |
954 | if (strncmp (s, "0x", 2) == 0) | |
955 | s += 2; | |
956 | if (sscanf(s, "%lx", &unichar) != 1) | |
957 | { | |
958 | cupsFileClose(fp); | |
959 | cupsCharmapFlush(); | |
960 | return (NULL); | |
961 | } | |
962 | i ++; | |
963 | ||
964 | /* | |
965 | * Convert beyond Plane 0 (BMP) to Replacement Character... | |
966 | */ | |
967 | if (unichar > 0xffff) | |
968 | unichar = 0xfffd; | |
969 | ||
970 | /* | |
971 | * Save legacy to Unicode mapping in direct lookup table... | |
972 | */ | |
973 | crow = &cmap->char2uni[(int) legchar]; | |
974 | *crow = (cups_ucs2_t) (unichar & 0xffff); | |
975 | ||
976 | /* | |
977 | * Save Unicode to legacy mapping in indirect lookup table... | |
978 | */ | |
979 | srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)]; | |
980 | if (srow == NULL) | |
981 | { | |
982 | srow = (cups_sbcs_t *) calloc(256, sizeof(cups_sbcs_t)); | |
983 | if (srow == NULL) | |
984 | { | |
985 | cupsFileClose(fp); | |
986 | cupsCharmapFlush(); | |
987 | return (NULL); | |
988 | } | |
989 | cmap->uni2char[(int) ((unichar >> 8) & 0xff)] = srow; | |
990 | } | |
991 | srow += (int) (unichar & 0xff); | |
992 | ||
993 | /* | |
994 | * Convert Replacement Character to visible replacement... | |
995 | */ | |
996 | if (unichar == 0xfffd) | |
997 | legchar = (unsigned long) '?'; | |
998 | ||
999 | /* | |
1000 | * First (oldest) legacy character uses Unicode mapping cell... | |
1001 | */ | |
1002 | if (*srow == 0) | |
1003 | *srow = (cups_sbcs_t) legchar; | |
1004 | } | |
1005 | cupsFileClose(fp); | |
1006 | return (cmap); | |
1007 | } | |
1008 | ||
1009 | /* | |
1010 | * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap. | |
1011 | */ | |
1012 | static _cups_vmap_t * /* O - Charmap or 0 on error */ | |
1013 | get_vbcs_charmap(const cups_encoding_t encoding, | |
1014 | /* I - Charmap Encoding */ | |
1015 | const char *filename) /* I - Charmap Filename */ | |
1016 | { | |
1017 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
1018 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
1019 | cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ | |
1020 | _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */ | |
1021 | cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */ | |
1022 | unsigned long legchar; /* Legacy character value */ | |
1023 | cups_utf32_t unichar; /* Unicode character value */ | |
1024 | int mapcount; /* Count of lines in charmap file */ | |
1025 | cups_file_t *fp; /* Charset map file pointer */ | |
1026 | char *s; /* Line parsing pointer */ | |
1027 | char line[256]; /* Line from charset map file */ | |
1028 | int i; /* Loop variable */ | |
1029 | int wide; /* 32-bit legacy char */ | |
1030 | _cups_globals_t *cg = _cupsGlobals(); | |
1031 | /* Pointer to library globals */ | |
1032 | ||
1033 | /* | |
1034 | * Check for valid arguments... | |
1035 | */ | |
1036 | if ((encoding < 0) || (filename == NULL)) | |
1037 | return (NULL); | |
1038 | ||
1039 | /* | |
1040 | * See if we already have this DBCS/VBCS charset map loaded... | |
1041 | */ | |
1042 | for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next) | |
1043 | { | |
1044 | if (vmap->encoding == encoding) | |
1045 | { | |
1046 | vmap->used ++; | |
1047 | return ((void *) vmap); | |
1048 | } | |
1049 | } | |
1050 | ||
1051 | /* | |
1052 | * Count lines in charmap file... | |
1053 | */ | |
1054 | mapcount = get_charmap_count(filename); | |
1055 | if (mapcount <= 0) | |
1056 | return (NULL); | |
1057 | ||
1058 | /* | |
1059 | * Open VBCS charset map input file... | |
1060 | */ | |
1061 | fp = cupsFileOpen(filename, "r"); | |
1062 | if (fp == NULL) | |
1063 | return (NULL); | |
1064 | ||
1065 | /* | |
1066 | * Allocate memory for DBCS/VBCS charset map and add to cache... | |
1067 | */ | |
1068 | vmap = (_cups_vmap_t *) calloc(1, sizeof(_cups_vmap_t)); | |
1069 | if (vmap == NULL) | |
1070 | { | |
1071 | cupsFileClose(fp); | |
1072 | return (NULL); | |
1073 | } | |
1074 | vmap->next = cg->vmap_cache; | |
1075 | cg->vmap_cache = vmap; | |
1076 | vmap->used ++; | |
1077 | vmap->encoding = encoding; | |
1078 | ||
1079 | /* | |
1080 | * Save DBCS/VBCS charset map into memory for transcoding... | |
1081 | */ | |
1082 | leadchar = 0; | |
1083 | wide2uni = NULL; | |
1084 | ||
1085 | for (i = 0, wide = 0; i < mapcount; ) | |
1086 | { | |
1087 | s = cupsFileGets(fp, line, sizeof(line)); | |
1088 | if (s == NULL) | |
1089 | break; | |
1090 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1091 | continue; | |
1092 | if (strncmp (s, "0x", 2) == 0) | |
1093 | s += 2; | |
1094 | if ((sscanf(s, "%lx", &legchar) != 1) | |
1095 | || ((legchar > 0xffff) && (encoding < CUPS_ENCODING_DBCS_END))) | |
1096 | { | |
1097 | cupsFileClose(fp); | |
1098 | cupsCharmapFlush(); | |
1099 | return (NULL); | |
1100 | } | |
1101 | while ((*s != 0) && (*s != ' ') && (*s != '\t')) | |
1102 | s ++; | |
1103 | while ((*s == ' ') || (*s == '\t')) | |
1104 | s ++; | |
1105 | if (strncmp (s, "0x", 2) == 0) | |
1106 | s += 2; | |
1107 | if (sscanf(s, "%lx", &unichar) != 1) | |
1108 | { | |
1109 | cupsFileClose(fp); | |
1110 | cupsCharmapFlush(); | |
1111 | return (NULL); | |
1112 | } | |
1113 | i ++; | |
1114 | ||
1115 | /* | |
1116 | * Convert beyond Plane 0 (BMP) to Replacement Character... | |
1117 | */ | |
1118 | if (unichar > 0xffff) | |
1119 | unichar = 0xfffd; | |
1120 | ||
1121 | /* | |
1122 | * Save lead char of 2/3/4-byte legacy char... | |
1123 | */ | |
1124 | if ((legchar > 0xff) && (legchar <= 0xffff)) | |
1125 | { | |
1126 | leadchar = (cups_sbcs_t) (legchar >> 8); | |
1127 | vmap->lead2char[leadchar] = leadchar; | |
1128 | } | |
1129 | if ((legchar > 0xffff) && (legchar <= 0xffffff)) | |
1130 | { | |
1131 | leadchar = (cups_sbcs_t) (legchar >> 16); | |
1132 | vmap->lead3char[leadchar] = leadchar; | |
1133 | } | |
1134 | if (legchar > 0xffffff) | |
1135 | { | |
1136 | leadchar = (cups_sbcs_t) (legchar >> 24); | |
1137 | vmap->lead4char[leadchar] = leadchar; | |
1138 | } | |
1139 | ||
1140 | /* | |
1141 | * Save Legacy to Unicode mapping... | |
1142 | */ | |
1143 | if (legchar <= 0xffff) | |
1144 | { | |
1145 | /* | |
1146 | * Save DBCS 16-bit to Unicode mapping in indirect lookup table... | |
1147 | */ | |
1148 | crow = vmap->char2uni[(int) leadchar]; | |
1149 | if (crow == NULL) | |
1150 | { | |
1151 | crow = (cups_ucs2_t *) calloc(256, sizeof(cups_ucs2_t)); | |
1152 | if (crow == NULL) | |
1153 | { | |
1154 | cupsFileClose(fp); | |
1155 | cupsCharmapFlush(); | |
1156 | return (NULL); | |
1157 | } | |
1158 | vmap->char2uni[(int) leadchar] = crow; | |
1159 | } | |
1160 | crow += (int) (legchar & 0xff); | |
1161 | *crow = (cups_ucs2_t) unichar; | |
1162 | } | |
1163 | else | |
1164 | { | |
1165 | /* | |
1166 | * Save VBCS 32-bit to Unicode mapping in sorted list table... | |
1167 | */ | |
1168 | if (wide == 0) | |
1169 | { | |
1170 | wide = 1; | |
1171 | vmap->widecount = (mapcount - i + 1); | |
1172 | wide2uni = (_cups_wide2uni_t *) | |
1173 | calloc(vmap->widecount, sizeof(_cups_wide2uni_t)); | |
1174 | if (wide2uni == NULL) | |
1175 | { | |
1176 | cupsFileClose(fp); | |
1177 | cupsCharmapFlush(); | |
1178 | return (NULL); | |
1179 | } | |
1180 | vmap->wide2uni = wide2uni; | |
1181 | } | |
1182 | wide2uni->widechar = (cups_vbcs_t) legchar; | |
1183 | wide2uni->unichar = (cups_ucs2_t)unichar; | |
1184 | wide2uni ++; | |
1185 | } | |
1186 | ||
1187 | /* | |
1188 | * Save Unicode to legacy mapping in indirect lookup table... | |
1189 | */ | |
1190 | vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)]; | |
1191 | if (vrow == NULL) | |
1192 | { | |
1193 | vrow = (cups_vbcs_t *) calloc(256, sizeof(cups_vbcs_t)); | |
1194 | if (vrow == NULL) | |
1195 | { | |
1196 | cupsFileClose(fp); | |
1197 | cupsCharmapFlush(); | |
1198 | return (NULL); | |
1199 | } | |
1200 | vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow; | |
1201 | } | |
1202 | vrow += (int) (unichar & 0xff); | |
1203 | ||
1204 | /* | |
1205 | * Convert Replacement Character to visible replacement... | |
1206 | */ | |
1207 | if (unichar == 0xfffd) | |
1208 | legchar = (unsigned long) '?'; | |
1209 | ||
1210 | /* | |
1211 | * First (oldest) legacy character uses Unicode mapping cell... | |
1212 | */ | |
1213 | if (*vrow == 0) | |
1214 | *vrow = (cups_vbcs_t) legchar; | |
1215 | } | |
1216 | vmap->charcount = (i - vmap->widecount); | |
1217 | cupsFileClose(fp); | |
1218 | return (vmap); | |
1219 | } | |
1220 | ||
1221 | /* | |
1222 | * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS. | |
1223 | */ | |
1224 | static int /* O - Count or -1 on error */ | |
1225 | conv_utf8_to_sbcs(char *dest, /* O - Target string */ | |
1226 | const cups_utf8_t *src, /* I - Source string */ | |
1227 | const int maxout, /* I - Max output */ | |
1228 | const cups_encoding_t encoding) /* I - Encoding */ | |
1229 | { | |
1230 | char *start = dest; /* Start of destination string */ | |
1231 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
1232 | cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ | |
1233 | cups_utf32_t unichar; /* Character value */ | |
1234 | int worklen; /* Internal UCS-4 string length */ | |
1235 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
1236 | /* Internal UCS-4 string */ | |
1237 | int i; /* Looping variable */ | |
1238 | ||
1239 | /* | |
1240 | * Check for valid arguments and clear output... | |
1241 | */ | |
1242 | if ((dest == NULL) | |
1243 | || (src == NULL) | |
1244 | || (maxout < 1) | |
1245 | || (maxout > CUPS_MAX_USTRING) | |
1246 | || (encoding == CUPS_UTF8)) | |
1247 | return (-1); | |
1248 | *dest = '\0'; | |
1249 | ||
1250 | /* | |
1251 | * Find legacy charset map in cache... | |
1252 | */ | |
1253 | cmap = (_cups_cmap_t *) cupsCharmapGet(encoding); | |
1254 | if (cmap == NULL) | |
1255 | return (-1); | |
1256 | ||
1257 | /* | |
1258 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
1259 | */ | |
1260 | worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING); | |
1261 | if (worklen < 0) | |
1262 | return (-1); | |
1263 | ||
1264 | /* | |
1265 | * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)... | |
1266 | */ | |
1267 | for (i = 0; i < worklen;) | |
1268 | { | |
1269 | unichar = work[i]; | |
1270 | if (unichar == 0) | |
1271 | break; | |
1272 | i ++; | |
1273 | ||
1274 | /* | |
1275 | * Check for leading BOM (and delete from output)... | |
1276 | */ | |
1277 | if ((i == 1) && (unichar == 0xfeff)) | |
1278 | continue; | |
1279 | ||
1280 | /* | |
1281 | * Convert ASCII verbatim (optimization)... | |
1282 | */ | |
1283 | if (unichar <= 0x7f) | |
1284 | { | |
1285 | *dest = (char) unichar; | |
1286 | dest ++; | |
1287 | continue; | |
1288 | } | |
1289 | ||
1290 | /* | |
1291 | * Convert unknown character to visible replacement... | |
1292 | */ | |
1293 | srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)]; | |
1294 | if (srow) | |
1295 | srow += (int) (unichar & 0xff); | |
1296 | if ((srow == NULL) || (*srow == 0)) | |
1297 | *dest = '?'; | |
1298 | else | |
1299 | *dest = (char) (*srow); | |
1300 | dest ++; | |
1301 | } | |
1302 | *dest = '\0'; | |
1303 | worklen = (int) (dest - start); | |
1304 | cupsCharmapFree(encoding); | |
1305 | return (worklen); | |
1306 | } | |
1307 | ||
1308 | /* | |
1309 | * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS. | |
1310 | */ | |
1311 | static int /* O - Count or -1 on error */ | |
1312 | conv_utf8_to_vbcs(char *dest, /* O - Target string */ | |
1313 | const cups_utf8_t *src, /* I - Source string */ | |
1314 | const int maxout, /* I - Max output */ | |
1315 | const cups_encoding_t encoding) /* I - Encoding */ | |
1316 | { | |
1317 | char *start = dest; /* Start of destination string */ | |
1318 | _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */ | |
1319 | cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ | |
1320 | cups_utf32_t unichar; /* Character value */ | |
1321 | cups_vbcs_t legchar; /* Legacy character value */ | |
1322 | int worklen; /* Internal UCS-4 string length */ | |
1323 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
1324 | /* Internal UCS-4 string */ | |
1325 | int i; /* Looping variable */ | |
1326 | ||
1327 | /* | |
1328 | * Check for valid arguments and clear output... | |
1329 | */ | |
1330 | if ((dest == NULL) | |
1331 | || (src == NULL) | |
1332 | || (maxout < 1) | |
1333 | || (maxout > CUPS_MAX_USTRING) | |
1334 | || (encoding == CUPS_UTF8)) | |
1335 | return (-1); | |
1336 | *dest = '\0'; | |
1337 | ||
1338 | /* | |
1339 | * Find legacy charset map in cache... | |
1340 | */ | |
1341 | vmap = (_cups_vmap_t *) cupsCharmapGet(encoding); | |
1342 | if (vmap == NULL) | |
1343 | return (-1); | |
1344 | ||
1345 | /* | |
1346 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
1347 | */ | |
1348 | worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING); | |
1349 | if (worklen < 0) | |
1350 | return (-1); | |
1351 | ||
1352 | /* | |
1353 | * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)... | |
1354 | */ | |
1355 | for (i = 0; i < worklen;) | |
1356 | { | |
1357 | unichar = work[i]; | |
1358 | if (unichar == 0) | |
1359 | break; | |
1360 | i ++; | |
1361 | ||
1362 | /* | |
1363 | * Check for leading BOM (and delete from output)... | |
1364 | */ | |
1365 | if ((i == 1) && (unichar == 0xfeff)) | |
1366 | continue; | |
1367 | ||
1368 | /* | |
1369 | * Convert ASCII verbatim (optimization)... | |
1370 | */ | |
1371 | if (unichar <= 0x7f) | |
1372 | { | |
1373 | *dest = (char) unichar; | |
1374 | dest ++; | |
1375 | continue; | |
1376 | } | |
1377 | ||
1378 | /* | |
1379 | * Convert unknown character to visible replacement... | |
1380 | */ | |
1381 | vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)]; | |
1382 | if (vrow) | |
1383 | vrow += (int) (unichar & 0xff); | |
1384 | if ((vrow == NULL) || (*vrow == 0)) | |
1385 | legchar = (cups_vbcs_t) '?'; | |
1386 | else | |
1387 | legchar = (cups_vbcs_t) *vrow; | |
1388 | ||
1389 | /* | |
1390 | * Save n-byte legacy character... | |
1391 | */ | |
1392 | if (legchar > 0xffffff) | |
1393 | { | |
1394 | *dest = (char) ((legchar >> 24) & 0xff); | |
1395 | dest++; | |
1396 | } | |
1397 | if (legchar > 0xffff) | |
1398 | { | |
1399 | *dest = (char) ((legchar >> 16) & 0xff); | |
1400 | dest++; | |
1401 | } | |
1402 | if (legchar > 0xff) | |
1403 | { | |
1404 | *dest = (char) ((legchar >> 8) & 0xff); | |
1405 | dest++; | |
1406 | } | |
1407 | *dest = (char) (legchar & 0xff); | |
1408 | dest ++; | |
1409 | } | |
1410 | *dest = '\0'; | |
1411 | worklen = (int) (dest - start); | |
1412 | cupsCharmapFree(encoding); | |
1413 | return (worklen); | |
1414 | } | |
1415 | ||
1416 | /* | |
1417 | * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8. | |
1418 | */ | |
1419 | static int /* O - Count or -1 on error */ | |
1420 | conv_sbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */ | |
1421 | const char *src, /* I - Source string */ | |
1422 | const int maxout, /* I - Max output */ | |
1423 | const cups_encoding_t encoding) /* I - Encoding */ | |
1424 | { | |
1425 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
1426 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
1427 | unsigned long legchar; /* Legacy character value */ | |
1428 | cups_utf32_t unichar; /* Unicode character value */ | |
1429 | int worklen; /* Internal UCS-4 string length */ | |
1430 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
1431 | /* Internal UCS-4 string */ | |
1432 | int i; /* Looping variable */ | |
1433 | ||
1434 | /* | |
1435 | * Check for valid arguments and clear output... | |
1436 | */ | |
1437 | if ((dest == NULL) | |
1438 | || (src == NULL) | |
1439 | || (maxout < 1) | |
1440 | || (maxout > CUPS_MAX_USTRING) | |
1441 | || (encoding == CUPS_UTF8)) | |
1442 | return (-1); | |
1443 | *dest = '\0'; | |
1444 | ||
1445 | /* | |
1446 | * Find legacy charset map in cache... | |
1447 | */ | |
1448 | cmap = (_cups_cmap_t *) cupsCharmapGet(encoding); | |
1449 | if (cmap == NULL) | |
1450 | return (-1); | |
1451 | ||
1452 | /* | |
1453 | * Convert input legacy charset to internal UCS-4 (and insert BOM)... | |
1454 | */ | |
1455 | work[0] = 0xfeff; | |
1456 | for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++) | |
1457 | { | |
1458 | if (*src == '\0') | |
1459 | break; | |
1460 | legchar = (unsigned long) *src; | |
1461 | ||
1462 | /* | |
1463 | * Convert ASCII verbatim (optimization)... | |
1464 | */ | |
1465 | if (legchar <= 0x7f) | |
1466 | { | |
1467 | work[i] = (cups_utf32_t) legchar; | |
1468 | i ++; | |
1469 | continue; | |
1470 | } | |
1471 | ||
1472 | /* | |
1473 | * Convert unknown character to Replacement Character... | |
1474 | */ | |
1475 | crow = &cmap->char2uni[0]; | |
1476 | crow += (int) legchar; | |
1477 | if (*crow == 0) | |
1478 | unichar = 0xfffd; | |
1479 | else | |
1480 | unichar = (cups_utf32_t) *crow; | |
1481 | work[i] = unichar; | |
1482 | i ++; | |
1483 | } | |
1484 | work[i] = 0; | |
1485 | ||
1486 | /* | |
1487 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
1488 | */ | |
1489 | worklen = cupsUTF32ToUTF8(dest, work, maxout); | |
1490 | cupsCharmapFree(encoding); | |
1491 | return (worklen); | |
1492 | } | |
1493 | ||
1494 | ||
1495 | /* | |
1496 | * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8. | |
1497 | */ | |
1498 | static int /* O - Count or -1 on error */ | |
1499 | conv_vbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */ | |
1500 | const char *src, /* I - Source string */ | |
1501 | const int maxout, /* I - Max output */ | |
1502 | const cups_encoding_t encoding) /* I - Encoding */ | |
1503 | { | |
1504 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
1505 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
1506 | _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */ | |
1507 | cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */ | |
1508 | cups_vbcs_t legchar; /* Legacy character value */ | |
1509 | cups_utf32_t unichar; /* Unicode character value */ | |
1510 | int i; /* Looping variable */ | |
1511 | int worklen; /* Internal UCS-4 string length */ | |
1512 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
1513 | /* Internal UCS-4 string */ | |
1514 | ||
1515 | /* | |
1516 | * Check for valid arguments and clear output... | |
1517 | */ | |
1518 | if ((dest == NULL) | |
1519 | || (src == NULL) | |
1520 | || (maxout < 1) | |
1521 | || (maxout > CUPS_MAX_USTRING) | |
1522 | || (encoding == CUPS_UTF8)) | |
1523 | return (-1); | |
1524 | *dest = '\0'; | |
1525 | ||
1526 | /* | |
1527 | * Find legacy charset map in cache... | |
1528 | */ | |
1529 | vmap = (_cups_vmap_t *) cupsCharmapGet(encoding); | |
1530 | if (vmap == NULL) | |
1531 | return (-1); | |
1532 | ||
1533 | /* | |
1534 | * Convert input legacy charset to internal UCS-4 (and insert BOM)... | |
1535 | */ | |
1536 | work[0] = 0xfeff; | |
1537 | for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++) | |
1538 | { | |
1539 | if (*src == '\0') | |
1540 | break; | |
1541 | legchar = (cups_vbcs_t) *src; | |
1542 | leadchar = (cups_sbcs_t) *src; | |
1543 | ||
1544 | /* | |
1545 | * Convert ASCII verbatim (optimization)... | |
1546 | */ | |
1547 | if (legchar <= 0x7f) | |
1548 | { | |
1549 | work[i] = (cups_utf32_t) legchar; | |
1550 | i ++; | |
1551 | continue; | |
1552 | } | |
1553 | ||
1554 | /* | |
1555 | * Convert 2-byte legacy character... | |
1556 | */ | |
1557 | if (vmap->lead2char[(int) leadchar] == leadchar) | |
1558 | { | |
1559 | src ++; | |
1560 | if (*src == '\0') | |
1561 | return (-1); | |
1562 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1563 | ||
1564 | /* | |
1565 | * Convert unknown character to Replacement Character... | |
1566 | */ | |
1567 | crow = vmap->char2uni[(int) ((legchar >> 8) & 0xff)]; | |
1568 | if (crow) | |
1569 | crow += (int) (legchar & 0xff); | |
1570 | if ((crow == NULL) || (*crow == 0)) | |
1571 | unichar = 0xfffd; | |
1572 | else | |
1573 | unichar = (cups_utf32_t) *crow; | |
1574 | work[i] = unichar; | |
1575 | i ++; | |
1576 | continue; | |
1577 | } | |
1578 | ||
1579 | /* | |
1580 | * Fetch 3-byte or 4-byte legacy character... | |
1581 | */ | |
1582 | if (vmap->lead3char[(int) leadchar] == leadchar) | |
1583 | { | |
1584 | src ++; | |
1585 | if (*src == '\0') | |
1586 | return (-1); | |
1587 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1588 | src ++; | |
1589 | if (*src == '\0') | |
1590 | return (-1); | |
1591 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1592 | } | |
1593 | else if (vmap->lead4char[(int) leadchar] == leadchar) | |
1594 | { | |
1595 | src ++; | |
1596 | if (*src == '\0') | |
1597 | return (-1); | |
1598 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1599 | src ++; | |
1600 | if (*src == '\0') | |
1601 | return (-1); | |
1602 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1603 | src ++; | |
1604 | if (*src == '\0') | |
1605 | return (-1); | |
1606 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1607 | } | |
1608 | else | |
1609 | return (-1); | |
1610 | ||
1611 | /* | |
1612 | * Find 3-byte or 4-byte legacy character... | |
1613 | */ | |
1614 | wide2uni = vmap->wide2uni; | |
1615 | wide2uni = (_cups_wide2uni_t *) bsearch(&legchar, | |
1616 | vmap->wide2uni, | |
1617 | vmap->widecount, | |
1618 | sizeof(_cups_wide2uni_t), | |
1619 | compare_wide); | |
1620 | ||
1621 | /* | |
1622 | * Convert unknown character to Replacement Character... | |
1623 | */ | |
1624 | if ((wide2uni == NULL) || (wide2uni->unichar == 0)) | |
1625 | unichar = 0xfffd; | |
1626 | else | |
1627 | unichar = wide2uni->unichar; | |
1628 | work[i] = unichar; | |
1629 | i ++; | |
1630 | } | |
1631 | work[i] = 0; | |
1632 | ||
1633 | /* | |
1634 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
1635 | */ | |
1636 | worklen = cupsUTF32ToUTF8(dest, work, maxout); | |
1637 | cupsCharmapFree(encoding); | |
1638 | return (worklen); | |
1639 | } | |
1640 | ||
1641 | /* | |
1642 | * 'compare_wide()' - Compare key for wide (VBCS) match. | |
1643 | */ | |
1644 | static int | |
1645 | compare_wide(const void *k1, /* I - Key char */ | |
1646 | const void *k2) /* I - Map char */ | |
1647 | { | |
1648 | cups_vbcs_t *kp = (cups_vbcs_t *) k1; | |
1649 | /* Key char pointer */ | |
1650 | _cups_wide2uni_t *mp = (_cups_wide2uni_t *) k2; | |
1651 | /* Map char pointer */ | |
1652 | cups_vbcs_t key; /* Legacy key character */ | |
1653 | cups_vbcs_t map; /* Legacy map character */ | |
1654 | int result; /* Result Value */ | |
1655 | ||
1656 | key = *kp; | |
1657 | map = mp->widechar; | |
1658 | if (key >= map) | |
1659 | result = (int) (key - map); | |
1660 | else | |
1661 | result = -1 * ((int) (map - key)); | |
1662 | return (result); | |
1663 | } | |
1664 | ||
1665 | ||
1666 | /* | |
1667 | * End of "$Id: transcode.c 4903 2006-01-10 20:02:46Z mike $" | |
1668 | */ |