]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | /* |
fa73b229 | 2 | * "$Id: transcode.c 4967 2006-01-24 03:42:15Z mike $" |
ef416fc2 | 3 | * |
4 | * Transcoding support for the Common UNIX Printing System (CUPS). | |
5 | * | |
6 | * Copyright 1997-2006 by Easy Software Products. | |
7 | * | |
8 | * These coded instructions, statements, and computer programs are | |
9 | * the property of Easy Software Products and are protected by Federal | |
10 | * copyright law. Distribution and use rights are outlined in the | |
11 | * file "LICENSE.txt" which should have been included with this file. | |
12 | * If this file is missing or damaged please contact Easy Software | |
13 | * Products at: | |
14 | * | |
15 | * Attn: CUPS Licensing Information | |
16 | * Easy Software Products | |
17 | * 44141 Airport View Drive, Suite 204 | |
18 | * Hollywood, Maryland 20636 USA | |
19 | * | |
20 | * Voice: (301) 373-9600 | |
21 | * EMail: cups-info@cups.org | |
22 | * WWW: http://www.cups.org | |
23 | * | |
24 | * Contents: | |
25 | * | |
26 | * cupsCharmapGet() - Get a character set map. | |
27 | * cupsCharmapFree() - Free a character set map. | |
28 | * cupsCharmapFlush() - Flush all character set maps out of cache. | |
fa73b229 | 29 | * _cupsCharmapFlush() - Flush all character set maps out of cache. |
ef416fc2 | 30 | * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set. |
31 | * cupsCharsetToUTF8() - Convert legacy character set to UTF-8. | |
32 | * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16. | |
33 | * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8. | |
34 | * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32. | |
35 | * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8. | |
36 | * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32. | |
37 | * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16. | |
38 | * get_charmap_count() - Count lines in a charmap file. | |
39 | * get_sbcs_charmap() - Get SBCS Charmap. | |
40 | * get_vbcs_charmap() - Get DBCS/VBCS Charmap. | |
41 | * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS. | |
42 | * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS. | |
43 | * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8. | |
44 | * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8. | |
45 | * compare_wide() - Compare key for wide (VBCS) match. | |
46 | */ | |
47 | ||
48 | /* | |
49 | * Include necessary headers... | |
50 | */ | |
51 | ||
52 | #include "globals.h" | |
53 | #include <stdlib.h> | |
54 | #include <errno.h> | |
55 | #include <time.h> | |
56 | ||
57 | ||
58 | /* | |
59 | * Prototypes... | |
60 | */ | |
61 | ||
62 | static int get_charmap_count(const char *filename); | |
63 | static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding, | |
64 | const char *filename); | |
65 | static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding, | |
66 | const char *filename); | |
67 | ||
68 | static int conv_utf8_to_sbcs(char *dest, | |
69 | const cups_utf8_t *src, | |
70 | const int maxout, | |
71 | const cups_encoding_t encoding); | |
72 | static int conv_utf8_to_vbcs(char *dest, | |
73 | const cups_utf8_t *src, | |
74 | const int maxout, | |
75 | const cups_encoding_t encoding); | |
76 | ||
77 | static int conv_sbcs_to_utf8(cups_utf8_t *dest, | |
78 | const char *src, | |
79 | const int maxout, | |
80 | const cups_encoding_t encoding); | |
81 | static int conv_vbcs_to_utf8(cups_utf8_t *dest, | |
82 | const char *src, | |
83 | const int maxout, | |
84 | const cups_encoding_t encoding); | |
85 | ||
86 | static int compare_wide(const void *k1, const void *k2); | |
87 | ||
88 | /* | |
89 | * 'cupsCharmapGet()' - Get a character set map. | |
90 | * | |
91 | * This code handles single-byte (SBCS), double-byte (DBCS), and | |
92 | * variable-byte (VBCS) character sets _without_ charset escapes... | |
93 | * This code does not handle multiple-byte character sets (MBCS) | |
94 | * (such as ISO-2022-JP) with charset switching via escapes... | |
95 | */ | |
96 | ||
97 | void * /* O - Charset map pointer */ | |
98 | cupsCharmapGet( | |
99 | const cups_encoding_t encoding) /* I - Encoding */ | |
100 | { | |
101 | char mapname[80]; /* Name of charset map */ | |
102 | char filename[1024]; /* Filename for charset map file */ | |
103 | _cups_globals_t *cg = _cupsGlobals(); /* Global data */ | |
104 | ||
105 | ||
106 | /* | |
107 | * Check for valid arguments... | |
108 | */ | |
109 | ||
110 | if ((encoding < 0) || (encoding >= CUPS_ENCODING_VBCS_END)) | |
111 | return (NULL); | |
112 | ||
113 | /* | |
114 | * Get the data directory and charset map name... | |
115 | */ | |
116 | ||
117 | snprintf(mapname, sizeof(mapname), "%s.txt", _cupsEncodingName(encoding)); | |
118 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", | |
119 | cg->cups_datadir, mapname); | |
120 | ||
121 | /* | |
122 | * Read charset map input file into cache... | |
123 | */ | |
124 | ||
125 | if (encoding < CUPS_ENCODING_SBCS_END) | |
126 | return (get_sbcs_charmap(encoding, filename)); | |
127 | else if (encoding < CUPS_ENCODING_VBCS_END) | |
128 | return (get_vbcs_charmap(encoding, filename)); | |
129 | else | |
130 | return (NULL); | |
131 | } | |
132 | ||
133 | /* | |
134 | * 'cupsCharmapFree()' - Free a character set map. | |
135 | * | |
136 | * This does not actually free; use 'cupsCharmapFlush()' for that. | |
137 | */ | |
138 | void | |
139 | cupsCharmapFree(const cups_encoding_t encoding) | |
140 | /* I - Encoding */ | |
141 | { | |
142 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
143 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
144 | _cups_globals_t *cg = _cupsGlobals(); | |
145 | /* Pointer to library globals */ | |
146 | ||
147 | /* | |
148 | * See if we already have this SBCS charset map loaded... | |
149 | */ | |
150 | for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next) | |
151 | { | |
152 | if (cmap->encoding == encoding) | |
153 | { | |
154 | if (cmap->used > 0) | |
155 | cmap->used --; | |
156 | return; | |
157 | } | |
158 | } | |
159 | ||
160 | /* | |
161 | * See if we already have this DBCS/VBCS charset map loaded... | |
162 | */ | |
163 | for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next) | |
164 | { | |
165 | if (vmap->encoding == encoding) | |
166 | { | |
167 | if (vmap->used > 0) | |
168 | vmap->used --; | |
169 | return; | |
170 | } | |
171 | } | |
172 | return; | |
173 | } | |
174 | ||
fa73b229 | 175 | |
ef416fc2 | 176 | /* |
177 | * 'cupsCharmapFlush()' - Flush all character set maps out of cache. | |
178 | */ | |
179 | void | |
180 | cupsCharmapFlush(void) | |
181 | { | |
fa73b229 | 182 | _cupsCharmapFlush(_cupsGlobals()); |
183 | } | |
184 | ||
185 | ||
186 | /* | |
187 | * '_cupsCharmapFlush()' - Flush all character set maps out of cache. | |
188 | */ | |
189 | ||
190 | void | |
191 | _cupsCharmapFlush(_cups_globals_t *cg) /* I - Global data */ | |
192 | { | |
193 | int i; /* Looping variable */ | |
194 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
195 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
196 | _cups_cmap_t *cnext; /* Next Legacy SBCS Charset Map */ | |
197 | _cups_vmap_t *vnext; /* Next Legacy VBCS Charset Map */ | |
198 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
199 | cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ | |
200 | cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ | |
201 | ||
ef416fc2 | 202 | |
203 | /* | |
204 | * Loop through SBCS charset map cache, free all memory... | |
205 | */ | |
206 | for (cmap = cg->cmap_cache; cmap != NULL; cmap = cnext) | |
207 | { | |
208 | for (i = 0; i < 256; i ++) | |
209 | { | |
210 | if ((srow = cmap->uni2char[i]) != NULL) | |
211 | free(srow); | |
212 | } | |
213 | cnext = cmap->next; | |
214 | free(cmap); | |
215 | } | |
216 | cg->cmap_cache = NULL; | |
217 | ||
218 | /* | |
219 | * Loop through DBCS/VBCS charset map cache, free all memory... | |
220 | */ | |
221 | for (vmap = cg->vmap_cache; vmap != NULL; vmap = vnext) | |
222 | { | |
223 | for (i = 0; i < 256; i ++) | |
224 | { | |
225 | if ((crow = vmap->char2uni[i]) != NULL) | |
226 | free(crow); | |
227 | } | |
228 | for (i = 0; i < 256; i ++) | |
229 | { | |
230 | if ((vrow = vmap->uni2char[i]) != NULL) | |
231 | free(vrow); | |
232 | } | |
233 | if (vmap->wide2uni) | |
234 | free(vmap->wide2uni); | |
235 | vnext = vmap->next; | |
236 | free(vmap); | |
237 | } | |
238 | cg->vmap_cache = NULL; | |
239 | return; | |
240 | } | |
241 | ||
242 | /* | |
243 | * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set. | |
244 | * | |
245 | * This code handles single-byte (SBCS), double-byte (DBCS), and | |
246 | * variable-byte (VBCS) character sets _without_ charset escapes... | |
247 | * This code does not handle multiple-byte character sets (MBCS) | |
248 | * (such as ISO-2022-JP) with charset switching via escapes... | |
249 | */ | |
250 | int /* O - Count or -1 on error */ | |
251 | cupsUTF8ToCharset(char *dest, /* O - Target string */ | |
252 | const cups_utf8_t *src, /* I - Source string */ | |
253 | const int maxout, /* I - Max output */ | |
254 | const cups_encoding_t encoding) /* I - Encoding */ | |
255 | { | |
256 | /* | |
257 | * Check for valid arguments... | |
258 | */ | |
259 | ||
260 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
261 | return (-1); | |
262 | ||
263 | /* | |
264 | * Handle identity conversions... | |
265 | */ | |
266 | ||
267 | if (encoding == CUPS_UTF8 || | |
268 | encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) | |
269 | { | |
270 | strlcpy(dest, (char *)src, maxout); | |
271 | return (strlen(dest)); | |
272 | } | |
273 | ||
274 | /* | |
275 | * Convert input UTF-8 to legacy charset... | |
276 | */ | |
277 | if (encoding < CUPS_ENCODING_SBCS_END) | |
278 | return (conv_utf8_to_sbcs(dest, src, maxout, encoding)); | |
279 | else if (encoding < CUPS_ENCODING_VBCS_END) | |
280 | return (conv_utf8_to_vbcs(dest, src, maxout, encoding)); | |
281 | else | |
282 | return (-1); | |
283 | } | |
284 | ||
285 | /* | |
286 | * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8. | |
287 | * | |
288 | * This code handles single-byte (SBCS), double-byte (DBCS), and | |
289 | * variable-byte (VBCS) character sets _without_ charset escapes... | |
290 | * This code does not handle multiple-byte character sets (MBCS) | |
291 | * (such as ISO-2022-JP) with charset switching via escapes... | |
292 | */ | |
293 | int /* O - Count or -1 on error */ | |
294 | cupsCharsetToUTF8(cups_utf8_t *dest, /* O - Target string */ | |
295 | const char *src, /* I - Source string */ | |
296 | const int maxout, /* I - Max output */ | |
297 | const cups_encoding_t encoding) /* I - Encoding */ | |
298 | { | |
299 | /* | |
300 | * Check for valid arguments... | |
301 | */ | |
302 | ||
303 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
304 | return (-1); | |
305 | ||
306 | /* | |
307 | * Handle identity conversions... | |
308 | */ | |
309 | ||
310 | if (encoding == CUPS_UTF8 || | |
311 | encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) | |
312 | { | |
313 | strlcpy((char *)dest, src, maxout); | |
314 | return (strlen((char *)dest)); | |
315 | } | |
316 | ||
317 | /* | |
318 | * Convert input legacy charset to UTF-8... | |
319 | */ | |
320 | if (encoding < CUPS_ENCODING_SBCS_END) | |
321 | return (conv_sbcs_to_utf8(dest, src, maxout, encoding)); | |
322 | else if (encoding < CUPS_ENCODING_VBCS_END) | |
323 | return (conv_vbcs_to_utf8(dest, src, maxout, encoding)); | |
324 | else | |
325 | return (-1); | |
326 | } | |
327 | ||
328 | /* | |
329 | * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16. | |
330 | * | |
331 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
332 | */ | |
333 | int /* O - Count or -1 on error */ | |
334 | cupsUTF8ToUTF16(cups_utf16_t *dest, /* O - Target string */ | |
335 | const cups_utf8_t *src, /* I - Source string */ | |
336 | const int maxout) /* I - Max output */ | |
337 | { | |
338 | int worklen; /* Internal UCS-4 string length */ | |
339 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
340 | /* Internal UCS-4 string */ | |
341 | ||
342 | /* | |
343 | * Check for valid arguments and clear output... | |
344 | */ | |
345 | if ((dest == NULL) | |
346 | || (src == NULL) | |
347 | || (maxout < 1) | |
348 | || (maxout > CUPS_MAX_USTRING)) | |
349 | return (-1); | |
350 | *dest = 0; | |
351 | ||
352 | /* | |
353 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
354 | */ | |
355 | worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING); | |
356 | if (worklen < 0) | |
357 | return (-1); | |
358 | ||
359 | /* | |
360 | * Convert internal UCS-4 to output UTF-16... | |
361 | */ | |
362 | worklen = cupsUTF32ToUTF16(dest, work, maxout); | |
363 | return (worklen); | |
364 | } | |
365 | ||
366 | /* | |
367 | * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8. | |
368 | * | |
369 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
370 | */ | |
371 | int /* O - Count or -1 on error */ | |
372 | cupsUTF16ToUTF8(cups_utf8_t *dest, /* O - Target string */ | |
373 | const cups_utf16_t *src, /* I - Source string */ | |
374 | const int maxout) /* I - Max output */ | |
375 | { | |
376 | int worklen; /* Internal UCS-4 string length */ | |
377 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
378 | /* Internal UCS-4 string */ | |
379 | ||
380 | /* | |
381 | * Check for valid arguments and clear output... | |
382 | */ | |
383 | if ((dest == NULL) | |
384 | || (src == NULL) | |
385 | || (maxout < 1) | |
386 | || (maxout > CUPS_MAX_USTRING)) | |
387 | return (-1); | |
388 | *dest = 0; | |
389 | ||
390 | /* | |
391 | * Convert input UTF-16 to internal UCS-4 (and byte-swap)... | |
392 | */ | |
393 | worklen = cupsUTF16ToUTF32(work, src, CUPS_MAX_USTRING); | |
394 | if (worklen < 0) | |
395 | return (-1); | |
396 | ||
397 | /* | |
398 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
399 | */ | |
400 | worklen = cupsUTF32ToUTF8(dest, work, maxout); | |
401 | return (worklen); | |
402 | } | |
403 | ||
404 | /* | |
405 | * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32. | |
406 | * | |
407 | * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows... | |
408 | * | |
409 | * UTF-32 char UTF-8 char(s) | |
410 | * -------------------------------------------------- | |
411 | * 0 to 127 = 0xxxxxxx (US-ASCII) | |
412 | * 128 to 2047 = 110xxxxx 10yyyyyy | |
413 | * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz | |
414 | * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx | |
415 | * | |
416 | * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4, | |
417 | * which would convert to five- or six-octet UTF-8 sequences... | |
418 | * | |
419 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
420 | */ | |
421 | int /* O - Count or -1 on error */ | |
422 | cupsUTF8ToUTF32(cups_utf32_t *dest, /* O - Target string */ | |
423 | const cups_utf8_t *src, /* I - Source string */ | |
424 | const int maxout) /* I - Max output */ | |
425 | { | |
426 | cups_utf8_t *first = (cups_utf8_t *) src; | |
427 | size_t srclen; /* Source string length */ | |
428 | int i; /* Looping variable */ | |
429 | cups_utf32_t ch; /* Character value */ | |
430 | cups_utf32_t next; /* Next character value */ | |
431 | cups_utf32_t ch32; /* UTF-32 character value */ | |
432 | ||
433 | /* | |
434 | * Check for valid arguments and clear output... | |
435 | */ | |
436 | if ((dest == NULL) | |
437 | || (src == NULL) | |
438 | || (maxout < 1) | |
439 | || (maxout > CUPS_MAX_USTRING)) | |
440 | return (-1); | |
441 | *dest = 0; | |
442 | ||
443 | /* | |
444 | * Convert input UTF-8 to output UTF-32 (and insert BOM)... | |
445 | */ | |
446 | *dest = 0xfeff; | |
447 | dest ++; | |
448 | srclen = strlen((char *) src); | |
449 | for (i = 1; i < (maxout - 1); src ++, dest ++) | |
450 | { | |
451 | ch = (cups_utf32_t) *src; | |
452 | ch &= 0xff; | |
453 | if (ch == 0) | |
454 | break; | |
455 | i ++; | |
456 | ||
457 | /* | |
458 | * Convert UTF-8 character(s) to UTF-32 character... | |
459 | */ | |
460 | if ((ch & 0x7f) == ch) | |
461 | { | |
462 | /* | |
463 | * One-octet UTF-8 <= 127 (US-ASCII)... | |
464 | */ | |
465 | *dest = ch; | |
466 | } | |
467 | else if ((ch & 0xe0) == 0xc0) | |
468 | { | |
469 | /* | |
470 | * Two-octet UTF-8 <= 2047 (Latin-x)... | |
471 | */ | |
472 | src ++; | |
473 | next = (cups_utf32_t) *src; | |
474 | next &= 0xff; | |
475 | if (next == 0) | |
476 | return (-1); | |
477 | ch32 = ((ch & 0x1f) << 6) | (next & 0x3f); | |
478 | ||
479 | /* | |
480 | * Check for non-shortest form (invalid UTF-8)... | |
481 | */ | |
482 | if (ch32 <= 127) | |
483 | return (-1); | |
484 | *dest = ch32; | |
485 | } | |
486 | else if ((ch & 0xf0) == 0xe0) | |
487 | { | |
488 | /* | |
489 | * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)... | |
490 | */ | |
491 | src ++; | |
492 | next = (cups_utf32_t) *src; | |
493 | next &= 0xff; | |
494 | if (next == 0) | |
495 | return (-1); | |
496 | ch32 = ((ch & 0x1f) << 6) | (next & 0x3f); | |
497 | src ++; | |
498 | next = (cups_utf32_t) *src; | |
499 | next &= 0xff; | |
500 | if (next == 0) | |
501 | return (-1); | |
502 | ch32 = ((ch32 << 6) | (next & 0x3f)); | |
503 | ||
504 | /* | |
505 | * Check for non-shortest form (invalid UTF-8)... | |
506 | */ | |
507 | if (ch32 <= 2047) | |
508 | return (-1); | |
509 | *dest = ch32; | |
510 | } | |
511 | else if ((ch & 0xf8) == 0xf0) | |
512 | { | |
513 | /* | |
514 | * Four-octet UTF-8 to Replacement Character... | |
515 | */ | |
516 | if (((src - first) + 3) >= srclen) | |
517 | return (-1); | |
518 | src += 3; | |
519 | *dest = 0xfffd; | |
520 | } | |
521 | else if ((ch & 0xfc) == 0xf8) | |
522 | { | |
523 | /* | |
524 | * Five-octet UTF-8 (invalid strict UTF-32)... | |
525 | */ | |
526 | return (-1); | |
527 | } | |
528 | else if ((ch & 0xfe) == 0xfc) | |
529 | { | |
530 | /* | |
531 | * Six-octet UTF-8 (invalid strict UTF-32)... | |
532 | */ | |
533 | return (-1); | |
534 | } | |
535 | else | |
536 | { | |
537 | /* | |
538 | * More than six-octet (invalid UTF-8 sequence)... | |
539 | */ | |
540 | return (-1); | |
541 | } | |
542 | ||
543 | /* | |
544 | * Check for UTF-16 surrogate (illegal UTF-8)... | |
545 | */ | |
546 | if ((*dest >= 0xd800) && (*dest <= 0xdfff)) | |
547 | return (-1); | |
548 | ||
549 | /* | |
550 | * Check for beyond Plane 16 (invalid UTF-8)... | |
551 | */ | |
552 | if (*dest > 0x10ffff) | |
553 | return (-1); | |
554 | } | |
555 | *dest = 0; | |
556 | return (i); | |
557 | } | |
558 | ||
559 | /* | |
560 | * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8. | |
561 | * | |
562 | * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows... | |
563 | * | |
564 | * UTF-32 char UTF-8 char(s) | |
565 | * -------------------------------------------------- | |
566 | * 0 to 127 = 0xxxxxxx (US-ASCII) | |
567 | * 128 to 2047 = 110xxxxx 10yyyyyy | |
568 | * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz | |
569 | * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx | |
570 | * | |
571 | * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4, | |
572 | * which would convert to five- or six-octet UTF-8 sequences... | |
573 | * | |
574 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
575 | */ | |
576 | int /* O - Count or -1 on error */ | |
577 | cupsUTF32ToUTF8(cups_utf8_t *dest, /* O - Target string */ | |
578 | const cups_utf32_t *src, /* I - Source string */ | |
579 | const int maxout) /* I - Max output */ | |
580 | { | |
581 | cups_utf32_t *first = (cups_utf32_t *) src; | |
582 | /* First source char */ | |
583 | cups_utf8_t *start = dest; /* Start of destination string */ | |
584 | int i; /* Looping variable */ | |
585 | int swap = 0; /* Byte-swap input to output */ | |
586 | cups_utf32_t ch; /* Character value */ | |
587 | ||
588 | /* | |
589 | * Check for valid arguments and clear output... | |
590 | */ | |
591 | if ((dest == NULL) | |
592 | || (src == NULL) | |
593 | || (maxout < 1)) | |
594 | return (-1); | |
595 | *dest = '\0'; | |
596 | ||
597 | /* | |
598 | * Check for leading BOM in UTF-32 and inverted BOM... | |
599 | */ | |
600 | if (*src == 0xfffe0000) | |
601 | swap = 1; | |
602 | ||
603 | /* | |
604 | * Convert input UTF-32 to output UTF-8... | |
605 | */ | |
606 | for (i = 0; i < (maxout - 1); src ++) | |
607 | { | |
608 | ch = *src; | |
609 | if (ch == 0) | |
610 | break; | |
611 | ||
612 | /* | |
613 | * Byte swap input UTF-32, if necessary... | |
614 | */ | |
615 | if (swap) | |
616 | ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000)); | |
617 | ||
618 | /* | |
619 | * Check for leading BOM (and delete from output)... | |
620 | */ | |
621 | if ((src == first) && (ch == 0xfeff)) | |
622 | continue; | |
623 | ||
624 | /* | |
625 | * Check for beyond Plane 16 (invalid UTF-32)... | |
626 | */ | |
627 | if (ch > 0x10ffff) | |
628 | return (-1); | |
629 | ||
630 | /* | |
631 | * Convert beyond Plane 0 (BMP) to Replacement Character... | |
632 | */ | |
633 | if (ch > 0xffff) | |
634 | ch = 0xfffd; | |
635 | ||
636 | /* | |
637 | * Convert UTF-32 character to UTF-8 character(s)... | |
638 | */ | |
639 | if (ch <= 0x7f) | |
640 | { | |
641 | /* | |
642 | * One-octet UTF-8 <= 127 (US-ASCII)... | |
643 | */ | |
644 | *dest = (cups_utf8_t) ch; | |
645 | dest ++; | |
646 | i ++; | |
647 | } | |
648 | else if (ch <= 0x7ff) | |
649 | { | |
650 | /* | |
651 | * Two-octet UTF-8 <= 2047 (Latin-x)... | |
652 | */ | |
653 | if (i > (maxout - 2)) | |
654 | break; | |
655 | *dest = (cups_utf8_t) (0xc0 | ((ch >> 6) & 0x1f)); | |
656 | dest ++; | |
657 | i ++; | |
658 | *dest = (cups_utf8_t) (0x80 | (ch & 0x3f)); | |
659 | dest ++; | |
660 | i ++; | |
661 | } | |
662 | else | |
663 | { | |
664 | /* | |
665 | * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)... | |
666 | */ | |
667 | if (i > (maxout - 3)) | |
668 | break; | |
669 | *dest = (cups_utf8_t) (0xe0 | ((ch >> 12) & 0x0f)); | |
670 | dest ++; | |
671 | i ++; | |
672 | *dest = (cups_utf8_t) (0x80 | ((ch >> 6) & 0x3f)); | |
673 | dest ++; | |
674 | i ++; | |
675 | *dest = (cups_utf8_t) (0x80 | (ch & 0x3f)); | |
676 | dest ++; | |
677 | i ++; | |
678 | } | |
679 | } | |
680 | *dest = '\0'; | |
681 | i = (int) (dest - start); | |
682 | return (i); | |
683 | } | |
684 | ||
685 | /* | |
686 | * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32. | |
687 | * | |
688 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
689 | */ | |
690 | int /* O - Count or -1 on error */ | |
691 | cupsUTF16ToUTF32(cups_utf32_t *dest, /* O - Target string */ | |
692 | const cups_utf16_t *src, /* I - Source string */ | |
693 | const int maxout) /* I - Max output */ | |
694 | { | |
695 | int i; /* Looping variable */ | |
696 | int swap = 0; /* Byte-swap input to output */ | |
697 | int surrogate = 0; /* Expecting low-half surrogate */ | |
698 | cups_utf32_t ch; /* Character value */ | |
699 | ||
700 | /* | |
701 | * Check for valid arguments and clear output... | |
702 | */ | |
703 | if ((dest == NULL) | |
704 | || (src == NULL) | |
705 | || (maxout < 1) | |
706 | || (maxout > CUPS_MAX_USTRING)) | |
707 | return (-1); | |
708 | *dest = 0; | |
709 | ||
710 | /* | |
711 | * Check for leading BOM in UTF-16 and inverted BOM... | |
712 | */ | |
713 | if (*src == 0xfffe) | |
714 | swap = 1; | |
715 | ||
716 | /* | |
717 | * Convert input UTF-16 to output UTF-32... | |
718 | */ | |
719 | for (i = 0; i < (maxout - 1); src ++) | |
720 | { | |
721 | ch = (cups_utf32_t) (*src & 0xffff); | |
722 | if (ch == 0) | |
723 | break; | |
724 | i ++; | |
725 | ||
726 | /* | |
727 | * Byte swap input UTF-16, if necessary... | |
728 | */ | |
729 | if (swap) | |
730 | ch = (cups_utf32_t) ((ch << 8) | (ch >> 8)); | |
731 | ||
732 | /* | |
733 | * Discard expected UTF-16 low-half surrogate... | |
734 | */ | |
735 | if ((ch >= 0xdc00) && (ch <= 0xdfff)) | |
736 | { | |
737 | if (surrogate == 0) | |
738 | return (-1); | |
739 | surrogate = 0; | |
740 | continue; | |
741 | } | |
742 | ||
743 | /* | |
744 | * Convert UTF-16 high-half surrogate to Replacement Character... | |
745 | */ | |
746 | if ((ch >= 0xd800) && (ch <= 0xdbff)) | |
747 | { | |
748 | if (surrogate == 1) | |
749 | return (-1); | |
750 | surrogate = 1; | |
751 | ch = 0xfffd; | |
752 | } | |
753 | *dest = ch; | |
754 | dest ++; | |
755 | } | |
756 | *dest = 0; | |
757 | return (i); | |
758 | } | |
759 | ||
760 | /* | |
761 | * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16. | |
762 | * | |
763 | * This code does not support Unicode beyond 16-bits (Plane 0)... | |
764 | */ | |
765 | int /* O - Count or -1 on error */ | |
766 | cupsUTF32ToUTF16(cups_utf16_t *dest, /* O - Target string */ | |
767 | const cups_utf32_t *src, /* I - Source string */ | |
768 | const int maxout) /* I - Max output */ | |
769 | { | |
770 | int i; /* Looping variable */ | |
771 | int swap = 0; /* Byte-swap input to output */ | |
772 | cups_utf32_t ch; /* Character value */ | |
773 | ||
774 | /* | |
775 | * Check for valid arguments and clear output... | |
776 | */ | |
777 | if ((dest == NULL) | |
778 | || (src == NULL) | |
779 | || (maxout < 1) | |
780 | || (maxout > CUPS_MAX_USTRING)) | |
781 | return (-1); | |
782 | *dest = 0; | |
783 | ||
784 | /* | |
785 | * Check for leading BOM in UTF-32 and inverted BOM... | |
786 | */ | |
787 | if (*src == 0xfffe0000) | |
788 | swap = 1; | |
789 | ||
790 | /* | |
791 | * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)... | |
792 | */ | |
793 | for (i = 0; i < (maxout - 1); src ++, dest ++) | |
794 | { | |
795 | ch = *src; | |
796 | if (ch == 0) | |
797 | break; | |
798 | i ++; | |
799 | ||
800 | /* | |
801 | * Byte swap input UTF-32, if necessary... | |
802 | */ | |
803 | if (swap) | |
804 | ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000)); | |
805 | ||
806 | /* | |
807 | * Check for UTF-16 surrogate (illegal UTF-32)... | |
808 | */ | |
809 | if ((ch >= 0xd800) && (ch <= 0xdfff)) | |
810 | return (-1); | |
811 | ||
812 | /* | |
813 | * Check for beyond Plane 16 (invalid UTF-32)... | |
814 | */ | |
815 | if (ch > 0x10ffff) | |
816 | return (-1); | |
817 | ||
818 | /* | |
819 | * Convert beyond Plane 0 (BMP) to Replacement Character... | |
820 | */ | |
821 | if (ch > 0xffff) | |
822 | ch = 0xfffd; | |
823 | *dest = (cups_utf16_t) ch; | |
824 | } | |
825 | *dest = 0; | |
826 | return (i); | |
827 | } | |
828 | ||
829 | /* | |
830 | * 'get_charmap_count()' - Count lines in a charmap file. | |
831 | */ | |
832 | static int /* O - Count or -1 on error */ | |
833 | get_charmap_count(const char *filename) /* I - Charmap Filename */ | |
834 | { | |
835 | int i; /* Looping variable */ | |
836 | cups_file_t *fp; /* Map input file pointer */ | |
837 | char *s; /* Line parsing pointer */ | |
838 | char line[256]; /* Line from input map file */ | |
839 | cups_utf32_t unichar; /* Unicode character value */ | |
840 | ||
841 | /* | |
842 | * Open map input file... | |
843 | */ | |
844 | if ((filename == NULL) || (*filename == '\0')) | |
845 | return (-1); | |
846 | fp = cupsFileOpen(filename, "r"); | |
847 | if (fp == NULL) | |
848 | return (-1); | |
849 | ||
850 | /* | |
851 | * Count lines in map input file... | |
852 | */ | |
853 | for (i = 0; i < CUPS_MAX_CHARMAP_LINES;) | |
854 | { | |
855 | s = cupsFileGets(fp, line, sizeof(line)); | |
856 | if (s == NULL) | |
857 | break; | |
858 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
859 | continue; | |
860 | while ((*s != 0) && (*s != ' ') && (*s != '\t')) | |
861 | s ++; | |
862 | while ((*s == ' ') || (*s == '\t')) | |
863 | s ++; | |
864 | if (strncmp (s, "0x", 2) == 0) | |
865 | s += 2; | |
866 | if ((sscanf(s, "%lx", &unichar) != 1) | |
867 | || (unichar > 0xffff)) | |
868 | { | |
869 | cupsFileClose(fp); | |
870 | return (-1); | |
871 | } | |
872 | i ++; | |
873 | } | |
874 | if (i == 0) | |
875 | i = -1; | |
876 | ||
877 | /* | |
878 | * Close file and return charmap count (non-comment line count)... | |
879 | */ | |
880 | cupsFileClose(fp); | |
881 | return (i); | |
882 | } | |
883 | ||
884 | /* | |
885 | * 'get_sbcs_charmap()' - Get SBCS Charmap. | |
886 | */ | |
887 | static _cups_cmap_t * /* O - Charmap or 0 on error */ | |
888 | get_sbcs_charmap(const cups_encoding_t encoding, | |
889 | /* I - Charmap Encoding */ | |
890 | const char *filename) /* I - Charmap Filename */ | |
891 | { | |
892 | int i; /* Loop variable */ | |
893 | unsigned long legchar; /* Legacy character value */ | |
894 | cups_utf32_t unichar; /* Unicode character value */ | |
895 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
896 | cups_file_t *fp; /* Charset map file pointer */ | |
897 | char *s; /* Line parsing pointer */ | |
898 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
899 | cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ | |
900 | char line[256]; /* Line from charset map file */ | |
901 | _cups_globals_t *cg = _cupsGlobals(); | |
902 | /* Pointer to library globals */ | |
903 | ||
904 | /* | |
905 | * Check for valid arguments... | |
906 | */ | |
907 | if ((encoding < 0) || (filename == NULL)) | |
908 | return (NULL); | |
909 | ||
910 | /* | |
911 | * See if we already have this SBCS charset map loaded... | |
912 | */ | |
913 | for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next) | |
914 | { | |
915 | if (cmap->encoding == encoding) | |
916 | { | |
917 | cmap->used ++; | |
918 | return ((void *) cmap); | |
919 | } | |
920 | } | |
921 | ||
922 | /* | |
923 | * Open SBCS charset map input file... | |
924 | */ | |
925 | fp = cupsFileOpen(filename, "r"); | |
926 | if (fp == NULL) | |
927 | return (NULL); | |
928 | ||
929 | /* | |
930 | * Allocate memory for SBCS charset map and add to cache... | |
931 | */ | |
932 | cmap = (_cups_cmap_t *) calloc(1, sizeof(_cups_cmap_t)); | |
933 | if (cmap == NULL) | |
934 | { | |
935 | cupsFileClose(fp); | |
936 | return (NULL); | |
937 | } | |
938 | cmap->next = cg->cmap_cache; | |
939 | cg->cmap_cache = cmap; | |
940 | cmap->used ++; | |
941 | cmap->encoding = encoding; | |
942 | ||
943 | /* | |
944 | * Save SBCS charset map into memory for transcoding... | |
945 | */ | |
946 | for (i = 0; i < CUPS_MAX_CHARMAP_LINES;) | |
947 | { | |
948 | s = cupsFileGets(fp, line, sizeof(line)); | |
949 | if (s == NULL) | |
950 | break; | |
951 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
952 | continue; | |
953 | if (strncmp (s, "0x", 2) == 0) | |
954 | s += 2; | |
955 | if ((sscanf(s, "%lx", &legchar) != 1) | |
956 | || (legchar > 0xff)) | |
957 | { | |
958 | cupsFileClose(fp); | |
959 | cupsCharmapFlush(); | |
960 | return (NULL); | |
961 | } | |
962 | while ((*s != 0) && (*s != ' ') && (*s != '\t')) | |
963 | s ++; | |
964 | while ((*s == ' ') || (*s == '\t')) | |
965 | s ++; | |
966 | if (strncmp (s, "0x", 2) == 0) | |
967 | s += 2; | |
968 | if (sscanf(s, "%lx", &unichar) != 1) | |
969 | { | |
970 | cupsFileClose(fp); | |
971 | cupsCharmapFlush(); | |
972 | return (NULL); | |
973 | } | |
974 | i ++; | |
975 | ||
976 | /* | |
977 | * Convert beyond Plane 0 (BMP) to Replacement Character... | |
978 | */ | |
979 | if (unichar > 0xffff) | |
980 | unichar = 0xfffd; | |
981 | ||
982 | /* | |
983 | * Save legacy to Unicode mapping in direct lookup table... | |
984 | */ | |
985 | crow = &cmap->char2uni[(int) legchar]; | |
986 | *crow = (cups_ucs2_t) (unichar & 0xffff); | |
987 | ||
988 | /* | |
989 | * Save Unicode to legacy mapping in indirect lookup table... | |
990 | */ | |
991 | srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)]; | |
992 | if (srow == NULL) | |
993 | { | |
994 | srow = (cups_sbcs_t *) calloc(256, sizeof(cups_sbcs_t)); | |
995 | if (srow == NULL) | |
996 | { | |
997 | cupsFileClose(fp); | |
998 | cupsCharmapFlush(); | |
999 | return (NULL); | |
1000 | } | |
1001 | cmap->uni2char[(int) ((unichar >> 8) & 0xff)] = srow; | |
1002 | } | |
1003 | srow += (int) (unichar & 0xff); | |
1004 | ||
1005 | /* | |
1006 | * Convert Replacement Character to visible replacement... | |
1007 | */ | |
1008 | if (unichar == 0xfffd) | |
1009 | legchar = (unsigned long) '?'; | |
1010 | ||
1011 | /* | |
1012 | * First (oldest) legacy character uses Unicode mapping cell... | |
1013 | */ | |
1014 | if (*srow == 0) | |
1015 | *srow = (cups_sbcs_t) legchar; | |
1016 | } | |
1017 | cupsFileClose(fp); | |
1018 | return (cmap); | |
1019 | } | |
1020 | ||
1021 | /* | |
1022 | * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap. | |
1023 | */ | |
1024 | static _cups_vmap_t * /* O - Charmap or 0 on error */ | |
1025 | get_vbcs_charmap(const cups_encoding_t encoding, | |
1026 | /* I - Charmap Encoding */ | |
1027 | const char *filename) /* I - Charmap Filename */ | |
1028 | { | |
1029 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
1030 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
1031 | cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ | |
1032 | _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */ | |
1033 | cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */ | |
1034 | unsigned long legchar; /* Legacy character value */ | |
1035 | cups_utf32_t unichar; /* Unicode character value */ | |
1036 | int mapcount; /* Count of lines in charmap file */ | |
1037 | cups_file_t *fp; /* Charset map file pointer */ | |
1038 | char *s; /* Line parsing pointer */ | |
1039 | char line[256]; /* Line from charset map file */ | |
1040 | int i; /* Loop variable */ | |
1041 | int wide; /* 32-bit legacy char */ | |
1042 | _cups_globals_t *cg = _cupsGlobals(); | |
1043 | /* Pointer to library globals */ | |
1044 | ||
1045 | /* | |
1046 | * Check for valid arguments... | |
1047 | */ | |
1048 | if ((encoding < 0) || (filename == NULL)) | |
1049 | return (NULL); | |
1050 | ||
1051 | /* | |
1052 | * See if we already have this DBCS/VBCS charset map loaded... | |
1053 | */ | |
1054 | for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next) | |
1055 | { | |
1056 | if (vmap->encoding == encoding) | |
1057 | { | |
1058 | vmap->used ++; | |
1059 | return ((void *) vmap); | |
1060 | } | |
1061 | } | |
1062 | ||
1063 | /* | |
1064 | * Count lines in charmap file... | |
1065 | */ | |
1066 | mapcount = get_charmap_count(filename); | |
1067 | if (mapcount <= 0) | |
1068 | return (NULL); | |
1069 | ||
1070 | /* | |
1071 | * Open VBCS charset map input file... | |
1072 | */ | |
1073 | fp = cupsFileOpen(filename, "r"); | |
1074 | if (fp == NULL) | |
1075 | return (NULL); | |
1076 | ||
1077 | /* | |
1078 | * Allocate memory for DBCS/VBCS charset map and add to cache... | |
1079 | */ | |
1080 | vmap = (_cups_vmap_t *) calloc(1, sizeof(_cups_vmap_t)); | |
1081 | if (vmap == NULL) | |
1082 | { | |
1083 | cupsFileClose(fp); | |
1084 | return (NULL); | |
1085 | } | |
1086 | vmap->next = cg->vmap_cache; | |
1087 | cg->vmap_cache = vmap; | |
1088 | vmap->used ++; | |
1089 | vmap->encoding = encoding; | |
1090 | ||
1091 | /* | |
1092 | * Save DBCS/VBCS charset map into memory for transcoding... | |
1093 | */ | |
1094 | leadchar = 0; | |
1095 | wide2uni = NULL; | |
1096 | ||
1097 | for (i = 0, wide = 0; i < mapcount; ) | |
1098 | { | |
1099 | s = cupsFileGets(fp, line, sizeof(line)); | |
1100 | if (s == NULL) | |
1101 | break; | |
1102 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1103 | continue; | |
1104 | if (strncmp (s, "0x", 2) == 0) | |
1105 | s += 2; | |
1106 | if ((sscanf(s, "%lx", &legchar) != 1) | |
1107 | || ((legchar > 0xffff) && (encoding < CUPS_ENCODING_DBCS_END))) | |
1108 | { | |
1109 | cupsFileClose(fp); | |
1110 | cupsCharmapFlush(); | |
1111 | return (NULL); | |
1112 | } | |
1113 | while ((*s != 0) && (*s != ' ') && (*s != '\t')) | |
1114 | s ++; | |
1115 | while ((*s == ' ') || (*s == '\t')) | |
1116 | s ++; | |
1117 | if (strncmp (s, "0x", 2) == 0) | |
1118 | s += 2; | |
1119 | if (sscanf(s, "%lx", &unichar) != 1) | |
1120 | { | |
1121 | cupsFileClose(fp); | |
1122 | cupsCharmapFlush(); | |
1123 | return (NULL); | |
1124 | } | |
1125 | i ++; | |
1126 | ||
1127 | /* | |
1128 | * Convert beyond Plane 0 (BMP) to Replacement Character... | |
1129 | */ | |
1130 | if (unichar > 0xffff) | |
1131 | unichar = 0xfffd; | |
1132 | ||
1133 | /* | |
1134 | * Save lead char of 2/3/4-byte legacy char... | |
1135 | */ | |
1136 | if ((legchar > 0xff) && (legchar <= 0xffff)) | |
1137 | { | |
1138 | leadchar = (cups_sbcs_t) (legchar >> 8); | |
1139 | vmap->lead2char[leadchar] = leadchar; | |
1140 | } | |
1141 | if ((legchar > 0xffff) && (legchar <= 0xffffff)) | |
1142 | { | |
1143 | leadchar = (cups_sbcs_t) (legchar >> 16); | |
1144 | vmap->lead3char[leadchar] = leadchar; | |
1145 | } | |
1146 | if (legchar > 0xffffff) | |
1147 | { | |
1148 | leadchar = (cups_sbcs_t) (legchar >> 24); | |
1149 | vmap->lead4char[leadchar] = leadchar; | |
1150 | } | |
1151 | ||
1152 | /* | |
1153 | * Save Legacy to Unicode mapping... | |
1154 | */ | |
1155 | if (legchar <= 0xffff) | |
1156 | { | |
1157 | /* | |
1158 | * Save DBCS 16-bit to Unicode mapping in indirect lookup table... | |
1159 | */ | |
1160 | crow = vmap->char2uni[(int) leadchar]; | |
1161 | if (crow == NULL) | |
1162 | { | |
1163 | crow = (cups_ucs2_t *) calloc(256, sizeof(cups_ucs2_t)); | |
1164 | if (crow == NULL) | |
1165 | { | |
1166 | cupsFileClose(fp); | |
1167 | cupsCharmapFlush(); | |
1168 | return (NULL); | |
1169 | } | |
1170 | vmap->char2uni[(int) leadchar] = crow; | |
1171 | } | |
1172 | crow += (int) (legchar & 0xff); | |
1173 | *crow = (cups_ucs2_t) unichar; | |
1174 | } | |
1175 | else | |
1176 | { | |
1177 | /* | |
1178 | * Save VBCS 32-bit to Unicode mapping in sorted list table... | |
1179 | */ | |
1180 | if (wide == 0) | |
1181 | { | |
1182 | wide = 1; | |
1183 | vmap->widecount = (mapcount - i + 1); | |
1184 | wide2uni = (_cups_wide2uni_t *) | |
1185 | calloc(vmap->widecount, sizeof(_cups_wide2uni_t)); | |
1186 | if (wide2uni == NULL) | |
1187 | { | |
1188 | cupsFileClose(fp); | |
1189 | cupsCharmapFlush(); | |
1190 | return (NULL); | |
1191 | } | |
1192 | vmap->wide2uni = wide2uni; | |
1193 | } | |
1194 | wide2uni->widechar = (cups_vbcs_t) legchar; | |
1195 | wide2uni->unichar = (cups_ucs2_t)unichar; | |
1196 | wide2uni ++; | |
1197 | } | |
1198 | ||
1199 | /* | |
1200 | * Save Unicode to legacy mapping in indirect lookup table... | |
1201 | */ | |
1202 | vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)]; | |
1203 | if (vrow == NULL) | |
1204 | { | |
1205 | vrow = (cups_vbcs_t *) calloc(256, sizeof(cups_vbcs_t)); | |
1206 | if (vrow == NULL) | |
1207 | { | |
1208 | cupsFileClose(fp); | |
1209 | cupsCharmapFlush(); | |
1210 | return (NULL); | |
1211 | } | |
1212 | vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow; | |
1213 | } | |
1214 | vrow += (int) (unichar & 0xff); | |
1215 | ||
1216 | /* | |
1217 | * Convert Replacement Character to visible replacement... | |
1218 | */ | |
1219 | if (unichar == 0xfffd) | |
1220 | legchar = (unsigned long) '?'; | |
1221 | ||
1222 | /* | |
1223 | * First (oldest) legacy character uses Unicode mapping cell... | |
1224 | */ | |
1225 | if (*vrow == 0) | |
1226 | *vrow = (cups_vbcs_t) legchar; | |
1227 | } | |
1228 | vmap->charcount = (i - vmap->widecount); | |
1229 | cupsFileClose(fp); | |
1230 | return (vmap); | |
1231 | } | |
1232 | ||
1233 | /* | |
1234 | * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS. | |
1235 | */ | |
1236 | static int /* O - Count or -1 on error */ | |
1237 | conv_utf8_to_sbcs(char *dest, /* O - Target string */ | |
1238 | const cups_utf8_t *src, /* I - Source string */ | |
1239 | const int maxout, /* I - Max output */ | |
1240 | const cups_encoding_t encoding) /* I - Encoding */ | |
1241 | { | |
1242 | char *start = dest; /* Start of destination string */ | |
1243 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
1244 | cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ | |
1245 | cups_utf32_t unichar; /* Character value */ | |
1246 | int worklen; /* Internal UCS-4 string length */ | |
1247 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
1248 | /* Internal UCS-4 string */ | |
1249 | int i; /* Looping variable */ | |
1250 | ||
1251 | /* | |
1252 | * Check for valid arguments and clear output... | |
1253 | */ | |
1254 | if ((dest == NULL) | |
1255 | || (src == NULL) | |
1256 | || (maxout < 1) | |
1257 | || (maxout > CUPS_MAX_USTRING) | |
1258 | || (encoding == CUPS_UTF8)) | |
1259 | return (-1); | |
1260 | *dest = '\0'; | |
1261 | ||
1262 | /* | |
1263 | * Find legacy charset map in cache... | |
1264 | */ | |
1265 | cmap = (_cups_cmap_t *) cupsCharmapGet(encoding); | |
1266 | if (cmap == NULL) | |
1267 | return (-1); | |
1268 | ||
1269 | /* | |
1270 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
1271 | */ | |
1272 | worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING); | |
1273 | if (worklen < 0) | |
1274 | return (-1); | |
1275 | ||
1276 | /* | |
1277 | * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)... | |
1278 | */ | |
1279 | for (i = 0; i < worklen;) | |
1280 | { | |
1281 | unichar = work[i]; | |
1282 | if (unichar == 0) | |
1283 | break; | |
1284 | i ++; | |
1285 | ||
1286 | /* | |
1287 | * Check for leading BOM (and delete from output)... | |
1288 | */ | |
1289 | if ((i == 1) && (unichar == 0xfeff)) | |
1290 | continue; | |
1291 | ||
1292 | /* | |
1293 | * Convert ASCII verbatim (optimization)... | |
1294 | */ | |
1295 | if (unichar <= 0x7f) | |
1296 | { | |
1297 | *dest = (char) unichar; | |
1298 | dest ++; | |
1299 | continue; | |
1300 | } | |
1301 | ||
1302 | /* | |
1303 | * Convert unknown character to visible replacement... | |
1304 | */ | |
1305 | srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)]; | |
1306 | if (srow) | |
1307 | srow += (int) (unichar & 0xff); | |
1308 | if ((srow == NULL) || (*srow == 0)) | |
1309 | *dest = '?'; | |
1310 | else | |
1311 | *dest = (char) (*srow); | |
1312 | dest ++; | |
1313 | } | |
1314 | *dest = '\0'; | |
1315 | worklen = (int) (dest - start); | |
1316 | cupsCharmapFree(encoding); | |
1317 | return (worklen); | |
1318 | } | |
1319 | ||
1320 | /* | |
1321 | * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS. | |
1322 | */ | |
1323 | static int /* O - Count or -1 on error */ | |
1324 | conv_utf8_to_vbcs(char *dest, /* O - Target string */ | |
1325 | const cups_utf8_t *src, /* I - Source string */ | |
1326 | const int maxout, /* I - Max output */ | |
1327 | const cups_encoding_t encoding) /* I - Encoding */ | |
1328 | { | |
1329 | char *start = dest; /* Start of destination string */ | |
1330 | _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */ | |
1331 | cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ | |
1332 | cups_utf32_t unichar; /* Character value */ | |
1333 | cups_vbcs_t legchar; /* Legacy character value */ | |
1334 | int worklen; /* Internal UCS-4 string length */ | |
1335 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
1336 | /* Internal UCS-4 string */ | |
1337 | int i; /* Looping variable */ | |
1338 | ||
1339 | /* | |
1340 | * Check for valid arguments and clear output... | |
1341 | */ | |
1342 | if ((dest == NULL) | |
1343 | || (src == NULL) | |
1344 | || (maxout < 1) | |
1345 | || (maxout > CUPS_MAX_USTRING) | |
1346 | || (encoding == CUPS_UTF8)) | |
1347 | return (-1); | |
1348 | *dest = '\0'; | |
1349 | ||
1350 | /* | |
1351 | * Find legacy charset map in cache... | |
1352 | */ | |
1353 | vmap = (_cups_vmap_t *) cupsCharmapGet(encoding); | |
1354 | if (vmap == NULL) | |
1355 | return (-1); | |
1356 | ||
1357 | /* | |
1358 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
1359 | */ | |
1360 | worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING); | |
1361 | if (worklen < 0) | |
1362 | return (-1); | |
1363 | ||
1364 | /* | |
1365 | * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)... | |
1366 | */ | |
1367 | for (i = 0; i < worklen;) | |
1368 | { | |
1369 | unichar = work[i]; | |
1370 | if (unichar == 0) | |
1371 | break; | |
1372 | i ++; | |
1373 | ||
1374 | /* | |
1375 | * Check for leading BOM (and delete from output)... | |
1376 | */ | |
1377 | if ((i == 1) && (unichar == 0xfeff)) | |
1378 | continue; | |
1379 | ||
1380 | /* | |
1381 | * Convert ASCII verbatim (optimization)... | |
1382 | */ | |
1383 | if (unichar <= 0x7f) | |
1384 | { | |
1385 | *dest = (char) unichar; | |
1386 | dest ++; | |
1387 | continue; | |
1388 | } | |
1389 | ||
1390 | /* | |
1391 | * Convert unknown character to visible replacement... | |
1392 | */ | |
1393 | vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)]; | |
1394 | if (vrow) | |
1395 | vrow += (int) (unichar & 0xff); | |
1396 | if ((vrow == NULL) || (*vrow == 0)) | |
1397 | legchar = (cups_vbcs_t) '?'; | |
1398 | else | |
1399 | legchar = (cups_vbcs_t) *vrow; | |
1400 | ||
1401 | /* | |
1402 | * Save n-byte legacy character... | |
1403 | */ | |
1404 | if (legchar > 0xffffff) | |
1405 | { | |
1406 | *dest = (char) ((legchar >> 24) & 0xff); | |
1407 | dest++; | |
1408 | } | |
1409 | if (legchar > 0xffff) | |
1410 | { | |
1411 | *dest = (char) ((legchar >> 16) & 0xff); | |
1412 | dest++; | |
1413 | } | |
1414 | if (legchar > 0xff) | |
1415 | { | |
1416 | *dest = (char) ((legchar >> 8) & 0xff); | |
1417 | dest++; | |
1418 | } | |
1419 | *dest = (char) (legchar & 0xff); | |
1420 | dest ++; | |
1421 | } | |
1422 | *dest = '\0'; | |
1423 | worklen = (int) (dest - start); | |
1424 | cupsCharmapFree(encoding); | |
1425 | return (worklen); | |
1426 | } | |
1427 | ||
1428 | /* | |
1429 | * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8. | |
1430 | */ | |
1431 | static int /* O - Count or -1 on error */ | |
1432 | conv_sbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */ | |
1433 | const char *src, /* I - Source string */ | |
1434 | const int maxout, /* I - Max output */ | |
1435 | const cups_encoding_t encoding) /* I - Encoding */ | |
1436 | { | |
1437 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
1438 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
1439 | unsigned long legchar; /* Legacy character value */ | |
1440 | cups_utf32_t unichar; /* Unicode character value */ | |
1441 | int worklen; /* Internal UCS-4 string length */ | |
1442 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
1443 | /* Internal UCS-4 string */ | |
1444 | int i; /* Looping variable */ | |
1445 | ||
1446 | /* | |
1447 | * Check for valid arguments and clear output... | |
1448 | */ | |
1449 | if ((dest == NULL) | |
1450 | || (src == NULL) | |
1451 | || (maxout < 1) | |
1452 | || (maxout > CUPS_MAX_USTRING) | |
1453 | || (encoding == CUPS_UTF8)) | |
1454 | return (-1); | |
1455 | *dest = '\0'; | |
1456 | ||
1457 | /* | |
1458 | * Find legacy charset map in cache... | |
1459 | */ | |
1460 | cmap = (_cups_cmap_t *) cupsCharmapGet(encoding); | |
1461 | if (cmap == NULL) | |
1462 | return (-1); | |
1463 | ||
1464 | /* | |
1465 | * Convert input legacy charset to internal UCS-4 (and insert BOM)... | |
1466 | */ | |
1467 | work[0] = 0xfeff; | |
1468 | for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++) | |
1469 | { | |
1470 | if (*src == '\0') | |
1471 | break; | |
1472 | legchar = (unsigned long) *src; | |
1473 | ||
1474 | /* | |
1475 | * Convert ASCII verbatim (optimization)... | |
1476 | */ | |
1477 | if (legchar <= 0x7f) | |
1478 | { | |
1479 | work[i] = (cups_utf32_t) legchar; | |
1480 | i ++; | |
1481 | continue; | |
1482 | } | |
1483 | ||
1484 | /* | |
1485 | * Convert unknown character to Replacement Character... | |
1486 | */ | |
1487 | crow = &cmap->char2uni[0]; | |
1488 | crow += (int) legchar; | |
1489 | if (*crow == 0) | |
1490 | unichar = 0xfffd; | |
1491 | else | |
1492 | unichar = (cups_utf32_t) *crow; | |
1493 | work[i] = unichar; | |
1494 | i ++; | |
1495 | } | |
1496 | work[i] = 0; | |
1497 | ||
1498 | /* | |
1499 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
1500 | */ | |
1501 | worklen = cupsUTF32ToUTF8(dest, work, maxout); | |
1502 | cupsCharmapFree(encoding); | |
1503 | return (worklen); | |
1504 | } | |
1505 | ||
1506 | ||
1507 | /* | |
1508 | * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8. | |
1509 | */ | |
1510 | static int /* O - Count or -1 on error */ | |
1511 | conv_vbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */ | |
1512 | const char *src, /* I - Source string */ | |
1513 | const int maxout, /* I - Max output */ | |
1514 | const cups_encoding_t encoding) /* I - Encoding */ | |
1515 | { | |
1516 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
1517 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
1518 | _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */ | |
1519 | cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */ | |
1520 | cups_vbcs_t legchar; /* Legacy character value */ | |
1521 | cups_utf32_t unichar; /* Unicode character value */ | |
1522 | int i; /* Looping variable */ | |
1523 | int worklen; /* Internal UCS-4 string length */ | |
1524 | cups_utf32_t work[CUPS_MAX_USTRING]; | |
1525 | /* Internal UCS-4 string */ | |
1526 | ||
1527 | /* | |
1528 | * Check for valid arguments and clear output... | |
1529 | */ | |
1530 | if ((dest == NULL) | |
1531 | || (src == NULL) | |
1532 | || (maxout < 1) | |
1533 | || (maxout > CUPS_MAX_USTRING) | |
1534 | || (encoding == CUPS_UTF8)) | |
1535 | return (-1); | |
1536 | *dest = '\0'; | |
1537 | ||
1538 | /* | |
1539 | * Find legacy charset map in cache... | |
1540 | */ | |
1541 | vmap = (_cups_vmap_t *) cupsCharmapGet(encoding); | |
1542 | if (vmap == NULL) | |
1543 | return (-1); | |
1544 | ||
1545 | /* | |
1546 | * Convert input legacy charset to internal UCS-4 (and insert BOM)... | |
1547 | */ | |
1548 | work[0] = 0xfeff; | |
1549 | for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++) | |
1550 | { | |
1551 | if (*src == '\0') | |
1552 | break; | |
1553 | legchar = (cups_vbcs_t) *src; | |
1554 | leadchar = (cups_sbcs_t) *src; | |
1555 | ||
1556 | /* | |
1557 | * Convert ASCII verbatim (optimization)... | |
1558 | */ | |
1559 | if (legchar <= 0x7f) | |
1560 | { | |
1561 | work[i] = (cups_utf32_t) legchar; | |
1562 | i ++; | |
1563 | continue; | |
1564 | } | |
1565 | ||
1566 | /* | |
1567 | * Convert 2-byte legacy character... | |
1568 | */ | |
1569 | if (vmap->lead2char[(int) leadchar] == leadchar) | |
1570 | { | |
1571 | src ++; | |
1572 | if (*src == '\0') | |
1573 | return (-1); | |
1574 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1575 | ||
1576 | /* | |
1577 | * Convert unknown character to Replacement Character... | |
1578 | */ | |
1579 | crow = vmap->char2uni[(int) ((legchar >> 8) & 0xff)]; | |
1580 | if (crow) | |
1581 | crow += (int) (legchar & 0xff); | |
1582 | if ((crow == NULL) || (*crow == 0)) | |
1583 | unichar = 0xfffd; | |
1584 | else | |
1585 | unichar = (cups_utf32_t) *crow; | |
1586 | work[i] = unichar; | |
1587 | i ++; | |
1588 | continue; | |
1589 | } | |
1590 | ||
1591 | /* | |
1592 | * Fetch 3-byte or 4-byte legacy character... | |
1593 | */ | |
1594 | if (vmap->lead3char[(int) leadchar] == leadchar) | |
1595 | { | |
1596 | src ++; | |
1597 | if (*src == '\0') | |
1598 | return (-1); | |
1599 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1600 | src ++; | |
1601 | if (*src == '\0') | |
1602 | return (-1); | |
1603 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1604 | } | |
1605 | else if (vmap->lead4char[(int) leadchar] == leadchar) | |
1606 | { | |
1607 | src ++; | |
1608 | if (*src == '\0') | |
1609 | return (-1); | |
1610 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1611 | src ++; | |
1612 | if (*src == '\0') | |
1613 | return (-1); | |
1614 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1615 | src ++; | |
1616 | if (*src == '\0') | |
1617 | return (-1); | |
1618 | legchar = (legchar << 8) | (cups_vbcs_t) *src; | |
1619 | } | |
1620 | else | |
1621 | return (-1); | |
1622 | ||
1623 | /* | |
1624 | * Find 3-byte or 4-byte legacy character... | |
1625 | */ | |
1626 | wide2uni = vmap->wide2uni; | |
1627 | wide2uni = (_cups_wide2uni_t *) bsearch(&legchar, | |
1628 | vmap->wide2uni, | |
1629 | vmap->widecount, | |
1630 | sizeof(_cups_wide2uni_t), | |
1631 | compare_wide); | |
1632 | ||
1633 | /* | |
1634 | * Convert unknown character to Replacement Character... | |
1635 | */ | |
1636 | if ((wide2uni == NULL) || (wide2uni->unichar == 0)) | |
1637 | unichar = 0xfffd; | |
1638 | else | |
1639 | unichar = wide2uni->unichar; | |
1640 | work[i] = unichar; | |
1641 | i ++; | |
1642 | } | |
1643 | work[i] = 0; | |
1644 | ||
1645 | /* | |
1646 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
1647 | */ | |
1648 | worklen = cupsUTF32ToUTF8(dest, work, maxout); | |
1649 | cupsCharmapFree(encoding); | |
1650 | return (worklen); | |
1651 | } | |
1652 | ||
1653 | /* | |
1654 | * 'compare_wide()' - Compare key for wide (VBCS) match. | |
1655 | */ | |
1656 | static int | |
1657 | compare_wide(const void *k1, /* I - Key char */ | |
1658 | const void *k2) /* I - Map char */ | |
1659 | { | |
1660 | cups_vbcs_t *kp = (cups_vbcs_t *) k1; | |
1661 | /* Key char pointer */ | |
1662 | _cups_wide2uni_t *mp = (_cups_wide2uni_t *) k2; | |
1663 | /* Map char pointer */ | |
1664 | cups_vbcs_t key; /* Legacy key character */ | |
1665 | cups_vbcs_t map; /* Legacy map character */ | |
1666 | int result; /* Result Value */ | |
1667 | ||
1668 | key = *kp; | |
1669 | map = mp->widechar; | |
1670 | if (key >= map) | |
1671 | result = (int) (key - map); | |
1672 | else | |
1673 | result = -1 * ((int) (map - key)); | |
1674 | return (result); | |
1675 | } | |
1676 | ||
1677 | ||
1678 | /* | |
fa73b229 | 1679 | * End of "$Id: transcode.c 4967 2006-01-24 03:42:15Z mike $" |
ef416fc2 | 1680 | */ |