]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | /* |
e1d6a774 | 2 | * "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $" |
ef416fc2 | 3 | * |
4 | * Transcoding support for the Common UNIX Printing System (CUPS). | |
5 | * | |
6 | * Copyright 1997-2006 by Easy Software Products. | |
7 | * | |
8 | * These coded instructions, statements, and computer programs are | |
9 | * the property of Easy Software Products and are protected by Federal | |
10 | * copyright law. Distribution and use rights are outlined in the | |
11 | * file "LICENSE.txt" which should have been included with this file. | |
12 | * If this file is missing or damaged please contact Easy Software | |
13 | * Products at: | |
14 | * | |
15 | * Attn: CUPS Licensing Information | |
16 | * Easy Software Products | |
17 | * 44141 Airport View Drive, Suite 204 | |
18 | * Hollywood, Maryland 20636 USA | |
19 | * | |
20 | * Voice: (301) 373-9600 | |
21 | * EMail: cups-info@cups.org | |
22 | * WWW: http://www.cups.org | |
23 | * | |
24 | * Contents: | |
25 | * | |
fa73b229 | 26 | * _cupsCharmapFlush() - Flush all character set maps out of cache. |
e1d6a774 | 27 | * _cupsCharmapFree() - Free a character set map. |
28 | * _cupsCharmapGet() - Get a character set map. | |
ef416fc2 | 29 | * cupsCharsetToUTF8() - Convert legacy character set to UTF-8. |
e1d6a774 | 30 | * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set. |
ef416fc2 | 31 | * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32. |
32 | * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8. | |
e1d6a774 | 33 | * compare_wide() - Compare key for wide (VBCS) match. |
34 | * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8. | |
ef416fc2 | 35 | * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS. |
36 | * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS. | |
ef416fc2 | 37 | * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8. |
e1d6a774 | 38 | * free_sbcs_charmap() - Free memory used by a single byte character set. |
39 | * free_vbcs_charmap() - Free memory used by a variable byte character set. | |
40 | * get_charmap_count() - Count lines in a charmap file. | |
41 | * get_sbcs_charmap() - Get SBCS Charmap. | |
42 | * get_vbcs_charmap() - Get DBCS/VBCS Charmap. | |
ef416fc2 | 43 | */ |
44 | ||
45 | /* | |
46 | * Include necessary headers... | |
47 | */ | |
48 | ||
49 | #include "globals.h" | |
e1d6a774 | 50 | #include "debug.h" |
ef416fc2 | 51 | #include <stdlib.h> |
52 | #include <errno.h> | |
53 | #include <time.h> | |
54 | ||
55 | ||
56 | /* | |
e1d6a774 | 57 | * Local functions... |
ef416fc2 | 58 | */ |
59 | ||
e1d6a774 | 60 | static int compare_wide(const void *k1, const void *k2); |
61 | static int conv_sbcs_to_utf8(cups_utf8_t *dest, | |
62 | const cups_sbcs_t *src, | |
63 | int maxout, | |
64 | const cups_encoding_t encoding); | |
65 | static int conv_utf8_to_sbcs(cups_sbcs_t *dest, | |
66 | const cups_utf8_t *src, | |
67 | int maxout, | |
68 | const cups_encoding_t encoding); | |
69 | static int conv_utf8_to_vbcs(cups_sbcs_t *dest, | |
70 | const cups_utf8_t *src, | |
71 | int maxout, | |
72 | const cups_encoding_t encoding); | |
73 | static int conv_vbcs_to_utf8(cups_utf8_t *dest, | |
74 | const cups_sbcs_t *src, | |
75 | int maxout, | |
76 | const cups_encoding_t encoding); | |
77 | static void free_sbcs_charmap(_cups_cmap_t *sbcs); | |
78 | static void free_vbcs_charmap(_cups_vmap_t *vbcs); | |
79 | static int get_charmap_count(cups_file_t *fp); | |
80 | static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding, | |
81 | const char *filename); | |
82 | static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding, | |
83 | const char *filename); | |
84 | ||
ef416fc2 | 85 | |
86 | /* | |
e1d6a774 | 87 | * '_cupsCharmapFlush()' - Flush all character set maps out of cache. |
ef416fc2 | 88 | */ |
89 | ||
e1d6a774 | 90 | void |
91 | _cupsCharmapFlush(_cups_globals_t *cg) /* I - Global data */ | |
ef416fc2 | 92 | { |
e1d6a774 | 93 | _cups_cmap_t *cmap, /* Legacy SBCS / Unicode Charset Map */ |
94 | *cnext; /* Next Legacy SBCS Charset Map */ | |
95 | _cups_vmap_t *vmap, /* Legacy VBCS / Unicode Charset Map */ | |
96 | *vnext; /* Next Legacy VBCS Charset Map */ | |
ef416fc2 | 97 | |
98 | ||
99 | /* | |
e1d6a774 | 100 | * Loop through SBCS charset map cache, free all memory... |
ef416fc2 | 101 | */ |
102 | ||
e1d6a774 | 103 | for (cmap = cg->cmap_cache; cmap; cmap = cnext) |
104 | { | |
105 | cnext = cmap->next; | |
ef416fc2 | 106 | |
e1d6a774 | 107 | free_sbcs_charmap(cmap); |
108 | } | |
ef416fc2 | 109 | |
e1d6a774 | 110 | cg->cmap_cache = NULL; |
ef416fc2 | 111 | |
112 | /* | |
e1d6a774 | 113 | * Loop through DBCS/VBCS charset map cache, free all memory... |
ef416fc2 | 114 | */ |
115 | ||
e1d6a774 | 116 | for (vmap = cg->vmap_cache; vmap; vmap = vnext) |
117 | { | |
118 | vnext = vmap->next; | |
119 | ||
120 | free_vbcs_charmap(vmap); | |
121 | ||
122 | free(vmap); | |
123 | } | |
124 | ||
125 | cg->vmap_cache = NULL; | |
ef416fc2 | 126 | } |
127 | ||
e1d6a774 | 128 | |
ef416fc2 | 129 | /* |
e1d6a774 | 130 | * '_cupsCharmapFree()' - Free a character set map. |
ef416fc2 | 131 | * |
e1d6a774 | 132 | * This does not actually free; use '_cupsCharmapFlush()' for that. |
ef416fc2 | 133 | */ |
e1d6a774 | 134 | |
ef416fc2 | 135 | void |
e1d6a774 | 136 | _cupsCharmapFree( |
137 | const cups_encoding_t encoding) /* I - Encoding */ | |
ef416fc2 | 138 | { |
e1d6a774 | 139 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ |
140 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
141 | _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */ | |
142 | ||
ef416fc2 | 143 | |
144 | /* | |
145 | * See if we already have this SBCS charset map loaded... | |
146 | */ | |
e1d6a774 | 147 | |
148 | for (cmap = cg->cmap_cache; cmap; cmap = cmap->next) | |
ef416fc2 | 149 | { |
150 | if (cmap->encoding == encoding) | |
151 | { | |
152 | if (cmap->used > 0) | |
153 | cmap->used --; | |
e1d6a774 | 154 | |
ef416fc2 | 155 | return; |
156 | } | |
157 | } | |
158 | ||
159 | /* | |
160 | * See if we already have this DBCS/VBCS charset map loaded... | |
161 | */ | |
e1d6a774 | 162 | |
163 | for (vmap = cg->vmap_cache; vmap; vmap = vmap->next) | |
ef416fc2 | 164 | { |
165 | if (vmap->encoding == encoding) | |
166 | { | |
167 | if (vmap->used > 0) | |
168 | vmap->used --; | |
169 | return; | |
170 | } | |
171 | } | |
fa73b229 | 172 | } |
173 | ||
174 | ||
175 | /* | |
e1d6a774 | 176 | * '_cupsCharmapGet()' - Get a character set map. |
177 | * | |
178 | * This code handles single-byte (SBCS), double-byte (DBCS), and | |
179 | * variable-byte (VBCS) character sets _without_ charset escapes... | |
180 | * This code does not handle multiple-byte character sets (MBCS) | |
181 | * (such as ISO-2022-JP) with charset switching via escapes... | |
fa73b229 | 182 | */ |
183 | ||
e1d6a774 | 184 | void * /* O - Charset map pointer */ |
185 | _cupsCharmapGet( | |
186 | const cups_encoding_t encoding) /* I - Encoding */ | |
fa73b229 | 187 | { |
e1d6a774 | 188 | char filename[1024]; /* Filename for charset map file */ |
189 | _cups_globals_t *cg = _cupsGlobals(); /* Global data */ | |
190 | ||
fa73b229 | 191 | |
e1d6a774 | 192 | DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding)); |
ef416fc2 | 193 | |
194 | /* | |
e1d6a774 | 195 | * Check for valid arguments... |
ef416fc2 | 196 | */ |
e1d6a774 | 197 | |
198 | if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) | |
ef416fc2 | 199 | { |
e1d6a774 | 200 | DEBUG_puts(" Bad encoding, returning NULL!"); |
201 | return (NULL); | |
ef416fc2 | 202 | } |
ef416fc2 | 203 | |
204 | /* | |
e1d6a774 | 205 | * Get the data directory and charset map name... |
ef416fc2 | 206 | */ |
e1d6a774 | 207 | |
208 | snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt", | |
209 | cg->cups_datadir, _cupsEncodingName(encoding)); | |
210 | ||
211 | DEBUG_printf((" filename=\"%s\"\n", filename)); | |
212 | ||
213 | /* | |
214 | * Read charset map input file into cache... | |
215 | */ | |
216 | ||
217 | if (encoding < CUPS_ENCODING_SBCS_END) | |
218 | return (get_sbcs_charmap(encoding, filename)); | |
219 | else if (encoding < CUPS_ENCODING_VBCS_END) | |
220 | return (get_vbcs_charmap(encoding, filename)); | |
221 | else | |
222 | return (NULL); | |
ef416fc2 | 223 | } |
224 | ||
e1d6a774 | 225 | |
ef416fc2 | 226 | /* |
e1d6a774 | 227 | * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8. |
ef416fc2 | 228 | * |
229 | * This code handles single-byte (SBCS), double-byte (DBCS), and | |
230 | * variable-byte (VBCS) character sets _without_ charset escapes... | |
231 | * This code does not handle multiple-byte character sets (MBCS) | |
232 | * (such as ISO-2022-JP) with charset switching via escapes... | |
233 | */ | |
e1d6a774 | 234 | |
235 | int /* O - Count or -1 on error */ | |
236 | cupsCharsetToUTF8( | |
237 | cups_utf8_t *dest, /* O - Target string */ | |
238 | const char *src, /* I - Source string */ | |
239 | const int maxout, /* I - Max output */ | |
240 | const cups_encoding_t encoding) /* I - Encoding */ | |
ef416fc2 | 241 | { |
242 | /* | |
243 | * Check for valid arguments... | |
244 | */ | |
245 | ||
e1d6a774 | 246 | DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n", |
247 | dest, src, maxout, encoding)); | |
248 | ||
249 | if (dest) | |
250 | *dest = '\0'; | |
251 | ||
ef416fc2 | 252 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) |
e1d6a774 | 253 | { |
254 | DEBUG_puts(" Bad arguments, returning -1"); | |
ef416fc2 | 255 | return (-1); |
e1d6a774 | 256 | } |
ef416fc2 | 257 | |
258 | /* | |
259 | * Handle identity conversions... | |
260 | */ | |
261 | ||
262 | if (encoding == CUPS_UTF8 || | |
263 | encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) | |
264 | { | |
e1d6a774 | 265 | strlcpy((char *)dest, src, maxout); |
266 | return (strlen((char *)dest)); | |
ef416fc2 | 267 | } |
268 | ||
269 | /* | |
e1d6a774 | 270 | * Convert input legacy charset to UTF-8... |
ef416fc2 | 271 | */ |
e1d6a774 | 272 | |
ef416fc2 | 273 | if (encoding < CUPS_ENCODING_SBCS_END) |
e1d6a774 | 274 | return (conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding)); |
ef416fc2 | 275 | else if (encoding < CUPS_ENCODING_VBCS_END) |
e1d6a774 | 276 | return (conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding)); |
ef416fc2 | 277 | else |
e1d6a774 | 278 | { |
279 | puts(" Bad encoding, returning -1"); | |
ef416fc2 | 280 | return (-1); |
e1d6a774 | 281 | } |
ef416fc2 | 282 | } |
283 | ||
e1d6a774 | 284 | |
ef416fc2 | 285 | /* |
e1d6a774 | 286 | * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set. |
ef416fc2 | 287 | * |
288 | * This code handles single-byte (SBCS), double-byte (DBCS), and | |
289 | * variable-byte (VBCS) character sets _without_ charset escapes... | |
290 | * This code does not handle multiple-byte character sets (MBCS) | |
291 | * (such as ISO-2022-JP) with charset switching via escapes... | |
292 | */ | |
e1d6a774 | 293 | |
294 | int /* O - Count or -1 on error */ | |
295 | cupsUTF8ToCharset( | |
296 | char *dest, /* O - Target string */ | |
297 | const cups_utf8_t *src, /* I - Source string */ | |
298 | const int maxout, /* I - Max output */ | |
299 | const cups_encoding_t encoding) /* I - Encoding */ | |
ef416fc2 | 300 | { |
301 | /* | |
302 | * Check for valid arguments... | |
303 | */ | |
304 | ||
305 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
e1d6a774 | 306 | { |
307 | if (dest) | |
308 | *dest = '\0'; | |
309 | ||
ef416fc2 | 310 | return (-1); |
e1d6a774 | 311 | } |
ef416fc2 | 312 | |
313 | /* | |
314 | * Handle identity conversions... | |
315 | */ | |
316 | ||
317 | if (encoding == CUPS_UTF8 || | |
318 | encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) | |
319 | { | |
e1d6a774 | 320 | strlcpy(dest, (char *)src, maxout); |
321 | return (strlen(dest)); | |
ef416fc2 | 322 | } |
323 | ||
324 | /* | |
e1d6a774 | 325 | * Convert input UTF-8 to legacy charset... |
ef416fc2 | 326 | */ |
e1d6a774 | 327 | |
ef416fc2 | 328 | if (encoding < CUPS_ENCODING_SBCS_END) |
e1d6a774 | 329 | return (conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding)); |
ef416fc2 | 330 | else if (encoding < CUPS_ENCODING_VBCS_END) |
e1d6a774 | 331 | return (conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding)); |
ef416fc2 | 332 | else |
333 | return (-1); | |
334 | } | |
335 | ||
ef416fc2 | 336 | |
337 | /* | |
338 | * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32. | |
339 | * | |
340 | * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows... | |
341 | * | |
342 | * UTF-32 char UTF-8 char(s) | |
343 | * -------------------------------------------------- | |
e1d6a774 | 344 | * 0 to 127 = 0xxxxxxx (US-ASCII) |
ef416fc2 | 345 | * 128 to 2047 = 110xxxxx 10yyyyyy |
346 | * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz | |
e1d6a774 | 347 | * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx |
ef416fc2 | 348 | * |
349 | * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4, | |
350 | * which would convert to five- or six-octet UTF-8 sequences... | |
ef416fc2 | 351 | */ |
e1d6a774 | 352 | |
353 | int /* O - Count or -1 on error */ | |
354 | cupsUTF8ToUTF32( | |
355 | cups_utf32_t *dest, /* O - Target string */ | |
356 | const cups_utf8_t *src, /* I - Source string */ | |
357 | const int maxout) /* I - Max output */ | |
ef416fc2 | 358 | { |
e1d6a774 | 359 | size_t srclen; /* Source string length */ |
360 | int i; /* Looping variable */ | |
361 | cups_utf8_t ch; /* Character value */ | |
362 | cups_utf8_t next; /* Next character value */ | |
363 | cups_utf32_t ch32; /* UTF-32 character value */ | |
364 | ||
ef416fc2 | 365 | |
366 | /* | |
367 | * Check for valid arguments and clear output... | |
368 | */ | |
e1d6a774 | 369 | |
370 | if (dest) | |
371 | *dest = 0; | |
372 | ||
373 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
ef416fc2 | 374 | return (-1); |
ef416fc2 | 375 | |
376 | /* | |
377 | * Convert input UTF-8 to output UTF-32 (and insert BOM)... | |
378 | */ | |
e1d6a774 | 379 | |
380 | *dest++ = 0xfeff; | |
381 | srclen = strlen((char *)src); | |
382 | ||
383 | for (i = maxout - 1; *src && i > 0; i --) | |
ef416fc2 | 384 | { |
e1d6a774 | 385 | ch = *src++; |
ef416fc2 | 386 | |
387 | /* | |
388 | * Convert UTF-8 character(s) to UTF-32 character... | |
389 | */ | |
e1d6a774 | 390 | |
391 | if (!(ch & 0x80)) | |
ef416fc2 | 392 | { |
393 | /* | |
394 | * One-octet UTF-8 <= 127 (US-ASCII)... | |
395 | */ | |
e1d6a774 | 396 | |
397 | *dest++ = ch; | |
ef416fc2 | 398 | } |
399 | else if ((ch & 0xe0) == 0xc0) | |
400 | { | |
401 | /* | |
402 | * Two-octet UTF-8 <= 2047 (Latin-x)... | |
403 | */ | |
e1d6a774 | 404 | |
405 | next = *src++; | |
406 | if (!next) | |
ef416fc2 | 407 | return (-1); |
e1d6a774 | 408 | |
ef416fc2 | 409 | ch32 = ((ch & 0x1f) << 6) | (next & 0x3f); |
410 | ||
411 | /* | |
412 | * Check for non-shortest form (invalid UTF-8)... | |
413 | */ | |
e1d6a774 | 414 | |
415 | if (ch32 < 0x80) | |
ef416fc2 | 416 | return (-1); |
e1d6a774 | 417 | |
418 | *dest++ = ch32; | |
ef416fc2 | 419 | } |
420 | else if ((ch & 0xf0) == 0xe0) | |
421 | { | |
422 | /* | |
423 | * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)... | |
424 | */ | |
e1d6a774 | 425 | |
426 | next = *src++; | |
427 | if (!next) | |
ef416fc2 | 428 | return (-1); |
e1d6a774 | 429 | |
430 | ch32 = ((ch & 0x0f) << 6) | (next & 0x3f); | |
431 | ||
432 | next = *src++; | |
433 | if (!next) | |
ef416fc2 | 434 | return (-1); |
e1d6a774 | 435 | |
436 | ch32 = (ch32 << 6) | (next & 0x3f); | |
ef416fc2 | 437 | |
438 | /* | |
439 | * Check for non-shortest form (invalid UTF-8)... | |
440 | */ | |
e1d6a774 | 441 | |
442 | if (ch32 < 0x800) | |
ef416fc2 | 443 | return (-1); |
e1d6a774 | 444 | |
445 | *dest++ = ch32; | |
ef416fc2 | 446 | } |
447 | else if ((ch & 0xf8) == 0xf0) | |
448 | { | |
449 | /* | |
e1d6a774 | 450 | * Four-octet UTF-8... |
ef416fc2 | 451 | */ |
e1d6a774 | 452 | |
453 | next = *src++; | |
454 | if (!next) | |
ef416fc2 | 455 | return (-1); |
e1d6a774 | 456 | |
457 | ch32 = ((ch & 0x07) << 6) | (next & 0x3f); | |
458 | ||
459 | next = *src++; | |
460 | if (!next) | |
461 | return (-1); | |
462 | ||
463 | ch32 = (ch32 << 6) | (next & 0x3f); | |
464 | ||
465 | next = *src++; | |
466 | if (!next) | |
467 | return (-1); | |
468 | ||
469 | ch32 = (ch32 << 6) | (next & 0x3f); | |
470 | ||
ef416fc2 | 471 | /* |
e1d6a774 | 472 | * Check for non-shortest form (invalid UTF-8)... |
ef416fc2 | 473 | */ |
e1d6a774 | 474 | |
475 | if (ch32 < 0x10000) | |
476 | return (-1); | |
477 | ||
478 | *dest++ = ch32; | |
ef416fc2 | 479 | } |
480 | else | |
481 | { | |
482 | /* | |
e1d6a774 | 483 | * More than 4-octet (invalid UTF-8 sequence)... |
ef416fc2 | 484 | */ |
e1d6a774 | 485 | |
ef416fc2 | 486 | return (-1); |
487 | } | |
488 | ||
489 | /* | |
490 | * Check for UTF-16 surrogate (illegal UTF-8)... | |
491 | */ | |
ef416fc2 | 492 | |
e1d6a774 | 493 | if (*dest >= 0xd800 && *dest <= 0xdfff) |
ef416fc2 | 494 | return (-1); |
495 | } | |
e1d6a774 | 496 | |
ef416fc2 | 497 | *dest = 0; |
e1d6a774 | 498 | |
ef416fc2 | 499 | return (i); |
500 | } | |
501 | ||
e1d6a774 | 502 | |
ef416fc2 | 503 | /* |
504 | * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8. | |
505 | * | |
506 | * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows... | |
507 | * | |
508 | * UTF-32 char UTF-8 char(s) | |
509 | * -------------------------------------------------- | |
e1d6a774 | 510 | * 0 to 127 = 0xxxxxxx (US-ASCII) |
ef416fc2 | 511 | * 128 to 2047 = 110xxxxx 10yyyyyy |
512 | * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz | |
e1d6a774 | 513 | * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx |
ef416fc2 | 514 | * |
515 | * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4, | |
516 | * which would convert to five- or six-octet UTF-8 sequences... | |
ef416fc2 | 517 | */ |
e1d6a774 | 518 | |
519 | int /* O - Count or -1 on error */ | |
520 | cupsUTF32ToUTF8( | |
521 | cups_utf8_t *dest, /* O - Target string */ | |
522 | const cups_utf32_t *src, /* I - Source string */ | |
523 | const int maxout) /* I - Max output */ | |
ef416fc2 | 524 | { |
e1d6a774 | 525 | cups_utf8_t *start; /* Start of destination string */ |
526 | int i; /* Looping variable */ | |
527 | int swap; /* Byte-swap input to output */ | |
528 | cups_utf32_t ch; /* Character value */ | |
529 | ||
ef416fc2 | 530 | |
531 | /* | |
532 | * Check for valid arguments and clear output... | |
533 | */ | |
e1d6a774 | 534 | |
535 | if (dest) | |
536 | *dest = '\0'; | |
537 | ||
538 | if (!dest || !src || maxout < 1) | |
ef416fc2 | 539 | return (-1); |
ef416fc2 | 540 | |
541 | /* | |
542 | * Check for leading BOM in UTF-32 and inverted BOM... | |
543 | */ | |
e1d6a774 | 544 | |
545 | start = dest; | |
546 | swap = *src == 0xfffe0000; | |
547 | ||
548 | if (*src == 0xfffe0000 || *src == 0xfeff) | |
549 | src ++; | |
ef416fc2 | 550 | |
551 | /* | |
552 | * Convert input UTF-32 to output UTF-8... | |
553 | */ | |
e1d6a774 | 554 | |
555 | for (i = maxout - 1; *src && i > 0;) | |
ef416fc2 | 556 | { |
e1d6a774 | 557 | ch = *src++; |
ef416fc2 | 558 | |
559 | /* | |
560 | * Byte swap input UTF-32, if necessary... | |
e1d6a774 | 561 | * (only byte-swapping 24 of 32 bits) |
ef416fc2 | 562 | */ |
e1d6a774 | 563 | |
ef416fc2 | 564 | if (swap) |
565 | ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000)); | |
566 | ||
567 | /* | |
e1d6a774 | 568 | * Check for beyond Plane 16 (invalid UTF-32)... |
ef416fc2 | 569 | */ |
ef416fc2 | 570 | |
ef416fc2 | 571 | if (ch > 0x10ffff) |
572 | return (-1); | |
573 | ||
ef416fc2 | 574 | /* |
575 | * Convert UTF-32 character to UTF-8 character(s)... | |
576 | */ | |
e1d6a774 | 577 | |
578 | if (ch < 0x80) | |
ef416fc2 | 579 | { |
580 | /* | |
581 | * One-octet UTF-8 <= 127 (US-ASCII)... | |
582 | */ | |
e1d6a774 | 583 | |
584 | *dest++ = (cups_utf8_t)ch; | |
585 | i --; | |
ef416fc2 | 586 | } |
e1d6a774 | 587 | else if (ch < 0x800) |
ef416fc2 | 588 | { |
589 | /* | |
590 | * Two-octet UTF-8 <= 2047 (Latin-x)... | |
591 | */ | |
e1d6a774 | 592 | |
593 | if (i < 2) | |
594 | return (-1); | |
595 | ||
596 | *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f)); | |
597 | *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); | |
598 | i -= 2; | |
ef416fc2 | 599 | } |
e1d6a774 | 600 | else if (ch < 0x10000) |
ef416fc2 | 601 | { |
602 | /* | |
603 | * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)... | |
604 | */ | |
e1d6a774 | 605 | |
606 | if (i < 3) | |
607 | return (-1); | |
608 | ||
609 | *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f)); | |
610 | *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f)); | |
611 | *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); | |
612 | i -= 3; | |
613 | } | |
614 | else | |
615 | { | |
616 | /* | |
617 | * Four-octet UTF-8... | |
618 | */ | |
619 | ||
620 | if (i < 4) | |
621 | return (-1); | |
622 | ||
623 | *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07)); | |
624 | *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f)); | |
625 | *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f)); | |
626 | *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); | |
627 | i -= 4; | |
ef416fc2 | 628 | } |
629 | } | |
e1d6a774 | 630 | |
ef416fc2 | 631 | *dest = '\0'; |
e1d6a774 | 632 | |
633 | return ((int)(dest - start)); | |
ef416fc2 | 634 | } |
635 | ||
e1d6a774 | 636 | |
ef416fc2 | 637 | /* |
e1d6a774 | 638 | * 'compare_wide()' - Compare key for wide (VBCS) match. |
639 | */ | |
640 | ||
641 | static int | |
642 | compare_wide(const void *k1, /* I - Key char */ | |
643 | const void *k2) /* I - Map char */ | |
644 | { | |
645 | cups_vbcs_t key; /* Legacy key character */ | |
646 | cups_vbcs_t map; /* Legacy map character */ | |
647 | ||
648 | ||
649 | key = *((cups_vbcs_t *)k1); | |
650 | map = ((_cups_wide2uni_t *)k2)->widechar; | |
651 | ||
652 | return ((int)(key - map)); | |
653 | } | |
654 | ||
655 | ||
656 | /* | |
657 | * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8. | |
ef416fc2 | 658 | */ |
e1d6a774 | 659 | |
660 | static int /* O - Count or -1 on error */ | |
661 | conv_sbcs_to_utf8( | |
662 | cups_utf8_t *dest, /* O - Target string */ | |
663 | const cups_sbcs_t *src, /* I - Source string */ | |
664 | int maxout, /* I - Max output */ | |
665 | const cups_encoding_t encoding) /* I - Encoding */ | |
ef416fc2 | 666 | { |
e1d6a774 | 667 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ |
668 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
669 | cups_sbcs_t legchar; /* Legacy character value */ | |
670 | cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */ | |
671 | *workptr; /* Pointer into string */ | |
672 | ||
ef416fc2 | 673 | |
674 | /* | |
e1d6a774 | 675 | * Find legacy charset map in cache... |
ef416fc2 | 676 | */ |
e1d6a774 | 677 | |
678 | if ((cmap = (_cups_cmap_t *)_cupsCharmapGet(encoding)) == NULL) | |
ef416fc2 | 679 | return (-1); |
ef416fc2 | 680 | |
681 | /* | |
e1d6a774 | 682 | * Convert input legacy charset to internal UCS-4 (and insert BOM)... |
ef416fc2 | 683 | */ |
ef416fc2 | 684 | |
e1d6a774 | 685 | work[0] = 0xfeff; |
686 | for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);) | |
ef416fc2 | 687 | { |
e1d6a774 | 688 | legchar = *src++; |
ef416fc2 | 689 | |
690 | /* | |
e1d6a774 | 691 | * Convert ASCII verbatim (optimization)... |
ef416fc2 | 692 | */ |
ef416fc2 | 693 | |
e1d6a774 | 694 | if (legchar < 0x80) |
695 | *workptr++ = (cups_utf32_t)legchar; | |
696 | else | |
ef416fc2 | 697 | { |
e1d6a774 | 698 | /* |
699 | * Convert unknown character to Replacement Character... | |
700 | */ | |
ef416fc2 | 701 | |
e1d6a774 | 702 | crow = cmap->char2uni + legchar; |
703 | ||
704 | if (!*crow) | |
705 | *workptr++ = 0xfffd; | |
706 | else | |
707 | *workptr++ = (cups_utf32_t)*crow; | |
ef416fc2 | 708 | } |
ef416fc2 | 709 | } |
e1d6a774 | 710 | |
711 | *workptr = 0; | |
712 | ||
713 | /* | |
714 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
715 | */ | |
716 | ||
717 | _cupsCharmapFree(encoding); | |
718 | ||
719 | return (cupsUTF32ToUTF8(dest, work, maxout)); | |
ef416fc2 | 720 | } |
721 | ||
e1d6a774 | 722 | |
ef416fc2 | 723 | /* |
e1d6a774 | 724 | * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS. |
ef416fc2 | 725 | */ |
e1d6a774 | 726 | |
727 | static int /* O - Count or -1 on error */ | |
728 | conv_utf8_to_sbcs( | |
729 | cups_sbcs_t *dest, /* O - Target string */ | |
730 | const cups_utf8_t *src, /* I - Source string */ | |
731 | int maxout, /* I - Max output */ | |
732 | const cups_encoding_t encoding) /* I - Encoding */ | |
ef416fc2 | 733 | { |
e1d6a774 | 734 | cups_sbcs_t *start; /* Start of destination string */ |
735 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
736 | cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ | |
737 | cups_utf32_t unichar; /* Character value */ | |
738 | cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */ | |
739 | *workptr; /* Pointer into string */ | |
740 | ||
ef416fc2 | 741 | |
742 | /* | |
e1d6a774 | 743 | * Find legacy charset map in cache... |
ef416fc2 | 744 | */ |
e1d6a774 | 745 | |
746 | if ((cmap = (_cups_cmap_t *) _cupsCharmapGet(encoding)) == NULL) | |
ef416fc2 | 747 | return (-1); |
ef416fc2 | 748 | |
749 | /* | |
e1d6a774 | 750 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... |
ef416fc2 | 751 | */ |
e1d6a774 | 752 | |
753 | if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0) | |
754 | return (-1); | |
ef416fc2 | 755 | |
756 | /* | |
e1d6a774 | 757 | * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)... |
ef416fc2 | 758 | */ |
e1d6a774 | 759 | |
760 | for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --) | |
ef416fc2 | 761 | { |
e1d6a774 | 762 | unichar = *workptr++; |
763 | if (!unichar) | |
ef416fc2 | 764 | break; |
ef416fc2 | 765 | |
766 | /* | |
e1d6a774 | 767 | * Convert ASCII verbatim (optimization)... |
ef416fc2 | 768 | */ |
ef416fc2 | 769 | |
e1d6a774 | 770 | if (unichar < 0x80) |
771 | { | |
772 | *dest++ = (cups_sbcs_t)unichar; | |
773 | continue; | |
774 | } | |
ef416fc2 | 775 | |
776 | /* | |
e1d6a774 | 777 | * Convert unknown character to visible replacement... |
ef416fc2 | 778 | */ |
ef416fc2 | 779 | |
e1d6a774 | 780 | srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)]; |
ef416fc2 | 781 | |
e1d6a774 | 782 | if (srow) |
783 | srow += (int)(unichar & 0xff); | |
ef416fc2 | 784 | |
e1d6a774 | 785 | if (!srow || !*srow) |
786 | *dest++ = '?'; | |
787 | else | |
788 | *dest++ = *srow; | |
ef416fc2 | 789 | } |
ef416fc2 | 790 | |
e1d6a774 | 791 | *dest = '\0'; |
792 | ||
793 | _cupsCharmapFree(encoding); | |
794 | ||
795 | return ((int)(dest - start)); | |
ef416fc2 | 796 | } |
797 | ||
e1d6a774 | 798 | |
ef416fc2 | 799 | /* |
e1d6a774 | 800 | * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS. |
ef416fc2 | 801 | */ |
e1d6a774 | 802 | |
803 | static int /* O - Count or -1 on error */ | |
804 | conv_utf8_to_vbcs( | |
805 | cups_sbcs_t *dest, /* O - Target string */ | |
806 | const cups_utf8_t *src, /* I - Source string */ | |
807 | int maxout, /* I - Max output */ | |
808 | const cups_encoding_t encoding) /* I - Encoding */ | |
ef416fc2 | 809 | { |
e1d6a774 | 810 | cups_sbcs_t *start; /* Start of destination string */ |
811 | _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */ | |
812 | cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ | |
813 | cups_utf32_t unichar; /* Character value */ | |
814 | cups_vbcs_t legchar; /* Legacy character value */ | |
815 | cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */ | |
816 | *workptr; /* Pointer into string */ | |
ef416fc2 | 817 | |
ef416fc2 | 818 | |
819 | /* | |
e1d6a774 | 820 | * Find legacy charset map in cache... |
ef416fc2 | 821 | */ |
ef416fc2 | 822 | |
e1d6a774 | 823 | if ((vmap = (_cups_vmap_t *)_cupsCharmapGet(encoding)) == NULL) |
824 | return (-1); | |
ef416fc2 | 825 | |
826 | /* | |
e1d6a774 | 827 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... |
ef416fc2 | 828 | */ |
e1d6a774 | 829 | |
830 | if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0) | |
831 | return (-1); | |
ef416fc2 | 832 | |
833 | /* | |
e1d6a774 | 834 | * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)... |
ef416fc2 | 835 | */ |
e1d6a774 | 836 | |
837 | for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --) | |
ef416fc2 | 838 | { |
e1d6a774 | 839 | unichar = *workptr++; |
840 | if (!unichar) | |
ef416fc2 | 841 | break; |
ef416fc2 | 842 | |
843 | /* | |
e1d6a774 | 844 | * Convert ASCII verbatim (optimization)... |
ef416fc2 | 845 | */ |
e1d6a774 | 846 | |
847 | if (unichar < 0x80) | |
848 | { | |
849 | *dest++ = (cups_vbcs_t)unichar; | |
850 | continue; | |
851 | } | |
ef416fc2 | 852 | |
853 | /* | |
e1d6a774 | 854 | * Convert unknown character to visible replacement... |
ef416fc2 | 855 | */ |
e1d6a774 | 856 | |
857 | vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)]; | |
858 | ||
859 | if (vrow) | |
860 | vrow += (int)(unichar & 0xff); | |
861 | ||
862 | if (!vrow || !*vrow) | |
863 | legchar = (cups_vbcs_t)'?'; | |
864 | else | |
865 | legchar = (cups_vbcs_t)*vrow; | |
ef416fc2 | 866 | |
867 | /* | |
e1d6a774 | 868 | * Save n-byte legacy character... |
ef416fc2 | 869 | */ |
e1d6a774 | 870 | |
871 | if (legchar > 0xffffff) | |
ef416fc2 | 872 | { |
e1d6a774 | 873 | if (maxout < 5) |
874 | return (-1); | |
875 | ||
876 | *dest++ = (cups_sbcs_t)(legchar >> 24); | |
877 | *dest++ = (cups_sbcs_t)(legchar >> 16); | |
878 | *dest++ = (cups_sbcs_t)(legchar >> 8); | |
879 | *dest++ = (cups_sbcs_t)legchar; | |
880 | ||
881 | maxout -= 3; | |
ef416fc2 | 882 | } |
e1d6a774 | 883 | else if (legchar > 0xffff) |
884 | { | |
885 | if (maxout < 4) | |
886 | return (-1); | |
ef416fc2 | 887 | |
e1d6a774 | 888 | *dest++ = (cups_sbcs_t)(legchar >> 16); |
889 | *dest++ = (cups_sbcs_t)(legchar >> 8); | |
890 | *dest++ = (cups_sbcs_t)legchar; | |
ef416fc2 | 891 | |
e1d6a774 | 892 | maxout -= 2; |
893 | } | |
894 | else if (legchar > 0xff) | |
895 | { | |
896 | *dest++ = (cups_sbcs_t)(legchar >> 8); | |
897 | *dest++ = (cups_sbcs_t)legchar; | |
898 | ||
899 | maxout --; | |
900 | } | |
ef416fc2 | 901 | } |
e1d6a774 | 902 | |
903 | *dest = '\0'; | |
904 | ||
905 | _cupsCharmapFree(encoding); | |
906 | ||
907 | return ((int)(dest - start)); | |
ef416fc2 | 908 | } |
909 | ||
e1d6a774 | 910 | |
ef416fc2 | 911 | /* |
e1d6a774 | 912 | * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8. |
ef416fc2 | 913 | */ |
e1d6a774 | 914 | |
915 | static int /* O - Count or -1 on error */ | |
916 | conv_vbcs_to_utf8( | |
917 | cups_utf8_t *dest, /* O - Target string */ | |
918 | const cups_sbcs_t *src, /* I - Source string */ | |
919 | int maxout, /* I - Max output */ | |
920 | const cups_encoding_t encoding) /* I - Encoding */ | |
ef416fc2 | 921 | { |
e1d6a774 | 922 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ |
923 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
924 | _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */ | |
925 | cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */ | |
926 | cups_vbcs_t legchar; /* Legacy character value */ | |
927 | cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */ | |
928 | *workptr; /* Pointer into string */ | |
ef416fc2 | 929 | |
ef416fc2 | 930 | |
931 | /* | |
e1d6a774 | 932 | * Find legacy charset map in cache... |
ef416fc2 | 933 | */ |
ef416fc2 | 934 | |
e1d6a774 | 935 | if ((vmap = (_cups_vmap_t *)_cupsCharmapGet(encoding)) == NULL) |
936 | return (-1); | |
ef416fc2 | 937 | |
938 | /* | |
e1d6a774 | 939 | * Convert input legacy charset to internal UCS-4 (and insert BOM)... |
ef416fc2 | 940 | */ |
ef416fc2 | 941 | |
e1d6a774 | 942 | work[0] = 0xfeff; |
943 | for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);) | |
ef416fc2 | 944 | { |
e1d6a774 | 945 | legchar = *src++; |
946 | leadchar = (cups_sbcs_t)legchar; | |
ef416fc2 | 947 | |
948 | /* | |
e1d6a774 | 949 | * Convert ASCII verbatim (optimization)... |
ef416fc2 | 950 | */ |
ef416fc2 | 951 | |
e1d6a774 | 952 | if (legchar < 0x80) |
ef416fc2 | 953 | { |
e1d6a774 | 954 | *workptr++ = (cups_utf32_t)legchar; |
955 | continue; | |
ef416fc2 | 956 | } |
957 | ||
958 | /* | |
e1d6a774 | 959 | * Convert 2-byte legacy character... |
ef416fc2 | 960 | */ |
e1d6a774 | 961 | |
962 | if (vmap->lead2char[(int)leadchar] == leadchar) | |
ef416fc2 | 963 | { |
e1d6a774 | 964 | if (!*src) |
965 | return (-1); | |
966 | ||
967 | legchar = (legchar << 8) | *src++; | |
968 | ||
ef416fc2 | 969 | /* |
e1d6a774 | 970 | * Convert unknown character to Replacement Character... |
ef416fc2 | 971 | */ |
e1d6a774 | 972 | |
973 | crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)]; | |
974 | if (crow) | |
975 | crow += (int) (legchar & 0xff); | |
976 | ||
977 | if (!crow || !*crow) | |
978 | *workptr++ = 0xfffd; | |
979 | else | |
980 | *workptr++ = (cups_utf32_t)*crow; | |
981 | continue; | |
ef416fc2 | 982 | } |
983 | ||
984 | /* | |
e1d6a774 | 985 | * Fetch 3-byte or 4-byte legacy character... |
ef416fc2 | 986 | */ |
e1d6a774 | 987 | |
988 | if (vmap->lead3char[(int)leadchar] == leadchar) | |
ef416fc2 | 989 | { |
e1d6a774 | 990 | if (!*src || !src[1]) |
991 | return (-1); | |
992 | ||
993 | legchar = (legchar << 8) | *src++; | |
994 | legchar = (legchar << 8) | *src++; | |
ef416fc2 | 995 | } |
e1d6a774 | 996 | else if (vmap->lead4char[(int)leadchar] == leadchar) |
997 | { | |
998 | if (!*src || !src[1] || !src[2]) | |
999 | return (-1); | |
1000 | ||
1001 | legchar = (legchar << 8) | *src++; | |
1002 | legchar = (legchar << 8) | *src++; | |
1003 | legchar = (legchar << 8) | *src++; | |
1004 | } | |
1005 | else | |
1006 | return (-1); | |
ef416fc2 | 1007 | |
1008 | /* | |
e1d6a774 | 1009 | * Find 3-byte or 4-byte legacy character... |
ef416fc2 | 1010 | */ |
e1d6a774 | 1011 | |
1012 | wide2uni = (_cups_wide2uni_t *)bsearch(&legchar, | |
1013 | vmap->wide2uni, | |
1014 | vmap->widecount, | |
1015 | sizeof(_cups_wide2uni_t), | |
1016 | compare_wide); | |
ef416fc2 | 1017 | |
1018 | /* | |
e1d6a774 | 1019 | * Convert unknown character to Replacement Character... |
ef416fc2 | 1020 | */ |
e1d6a774 | 1021 | |
1022 | if (!wide2uni || !wide2uni->unichar) | |
1023 | *workptr++ = 0xfffd; | |
1024 | else | |
1025 | *workptr++ = wide2uni->unichar; | |
ef416fc2 | 1026 | } |
e1d6a774 | 1027 | |
1028 | *workptr = 0; | |
1029 | ||
1030 | _cupsCharmapFree(encoding); | |
1031 | ||
1032 | /* | |
1033 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
1034 | */ | |
1035 | ||
1036 | return (cupsUTF32ToUTF8(dest, work, maxout)); | |
ef416fc2 | 1037 | } |
1038 | ||
e1d6a774 | 1039 | |
ef416fc2 | 1040 | /* |
e1d6a774 | 1041 | * 'free_sbcs_charmap()' - Free memory used by a single byte character set. |
ef416fc2 | 1042 | */ |
e1d6a774 | 1043 | |
1044 | static void | |
1045 | free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */ | |
ef416fc2 | 1046 | { |
e1d6a774 | 1047 | int i; /* Looping variable */ |
ef416fc2 | 1048 | |
ef416fc2 | 1049 | |
e1d6a774 | 1050 | for (i = 0; i < 256; i ++) |
1051 | if (cmap->uni2char[i]) | |
1052 | free(cmap->uni2char[i]); | |
1053 | ||
1054 | free(cmap); | |
1055 | } | |
1056 | ||
1057 | ||
1058 | /* | |
1059 | * 'free_vbcs_charmap()' - Free memory used by a variable byte character set. | |
1060 | */ | |
1061 | ||
1062 | static void | |
1063 | free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */ | |
1064 | { | |
1065 | int i; /* Looping variable */ | |
1066 | ||
1067 | ||
1068 | for (i = 0; i < 256; i ++) | |
1069 | if (vmap->char2uni[i]) | |
1070 | free(vmap->char2uni[i]); | |
1071 | ||
1072 | for (i = 0; i < 256; i ++) | |
1073 | if (vmap->uni2char[i]) | |
1074 | free(vmap->uni2char[i]); | |
1075 | ||
1076 | if (vmap->wide2uni) | |
1077 | free(vmap->wide2uni); | |
1078 | ||
1079 | free(vmap); | |
1080 | } | |
1081 | ||
1082 | ||
1083 | /* | |
1084 | * 'get_charmap_count()' - Count lines in a charmap file. | |
1085 | */ | |
1086 | ||
1087 | static int /* O - Count or -1 on error */ | |
1088 | get_charmap_count(cups_file_t *fp) /* I - File to read from */ | |
1089 | { | |
1090 | int count; /* Number of lines */ | |
1091 | char line[256]; /* Line from input map file */ | |
ef416fc2 | 1092 | |
ef416fc2 | 1093 | |
1094 | /* | |
e1d6a774 | 1095 | * Count lines in map input file... |
ef416fc2 | 1096 | */ |
ef416fc2 | 1097 | |
e1d6a774 | 1098 | count = 0; |
ef416fc2 | 1099 | |
e1d6a774 | 1100 | while (cupsFileGets(fp, line, sizeof(line))) |
1101 | if (line[0] == '0') | |
1102 | count ++; | |
ef416fc2 | 1103 | |
e1d6a774 | 1104 | /* |
1105 | * Return the number of lines... | |
1106 | */ | |
1107 | ||
1108 | if (count > 0) | |
1109 | return (count); | |
1110 | else | |
1111 | return (-1); | |
ef416fc2 | 1112 | } |
1113 | ||
e1d6a774 | 1114 | |
ef416fc2 | 1115 | /* |
e1d6a774 | 1116 | * 'get_sbcs_charmap()' - Get SBCS Charmap. |
ef416fc2 | 1117 | */ |
e1d6a774 | 1118 | |
1119 | static _cups_cmap_t * /* O - Charmap or 0 on error */ | |
1120 | get_sbcs_charmap( | |
1121 | const cups_encoding_t encoding, /* I - Charmap Encoding */ | |
1122 | const char *filename) /* I - Charmap Filename */ | |
ef416fc2 | 1123 | { |
e1d6a774 | 1124 | unsigned long legchar; /* Legacy character value */ |
1125 | cups_utf32_t unichar; /* Unicode character value */ | |
1126 | _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ | |
1127 | cups_file_t *fp; /* Charset map file pointer */ | |
1128 | char *s; /* Line parsing pointer */ | |
1129 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
1130 | cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ | |
1131 | char line[256]; /* Line from charset map file */ | |
1132 | _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */ | |
1133 | ||
ef416fc2 | 1134 | |
1135 | /* | |
e1d6a774 | 1136 | * See if we already have this SBCS charset map loaded... |
ef416fc2 | 1137 | */ |
e1d6a774 | 1138 | |
1139 | for (cmap = cg->cmap_cache; cmap; cmap = cmap->next) | |
1140 | { | |
1141 | if (cmap->encoding == encoding) | |
1142 | { | |
1143 | cmap->used ++; | |
1144 | DEBUG_printf((" returning existing cmap=%p\n", cmap)); | |
1145 | return ((void *)cmap); | |
1146 | } | |
1147 | } | |
ef416fc2 | 1148 | |
1149 | /* | |
e1d6a774 | 1150 | * Open SBCS charset map input file... |
ef416fc2 | 1151 | */ |
e1d6a774 | 1152 | |
1153 | if ((fp = cupsFileOpen(filename, "r")) == NULL) | |
1154 | return (NULL); | |
ef416fc2 | 1155 | |
1156 | /* | |
e1d6a774 | 1157 | * Allocate memory for SBCS charset map... |
ef416fc2 | 1158 | */ |
e1d6a774 | 1159 | |
1160 | if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL) | |
1161 | { | |
1162 | cupsFileClose(fp); | |
1163 | DEBUG_puts(" Unable to allocate memory!"); | |
1164 | return (NULL); | |
1165 | } | |
1166 | ||
1167 | cmap->used ++; | |
1168 | cmap->encoding = encoding; | |
ef416fc2 | 1169 | |
1170 | /* | |
e1d6a774 | 1171 | * Save SBCS charset map into memory for transcoding... |
ef416fc2 | 1172 | */ |
e1d6a774 | 1173 | |
1174 | while (cupsFileGets(fp, line, sizeof(line))) | |
ef416fc2 | 1175 | { |
e1d6a774 | 1176 | if (line[0] != '0') |
1177 | continue; | |
1178 | ||
1179 | legchar = strtol(line, &s, 16); | |
1180 | if (legchar < 0 || legchar > 0xff) | |
1181 | goto sbcs_error; | |
1182 | ||
1183 | unichar = strtol(s, NULL, 16); | |
1184 | if (unichar < 0 || unichar > 0xffff) | |
1185 | goto sbcs_error; | |
ef416fc2 | 1186 | |
1187 | /* | |
e1d6a774 | 1188 | * Save legacy to Unicode mapping in direct lookup table... |
ef416fc2 | 1189 | */ |
e1d6a774 | 1190 | |
1191 | crow = cmap->char2uni + legchar; | |
1192 | *crow = (cups_ucs2_t)(unichar & 0xffff); | |
ef416fc2 | 1193 | |
1194 | /* | |
e1d6a774 | 1195 | * Save Unicode to legacy mapping in indirect lookup table... |
ef416fc2 | 1196 | */ |
e1d6a774 | 1197 | |
1198 | srow = cmap->uni2char[(unichar >> 8) & 0xff]; | |
1199 | if (!srow) | |
ef416fc2 | 1200 | { |
e1d6a774 | 1201 | srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t)); |
1202 | if (!srow) | |
1203 | goto sbcs_error; | |
1204 | ||
1205 | cmap->uni2char[(unichar >> 8) & 0xff] = srow; | |
ef416fc2 | 1206 | } |
1207 | ||
e1d6a774 | 1208 | srow += unichar & 0xff; |
1209 | ||
ef416fc2 | 1210 | /* |
e1d6a774 | 1211 | * Convert Replacement Character to visible replacement... |
ef416fc2 | 1212 | */ |
e1d6a774 | 1213 | |
1214 | if (unichar == 0xfffd) | |
1215 | legchar = (unsigned long)'?'; | |
ef416fc2 | 1216 | |
1217 | /* | |
e1d6a774 | 1218 | * First (oldest) legacy character uses Unicode mapping cell... |
ef416fc2 | 1219 | */ |
ef416fc2 | 1220 | |
e1d6a774 | 1221 | if (!*srow) |
1222 | *srow = (cups_sbcs_t)legchar; | |
1223 | } | |
ef416fc2 | 1224 | |
e1d6a774 | 1225 | cupsFileClose(fp); |
1226 | ||
ef416fc2 | 1227 | /* |
e1d6a774 | 1228 | * Add it to the cache and return... |
ef416fc2 | 1229 | */ |
e1d6a774 | 1230 | |
1231 | cmap->next = cg->cmap_cache; | |
1232 | cg->cmap_cache = cmap; | |
1233 | ||
1234 | DEBUG_printf((" returning new cmap=%p\n", cmap)); | |
1235 | ||
1236 | return (cmap); | |
ef416fc2 | 1237 | |
1238 | /* | |
e1d6a774 | 1239 | * If we get here, there was an error in the cmap file... |
ef416fc2 | 1240 | */ |
e1d6a774 | 1241 | |
1242 | sbcs_error: | |
1243 | ||
1244 | free_sbcs_charmap(cmap); | |
1245 | ||
1246 | cupsFileClose(fp); | |
1247 | ||
1248 | DEBUG_puts(" Error, returning NULL!"); | |
1249 | ||
1250 | return (NULL); | |
1251 | } | |
1252 | ||
1253 | ||
1254 | /* | |
1255 | * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap. | |
1256 | */ | |
1257 | ||
1258 | static _cups_vmap_t * /* O - Charmap or 0 on error */ | |
1259 | get_vbcs_charmap( | |
1260 | const cups_encoding_t encoding, /* I - Charmap Encoding */ | |
1261 | const char *filename) /* I - Charmap Filename */ | |
1262 | { | |
1263 | _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ | |
1264 | cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ | |
1265 | cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ | |
1266 | _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */ | |
1267 | cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */ | |
1268 | unsigned long legchar; /* Legacy character value */ | |
1269 | cups_utf32_t unichar; /* Unicode character value */ | |
1270 | int mapcount; /* Count of lines in charmap file */ | |
1271 | cups_file_t *fp; /* Charset map file pointer */ | |
1272 | char *s; /* Line parsing pointer */ | |
1273 | char line[256]; /* Line from charset map file */ | |
1274 | int i; /* Loop variable */ | |
1275 | int wide; /* 32-bit legacy char */ | |
1276 | _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */ | |
1277 | ||
1278 | ||
1279 | DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n", | |
1280 | encoding, filename)); | |
ef416fc2 | 1281 | |
1282 | /* | |
e1d6a774 | 1283 | * See if we already have this DBCS/VBCS charset map loaded... |
ef416fc2 | 1284 | */ |
ef416fc2 | 1285 | |
e1d6a774 | 1286 | for (vmap = cg->vmap_cache; vmap; vmap = vmap->next) |
1287 | { | |
1288 | if (vmap->encoding == encoding) | |
ef416fc2 | 1289 | { |
e1d6a774 | 1290 | vmap->used ++; |
1291 | DEBUG_printf((" returning existing vmap=%p\n", vmap)); | |
1292 | return ((void *)vmap); | |
ef416fc2 | 1293 | } |
ef416fc2 | 1294 | } |
ef416fc2 | 1295 | |
1296 | /* | |
e1d6a774 | 1297 | * Open VBCS charset map input file... |
ef416fc2 | 1298 | */ |
ef416fc2 | 1299 | |
e1d6a774 | 1300 | if ((fp = cupsFileOpen(filename, "r")) == NULL) |
1301 | { | |
1302 | DEBUG_printf((" Unable to open file: %s\n", strerror(errno))); | |
1303 | return (NULL); | |
1304 | } | |
ef416fc2 | 1305 | |
1306 | /* | |
e1d6a774 | 1307 | * Count lines in charmap file... |
ef416fc2 | 1308 | */ |
e1d6a774 | 1309 | |
1310 | if ((mapcount = get_charmap_count(fp)) <= 0) | |
1311 | { | |
1312 | DEBUG_puts(" Unable to get charmap count!"); | |
1313 | return (NULL); | |
1314 | } | |
1315 | ||
1316 | DEBUG_printf((" mapcount=%d\n", mapcount)); | |
ef416fc2 | 1317 | |
1318 | /* | |
e1d6a774 | 1319 | * Allocate memory for DBCS/VBCS charset map... |
ef416fc2 | 1320 | */ |
e1d6a774 | 1321 | |
1322 | if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL) | |
1323 | { | |
1324 | cupsFileClose(fp); | |
1325 | DEBUG_puts(" Unable to allocate memory!"); | |
1326 | return (NULL); | |
1327 | } | |
1328 | ||
1329 | vmap->used ++; | |
1330 | vmap->encoding = encoding; | |
ef416fc2 | 1331 | |
1332 | /* | |
e1d6a774 | 1333 | * Save DBCS/VBCS charset map into memory for transcoding... |
ef416fc2 | 1334 | */ |
e1d6a774 | 1335 | |
1336 | leadchar = 0; | |
1337 | wide2uni = NULL; | |
1338 | ||
1339 | cupsFileRewind(fp); | |
1340 | ||
1341 | i = 0; | |
1342 | wide = 0; | |
1343 | ||
1344 | while (cupsFileGets(fp, line, sizeof(line))) | |
ef416fc2 | 1345 | { |
e1d6a774 | 1346 | if (line[0] != '0') |
1347 | continue; | |
1348 | ||
1349 | legchar = strtoul(line, &s, 16); | |
1350 | if (legchar == ULONG_MAX) | |
1351 | goto vbcs_error; | |
1352 | ||
1353 | unichar = strtol(s, NULL, 16); | |
1354 | if (unichar < 0 || unichar > 0xffff) | |
1355 | goto vbcs_error; | |
1356 | ||
1357 | i ++; | |
1358 | ||
1359 | /* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i, | |
1360 | legchar, (unsigned)unichar)); */ | |
ef416fc2 | 1361 | |
1362 | /* | |
e1d6a774 | 1363 | * Save lead char of 2/3/4-byte legacy char... |
ef416fc2 | 1364 | */ |
e1d6a774 | 1365 | |
1366 | if (legchar > 0xff && legchar <= 0xffff) | |
ef416fc2 | 1367 | { |
e1d6a774 | 1368 | leadchar = (cups_sbcs_t)(legchar >> 8); |
1369 | vmap->lead2char[leadchar] = leadchar; | |
1370 | } | |
1371 | ||
1372 | if (legchar > 0xffff && legchar <= 0xffffff) | |
1373 | { | |
1374 | leadchar = (cups_sbcs_t)(legchar >> 16); | |
1375 | vmap->lead3char[leadchar] = leadchar; | |
1376 | } | |
1377 | ||
1378 | if (legchar > 0xffffff) | |
1379 | { | |
1380 | leadchar = (cups_sbcs_t)(legchar >> 24); | |
1381 | vmap->lead4char[leadchar] = leadchar; | |
ef416fc2 | 1382 | } |
1383 | ||
1384 | /* | |
e1d6a774 | 1385 | * Save Legacy to Unicode mapping... |
ef416fc2 | 1386 | */ |
e1d6a774 | 1387 | |
1388 | if (legchar <= 0xffff) | |
ef416fc2 | 1389 | { |
ef416fc2 | 1390 | /* |
e1d6a774 | 1391 | * Save DBCS 16-bit to Unicode mapping in indirect lookup table... |
ef416fc2 | 1392 | */ |
e1d6a774 | 1393 | |
1394 | crow = vmap->char2uni[(int)leadchar]; | |
1395 | if (!crow) | |
1396 | { | |
1397 | crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t)); | |
1398 | if (!crow) | |
1399 | goto vbcs_error; | |
1400 | ||
1401 | vmap->char2uni[(int)leadchar] = crow; | |
1402 | } | |
1403 | ||
1404 | crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar; | |
1405 | } | |
1406 | else | |
1407 | { | |
1408 | /* | |
1409 | * Save VBCS 32-bit to Unicode mapping in sorted list table... | |
1410 | */ | |
1411 | ||
1412 | if (!wide) | |
1413 | { | |
1414 | wide = 1; | |
1415 | vmap->widecount = (mapcount - i + 1); | |
1416 | wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount, | |
1417 | sizeof(_cups_wide2uni_t)); | |
1418 | if (!wide2uni) | |
1419 | goto vbcs_error; | |
1420 | ||
1421 | vmap->wide2uni = wide2uni; | |
1422 | } | |
1423 | ||
1424 | wide2uni->widechar = (cups_vbcs_t)legchar; | |
1425 | wide2uni->unichar = (cups_ucs2_t)unichar; | |
1426 | wide2uni ++; | |
ef416fc2 | 1427 | } |
1428 | ||
1429 | /* | |
e1d6a774 | 1430 | * Save Unicode to legacy mapping in indirect lookup table... |
ef416fc2 | 1431 | */ |
e1d6a774 | 1432 | |
1433 | vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)]; | |
1434 | if (!vrow) | |
ef416fc2 | 1435 | { |
e1d6a774 | 1436 | vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t)); |
1437 | if (!vrow) | |
1438 | goto vbcs_error; | |
1439 | ||
1440 | vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow; | |
ef416fc2 | 1441 | } |
e1d6a774 | 1442 | |
1443 | vrow += (int)(unichar & 0xff); | |
ef416fc2 | 1444 | |
1445 | /* | |
e1d6a774 | 1446 | * Convert Replacement Character to visible replacement... |
ef416fc2 | 1447 | */ |
e1d6a774 | 1448 | |
1449 | if (unichar == 0xfffd) | |
1450 | legchar = (unsigned long)'?'; | |
ef416fc2 | 1451 | |
1452 | /* | |
e1d6a774 | 1453 | * First (oldest) legacy character uses Unicode mapping cell... |
ef416fc2 | 1454 | */ |
e1d6a774 | 1455 | |
1456 | if (!*vrow) | |
1457 | *vrow = (cups_vbcs_t)legchar; | |
ef416fc2 | 1458 | } |
e1d6a774 | 1459 | |
1460 | vmap->charcount = (i - vmap->widecount); | |
1461 | ||
1462 | cupsFileClose(fp); | |
ef416fc2 | 1463 | |
1464 | /* | |
e1d6a774 | 1465 | * Add it to the cache and return... |
ef416fc2 | 1466 | */ |
ef416fc2 | 1467 | |
e1d6a774 | 1468 | vmap->next = cg->vmap_cache; |
1469 | cg->vmap_cache = vmap; | |
1470 | ||
1471 | DEBUG_printf((" returning new vmap=%p\n", vmap)); | |
1472 | ||
1473 | return (vmap); | |
1474 | ||
1475 | /* | |
1476 | * If we get here, the file contains errors... | |
1477 | */ | |
1478 | ||
1479 | vbcs_error: | |
1480 | ||
1481 | free_vbcs_charmap(vmap); | |
1482 | ||
1483 | cupsFileClose(fp); | |
1484 | ||
1485 | DEBUG_puts(" Error, returning NULL!"); | |
1486 | ||
1487 | return (NULL); | |
ef416fc2 | 1488 | } |
1489 | ||
1490 | ||
1491 | /* | |
e1d6a774 | 1492 | * End of "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $" |
ef416fc2 | 1493 | */ |