]> git.ipfire.org Git - thirdparty/cups.git/blame - cups/transcode.c
Load cups into easysw/current.
[thirdparty/cups.git] / cups / transcode.c
CommitLineData
ef416fc2 1/*
bc44d920 2 * "$Id: transcode.c 6649 2007-07-11 21:46:42Z mike $"
ef416fc2 3 *
4 * Transcoding support for the Common UNIX Printing System (CUPS).
5 *
bc44d920 6 * Copyright 2007 by Apple Inc.
b86bc4cf 7 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 8 *
bc44d920 9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * missing or damaged, see the license at "http://www.cups.org/".
ef416fc2 14 *
bc44d920 15 * This file is subject to the Apple OS-Developed Software exception.
ef416fc2 16 *
17 * Contents:
18 *
fa73b229 19 * _cupsCharmapFlush() - Flush all character set maps out of cache.
e1d6a774 20 * _cupsCharmapFree() - Free a character set map.
21 * _cupsCharmapGet() - Get a character set map.
ef416fc2 22 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
e1d6a774 23 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
ef416fc2 24 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
25 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
e1d6a774 26 * compare_wide() - Compare key for wide (VBCS) match.
27 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
ef416fc2 28 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
29 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
ef416fc2 30 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
e1d6a774 31 * free_sbcs_charmap() - Free memory used by a single byte character set.
32 * free_vbcs_charmap() - Free memory used by a variable byte character set.
d6ae789d 33 * get_charmap() - Lookup or get a character set map (private).
e1d6a774 34 * get_charmap_count() - Count lines in a charmap file.
35 * get_sbcs_charmap() - Get SBCS Charmap.
36 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
ef416fc2 37 */
38
39/*
40 * Include necessary headers...
41 */
42
43#include "globals.h"
e1d6a774 44#include "debug.h"
e53920b9 45#include <limits.h>
ef416fc2 46#include <stdlib.h>
47#include <errno.h>
48#include <time.h>
49
50
d6ae789d 51/*
52 * Local globals...
53 */
54
55#ifdef HAVE_PTHREAD_H
56static pthread_mutex_t map_mutex = PTHREAD_MUTEX_INITIALIZER;
57 /* Mutex to control access to maps */
58#endif /* HAVE_PTHREAD_H */
59static _cups_cmap_t *cmap_cache = NULL;
60 /* SBCS Charmap Cache */
61static _cups_vmap_t *vmap_cache = NULL;
62 /* VBCS Charmap Cache */
63
64
ef416fc2 65/*
e1d6a774 66 * Local functions...
ef416fc2 67 */
68
e1d6a774 69static int compare_wide(const void *k1, const void *k2);
70static int conv_sbcs_to_utf8(cups_utf8_t *dest,
71 const cups_sbcs_t *src,
72 int maxout,
73 const cups_encoding_t encoding);
74static int conv_utf8_to_sbcs(cups_sbcs_t *dest,
75 const cups_utf8_t *src,
76 int maxout,
77 const cups_encoding_t encoding);
78static int conv_utf8_to_vbcs(cups_sbcs_t *dest,
79 const cups_utf8_t *src,
80 int maxout,
81 const cups_encoding_t encoding);
82static int conv_vbcs_to_utf8(cups_utf8_t *dest,
83 const cups_sbcs_t *src,
84 int maxout,
85 const cups_encoding_t encoding);
86static void free_sbcs_charmap(_cups_cmap_t *sbcs);
87static void free_vbcs_charmap(_cups_vmap_t *vbcs);
d6ae789d 88static void *get_charmap(const cups_encoding_t encoding);
e1d6a774 89static int get_charmap_count(cups_file_t *fp);
90static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
91 const char *filename);
92static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
93 const char *filename);
94
ef416fc2 95
96/*
e1d6a774 97 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
ef416fc2 98 */
99
e1d6a774 100void
d6ae789d 101_cupsCharmapFlush(void)
ef416fc2 102{
e1d6a774 103 _cups_cmap_t *cmap, /* Legacy SBCS / Unicode Charset Map */
104 *cnext; /* Next Legacy SBCS Charset Map */
105 _cups_vmap_t *vmap, /* Legacy VBCS / Unicode Charset Map */
106 *vnext; /* Next Legacy VBCS Charset Map */
ef416fc2 107
108
d6ae789d 109#ifdef HAVE_PTHREAD_H
110 pthread_mutex_lock(&map_mutex);
111#endif /* HAVE_PTHREAD_H */
112
ef416fc2 113 /*
e1d6a774 114 * Loop through SBCS charset map cache, free all memory...
ef416fc2 115 */
116
d6ae789d 117 for (cmap = cmap_cache; cmap; cmap = cnext)
e1d6a774 118 {
119 cnext = cmap->next;
ef416fc2 120
e1d6a774 121 free_sbcs_charmap(cmap);
122 }
ef416fc2 123
d6ae789d 124 cmap_cache = NULL;
ef416fc2 125
126 /*
e1d6a774 127 * Loop through DBCS/VBCS charset map cache, free all memory...
ef416fc2 128 */
129
d6ae789d 130 for (vmap = vmap_cache; vmap; vmap = vnext)
e1d6a774 131 {
132 vnext = vmap->next;
133
134 free_vbcs_charmap(vmap);
135
136 free(vmap);
137 }
138
d6ae789d 139 vmap_cache = NULL;
140
141#ifdef HAVE_PTHREAD_H
142 pthread_mutex_unlock(&map_mutex);
143#endif /* HAVE_PTHREAD_H */
ef416fc2 144}
145
e1d6a774 146
ef416fc2 147/*
e1d6a774 148 * '_cupsCharmapFree()' - Free a character set map.
ef416fc2 149 *
e1d6a774 150 * This does not actually free; use '_cupsCharmapFlush()' for that.
ef416fc2 151 */
e1d6a774 152
ef416fc2 153void
e1d6a774 154_cupsCharmapFree(
155 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 156{
e1d6a774 157 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
158 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
e1d6a774 159
ef416fc2 160
161 /*
162 * See if we already have this SBCS charset map loaded...
163 */
e1d6a774 164
d6ae789d 165#ifdef HAVE_PTHREAD_H
166 pthread_mutex_lock(&map_mutex);
167#endif /* HAVE_PTHREAD_H */
168
169 for (cmap = cmap_cache; cmap; cmap = cmap->next)
ef416fc2 170 {
171 if (cmap->encoding == encoding)
172 {
173 if (cmap->used > 0)
174 cmap->used --;
d6ae789d 175 break;
ef416fc2 176 }
177 }
178
179 /*
180 * See if we already have this DBCS/VBCS charset map loaded...
181 */
e1d6a774 182
d6ae789d 183 for (vmap = vmap_cache; vmap; vmap = vmap->next)
ef416fc2 184 {
185 if (vmap->encoding == encoding)
186 {
187 if (vmap->used > 0)
188 vmap->used --;
d6ae789d 189 break;
ef416fc2 190 }
191 }
d6ae789d 192
193#ifdef HAVE_PTHREAD_H
194 pthread_mutex_unlock(&map_mutex);
195#endif /* HAVE_PTHREAD_H */
fa73b229 196}
197
198
199/*
e1d6a774 200 * '_cupsCharmapGet()' - Get a character set map.
201 *
202 * This code handles single-byte (SBCS), double-byte (DBCS), and
203 * variable-byte (VBCS) character sets _without_ charset escapes...
204 * This code does not handle multiple-byte character sets (MBCS)
205 * (such as ISO-2022-JP) with charset switching via escapes...
fa73b229 206 */
207
e1d6a774 208void * /* O - Charset map pointer */
209_cupsCharmapGet(
210 const cups_encoding_t encoding) /* I - Encoding */
fa73b229 211{
d6ae789d 212 void *charmap; /* Charset map pointer */
e1d6a774 213
fa73b229 214
e1d6a774 215 DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
ef416fc2 216
217 /*
e1d6a774 218 * Check for valid arguments...
ef416fc2 219 */
e1d6a774 220
221 if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
ef416fc2 222 {
e1d6a774 223 DEBUG_puts(" Bad encoding, returning NULL!");
224 return (NULL);
ef416fc2 225 }
ef416fc2 226
227 /*
d6ae789d 228 * Lookup or get the charset map pointer and return...
ef416fc2 229 */
e1d6a774 230
d6ae789d 231#ifdef HAVE_PTHREAD_H
232 pthread_mutex_lock(&map_mutex);
233#endif /* HAVE_PTHREAD_H */
e1d6a774 234
d6ae789d 235 charmap = get_charmap(encoding);
e1d6a774 236
d6ae789d 237#ifdef HAVE_PTHREAD_H
238 pthread_mutex_unlock(&map_mutex);
239#endif /* HAVE_PTHREAD_H */
e1d6a774 240
d6ae789d 241 return (charmap);
ef416fc2 242}
243
e1d6a774 244
ef416fc2 245/*
e1d6a774 246 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
ef416fc2 247 *
248 * This code handles single-byte (SBCS), double-byte (DBCS), and
249 * variable-byte (VBCS) character sets _without_ charset escapes...
250 * This code does not handle multiple-byte character sets (MBCS)
251 * (such as ISO-2022-JP) with charset switching via escapes...
252 */
e1d6a774 253
254int /* O - Count or -1 on error */
255cupsCharsetToUTF8(
256 cups_utf8_t *dest, /* O - Target string */
257 const char *src, /* I - Source string */
258 const int maxout, /* I - Max output */
259 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 260{
d6ae789d 261 int bytes; /* Number of bytes converted */
262
263
ef416fc2 264 /*
265 * Check for valid arguments...
266 */
267
e1d6a774 268 DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
269 dest, src, maxout, encoding));
270
271 if (dest)
272 *dest = '\0';
273
ef416fc2 274 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
e1d6a774 275 {
276 DEBUG_puts(" Bad arguments, returning -1");
ef416fc2 277 return (-1);
e1d6a774 278 }
ef416fc2 279
280 /*
281 * Handle identity conversions...
282 */
283
284 if (encoding == CUPS_UTF8 ||
285 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
286 {
e1d6a774 287 strlcpy((char *)dest, src, maxout);
b86bc4cf 288 return ((int)strlen((char *)dest));
ef416fc2 289 }
290
411affcf 291 /*
292 * Handle ISO-8859-1 to UTF-8 directly...
293 */
294
295 if (encoding == CUPS_ISO8859_1)
296 {
297 int ch; /* Character from string */
298 cups_utf8_t *destptr, /* Pointer into UTF-8 buffer */
299 *destend; /* End of UTF-8 buffer */
300
301
302 destptr = dest;
303 destend = dest + maxout - 2;
304
305 while (*src && destptr < destend)
306 {
307 ch = *src++ & 255;
308
309 if (ch & 128)
310 {
311 *destptr++ = 0xc0 | (ch >> 6);
312 *destptr++ = 0x80 | (ch & 0x3f);
313 }
314 else
315 *destptr++ = ch;
316 }
317
318 *destptr = '\0';
319
b86bc4cf 320 return ((int)(destptr - dest));
411affcf 321 }
322
ef416fc2 323 /*
e1d6a774 324 * Convert input legacy charset to UTF-8...
ef416fc2 325 */
e1d6a774 326
d6ae789d 327#ifdef HAVE_PTHREAD_H
328 pthread_mutex_lock(&map_mutex);
329#endif /* HAVE_PTHREAD_H */
330
ef416fc2 331 if (encoding < CUPS_ENCODING_SBCS_END)
d6ae789d 332 bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
ef416fc2 333 else if (encoding < CUPS_ENCODING_VBCS_END)
d6ae789d 334 bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
ef416fc2 335 else
e1d6a774 336 {
d6ae789d 337 DEBUG_puts(" Bad encoding, returning -1");
338 bytes = -1;
e1d6a774 339 }
d6ae789d 340
341#ifdef HAVE_PTHREAD_H
342 pthread_mutex_unlock(&map_mutex);
343#endif /* HAVE_PTHREAD_H */
344
345 return (bytes);
ef416fc2 346}
347
e1d6a774 348
ef416fc2 349/*
e1d6a774 350 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
ef416fc2 351 *
352 * This code handles single-byte (SBCS), double-byte (DBCS), and
353 * variable-byte (VBCS) character sets _without_ charset escapes...
354 * This code does not handle multiple-byte character sets (MBCS)
355 * (such as ISO-2022-JP) with charset switching via escapes...
356 */
e1d6a774 357
358int /* O - Count or -1 on error */
359cupsUTF8ToCharset(
360 char *dest, /* O - Target string */
361 const cups_utf8_t *src, /* I - Source string */
362 const int maxout, /* I - Max output */
363 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 364{
d6ae789d 365 int bytes; /* Number of bytes converted */
366
367
ef416fc2 368 /*
369 * Check for valid arguments...
370 */
371
372 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
e1d6a774 373 {
374 if (dest)
375 *dest = '\0';
376
ef416fc2 377 return (-1);
e1d6a774 378 }
ef416fc2 379
380 /*
381 * Handle identity conversions...
382 */
383
384 if (encoding == CUPS_UTF8 ||
385 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
386 {
e1d6a774 387 strlcpy(dest, (char *)src, maxout);
b86bc4cf 388 return ((int)strlen(dest));
ef416fc2 389 }
390
411affcf 391 /*
392 * Handle UTF-8 to ISO-8859-1 directly...
393 */
394
395 if (encoding == CUPS_ISO8859_1)
396 {
397 int ch; /* Character from string */
398 char *destptr, /* Pointer into ISO-8859-1 buffer */
399 *destend; /* End of ISO-8859-1 buffer */
400
401
402 destptr = dest;
403 destend = dest + maxout - 1;
404
405 while (*src && destptr < destend)
406 {
407 ch = *src++;
408
409 if ((ch & 0xe0) == 0xc0)
410 {
411 ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
412
413 if (ch < 256)
414 *destptr++ = ch;
415 else
416 *destptr++ = '?';
417 }
418 else if ((ch & 0xf0) == 0xe0 ||
419 (ch & 0xf8) == 0xf0)
420 *destptr++ = '?';
421 else if (!(ch & 0x80))
422 *destptr++ = ch;
423 }
424
425 *destptr = '\0';
426
b86bc4cf 427 return ((int)(destptr - dest));
411affcf 428 }
429
ef416fc2 430 /*
e1d6a774 431 * Convert input UTF-8 to legacy charset...
ef416fc2 432 */
e1d6a774 433
d6ae789d 434#ifdef HAVE_PTHREAD_H
435 pthread_mutex_lock(&map_mutex);
436#endif /* HAVE_PTHREAD_H */
437
ef416fc2 438 if (encoding < CUPS_ENCODING_SBCS_END)
d6ae789d 439 bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
ef416fc2 440 else if (encoding < CUPS_ENCODING_VBCS_END)
d6ae789d 441 bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
ef416fc2 442 else
d6ae789d 443 bytes = -1;
444
445#ifdef HAVE_PTHREAD_H
446 pthread_mutex_unlock(&map_mutex);
447#endif /* HAVE_PTHREAD_H */
448
449 return (bytes);
ef416fc2 450}
451
ef416fc2 452
453/*
454 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
455 *
456 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
457 *
458 * UTF-32 char UTF-8 char(s)
459 * --------------------------------------------------
e1d6a774 460 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 461 * 128 to 2047 = 110xxxxx 10yyyyyy
462 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 463 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 464 *
465 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
466 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 467 */
e1d6a774 468
469int /* O - Count or -1 on error */
470cupsUTF8ToUTF32(
471 cups_utf32_t *dest, /* O - Target string */
472 const cups_utf8_t *src, /* I - Source string */
473 const int maxout) /* I - Max output */
ef416fc2 474{
e1d6a774 475 int i; /* Looping variable */
476 cups_utf8_t ch; /* Character value */
477 cups_utf8_t next; /* Next character value */
478 cups_utf32_t ch32; /* UTF-32 character value */
479
ef416fc2 480
481 /*
482 * Check for valid arguments and clear output...
483 */
e1d6a774 484
485 if (dest)
486 *dest = 0;
487
488 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
ef416fc2 489 return (-1);
ef416fc2 490
491 /*
492 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
493 */
e1d6a774 494
495 *dest++ = 0xfeff;
e1d6a774 496
497 for (i = maxout - 1; *src && i > 0; i --)
ef416fc2 498 {
e1d6a774 499 ch = *src++;
ef416fc2 500
501 /*
502 * Convert UTF-8 character(s) to UTF-32 character...
503 */
e1d6a774 504
505 if (!(ch & 0x80))
ef416fc2 506 {
507 /*
508 * One-octet UTF-8 <= 127 (US-ASCII)...
509 */
e1d6a774 510
511 *dest++ = ch;
2abf387c 512 continue;
ef416fc2 513 }
514 else if ((ch & 0xe0) == 0xc0)
515 {
516 /*
517 * Two-octet UTF-8 <= 2047 (Latin-x)...
518 */
e1d6a774 519
520 next = *src++;
521 if (!next)
ef416fc2 522 return (-1);
e1d6a774 523
ef416fc2 524 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
525
526 /*
527 * Check for non-shortest form (invalid UTF-8)...
528 */
e1d6a774 529
530 if (ch32 < 0x80)
ef416fc2 531 return (-1);
e1d6a774 532
533 *dest++ = ch32;
ef416fc2 534 }
535 else if ((ch & 0xf0) == 0xe0)
536 {
537 /*
538 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
539 */
e1d6a774 540
541 next = *src++;
542 if (!next)
ef416fc2 543 return (-1);
e1d6a774 544
545 ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
546
547 next = *src++;
548 if (!next)
ef416fc2 549 return (-1);
e1d6a774 550
551 ch32 = (ch32 << 6) | (next & 0x3f);
ef416fc2 552
553 /*
554 * Check for non-shortest form (invalid UTF-8)...
555 */
e1d6a774 556
557 if (ch32 < 0x800)
ef416fc2 558 return (-1);
e1d6a774 559
560 *dest++ = ch32;
ef416fc2 561 }
562 else if ((ch & 0xf8) == 0xf0)
563 {
564 /*
e1d6a774 565 * Four-octet UTF-8...
ef416fc2 566 */
e1d6a774 567
568 next = *src++;
569 if (!next)
ef416fc2 570 return (-1);
e1d6a774 571
572 ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
573
574 next = *src++;
575 if (!next)
576 return (-1);
577
578 ch32 = (ch32 << 6) | (next & 0x3f);
579
580 next = *src++;
581 if (!next)
582 return (-1);
583
584 ch32 = (ch32 << 6) | (next & 0x3f);
585
ef416fc2 586 /*
e1d6a774 587 * Check for non-shortest form (invalid UTF-8)...
ef416fc2 588 */
e1d6a774 589
590 if (ch32 < 0x10000)
591 return (-1);
592
593 *dest++ = ch32;
ef416fc2 594 }
595 else
596 {
597 /*
e1d6a774 598 * More than 4-octet (invalid UTF-8 sequence)...
ef416fc2 599 */
e1d6a774 600
ef416fc2 601 return (-1);
602 }
603
604 /*
605 * Check for UTF-16 surrogate (illegal UTF-8)...
606 */
ef416fc2 607
2abf387c 608 if (ch32 >= 0xd800 && ch32 <= 0xdfff)
ef416fc2 609 return (-1);
610 }
e1d6a774 611
ef416fc2 612 *dest = 0;
e1d6a774 613
ef416fc2 614 return (i);
615}
616
e1d6a774 617
ef416fc2 618/*
619 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
620 *
621 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
622 *
623 * UTF-32 char UTF-8 char(s)
624 * --------------------------------------------------
e1d6a774 625 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 626 * 128 to 2047 = 110xxxxx 10yyyyyy
627 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 628 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 629 *
630 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
631 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 632 */
e1d6a774 633
634int /* O - Count or -1 on error */
635cupsUTF32ToUTF8(
636 cups_utf8_t *dest, /* O - Target string */
637 const cups_utf32_t *src, /* I - Source string */
638 const int maxout) /* I - Max output */
ef416fc2 639{
e1d6a774 640 cups_utf8_t *start; /* Start of destination string */
641 int i; /* Looping variable */
642 int swap; /* Byte-swap input to output */
643 cups_utf32_t ch; /* Character value */
644
ef416fc2 645
646 /*
647 * Check for valid arguments and clear output...
648 */
e1d6a774 649
650 if (dest)
651 *dest = '\0';
652
653 if (!dest || !src || maxout < 1)
ef416fc2 654 return (-1);
ef416fc2 655
656 /*
657 * Check for leading BOM in UTF-32 and inverted BOM...
658 */
e1d6a774 659
660 start = dest;
661 swap = *src == 0xfffe0000;
662
663 if (*src == 0xfffe0000 || *src == 0xfeff)
664 src ++;
ef416fc2 665
666 /*
667 * Convert input UTF-32 to output UTF-8...
668 */
e1d6a774 669
670 for (i = maxout - 1; *src && i > 0;)
ef416fc2 671 {
e1d6a774 672 ch = *src++;
ef416fc2 673
674 /*
675 * Byte swap input UTF-32, if necessary...
e1d6a774 676 * (only byte-swapping 24 of 32 bits)
ef416fc2 677 */
e1d6a774 678
ef416fc2 679 if (swap)
680 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
681
682 /*
e1d6a774 683 * Check for beyond Plane 16 (invalid UTF-32)...
ef416fc2 684 */
ef416fc2 685
ef416fc2 686 if (ch > 0x10ffff)
687 return (-1);
688
ef416fc2 689 /*
690 * Convert UTF-32 character to UTF-8 character(s)...
691 */
e1d6a774 692
693 if (ch < 0x80)
ef416fc2 694 {
695 /*
696 * One-octet UTF-8 <= 127 (US-ASCII)...
697 */
e1d6a774 698
699 *dest++ = (cups_utf8_t)ch;
700 i --;
ef416fc2 701 }
e1d6a774 702 else if (ch < 0x800)
ef416fc2 703 {
704 /*
705 * Two-octet UTF-8 <= 2047 (Latin-x)...
706 */
e1d6a774 707
708 if (i < 2)
709 return (-1);
710
711 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
712 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
713 i -= 2;
ef416fc2 714 }
e1d6a774 715 else if (ch < 0x10000)
ef416fc2 716 {
717 /*
718 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
719 */
e1d6a774 720
721 if (i < 3)
722 return (-1);
723
724 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
725 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
726 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
727 i -= 3;
728 }
729 else
730 {
731 /*
732 * Four-octet UTF-8...
733 */
734
735 if (i < 4)
736 return (-1);
737
738 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
739 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
740 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
741 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
742 i -= 4;
ef416fc2 743 }
744 }
e1d6a774 745
ef416fc2 746 *dest = '\0';
e1d6a774 747
748 return ((int)(dest - start));
ef416fc2 749}
750
e1d6a774 751
ef416fc2 752/*
e1d6a774 753 * 'compare_wide()' - Compare key for wide (VBCS) match.
754 */
755
756static int
757compare_wide(const void *k1, /* I - Key char */
758 const void *k2) /* I - Map char */
759{
760 cups_vbcs_t key; /* Legacy key character */
761 cups_vbcs_t map; /* Legacy map character */
762
763
764 key = *((cups_vbcs_t *)k1);
765 map = ((_cups_wide2uni_t *)k2)->widechar;
766
767 return ((int)(key - map));
768}
769
770
771/*
772 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
ef416fc2 773 */
e1d6a774 774
775static int /* O - Count or -1 on error */
776conv_sbcs_to_utf8(
777 cups_utf8_t *dest, /* O - Target string */
778 const cups_sbcs_t *src, /* I - Source string */
779 int maxout, /* I - Max output */
780 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 781{
e1d6a774 782 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
783 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
784 cups_sbcs_t legchar; /* Legacy character value */
785 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
786 *workptr; /* Pointer into string */
787
ef416fc2 788
789 /*
e1d6a774 790 * Find legacy charset map in cache...
ef416fc2 791 */
e1d6a774 792
d6ae789d 793 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
ef416fc2 794 return (-1);
ef416fc2 795
796 /*
e1d6a774 797 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 798 */
ef416fc2 799
e1d6a774 800 work[0] = 0xfeff;
801 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 802 {
e1d6a774 803 legchar = *src++;
ef416fc2 804
805 /*
e1d6a774 806 * Convert ASCII verbatim (optimization)...
ef416fc2 807 */
ef416fc2 808
e1d6a774 809 if (legchar < 0x80)
810 *workptr++ = (cups_utf32_t)legchar;
811 else
ef416fc2 812 {
e1d6a774 813 /*
814 * Convert unknown character to Replacement Character...
815 */
ef416fc2 816
e1d6a774 817 crow = cmap->char2uni + legchar;
818
819 if (!*crow)
820 *workptr++ = 0xfffd;
821 else
822 *workptr++ = (cups_utf32_t)*crow;
ef416fc2 823 }
ef416fc2 824 }
e1d6a774 825
826 *workptr = 0;
827
828 /*
829 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
830 */
831
d6ae789d 832 cmap->used --;
e1d6a774 833
834 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 835}
836
e1d6a774 837
ef416fc2 838/*
e1d6a774 839 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
ef416fc2 840 */
e1d6a774 841
842static int /* O - Count or -1 on error */
843conv_utf8_to_sbcs(
844 cups_sbcs_t *dest, /* O - Target string */
845 const cups_utf8_t *src, /* I - Source string */
846 int maxout, /* I - Max output */
847 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 848{
e1d6a774 849 cups_sbcs_t *start; /* Start of destination string */
850 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
851 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
852 cups_utf32_t unichar; /* Character value */
853 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
854 *workptr; /* Pointer into string */
855
ef416fc2 856
857 /*
e1d6a774 858 * Find legacy charset map in cache...
ef416fc2 859 */
e1d6a774 860
d6ae789d 861 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
ef416fc2 862 return (-1);
ef416fc2 863
864 /*
e1d6a774 865 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 866 */
e1d6a774 867
868 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
869 return (-1);
ef416fc2 870
871 /*
e1d6a774 872 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
ef416fc2 873 */
e1d6a774 874
875 for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
ef416fc2 876 {
e1d6a774 877 unichar = *workptr++;
878 if (!unichar)
ef416fc2 879 break;
ef416fc2 880
881 /*
e1d6a774 882 * Convert ASCII verbatim (optimization)...
ef416fc2 883 */
ef416fc2 884
e1d6a774 885 if (unichar < 0x80)
886 {
887 *dest++ = (cups_sbcs_t)unichar;
888 continue;
889 }
ef416fc2 890
891 /*
e1d6a774 892 * Convert unknown character to visible replacement...
ef416fc2 893 */
ef416fc2 894
e1d6a774 895 srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
ef416fc2 896
e1d6a774 897 if (srow)
898 srow += (int)(unichar & 0xff);
ef416fc2 899
e1d6a774 900 if (!srow || !*srow)
901 *dest++ = '?';
902 else
903 *dest++ = *srow;
ef416fc2 904 }
ef416fc2 905
e1d6a774 906 *dest = '\0';
907
d6ae789d 908 cmap->used --;
e1d6a774 909
910 return ((int)(dest - start));
ef416fc2 911}
912
e1d6a774 913
ef416fc2 914/*
e1d6a774 915 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
ef416fc2 916 */
e1d6a774 917
918static int /* O - Count or -1 on error */
919conv_utf8_to_vbcs(
920 cups_sbcs_t *dest, /* O - Target string */
921 const cups_utf8_t *src, /* I - Source string */
922 int maxout, /* I - Max output */
923 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 924{
e1d6a774 925 cups_sbcs_t *start; /* Start of destination string */
926 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
927 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
928 cups_utf32_t unichar; /* Character value */
929 cups_vbcs_t legchar; /* Legacy character value */
930 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
931 *workptr; /* Pointer into string */
ef416fc2 932
ef416fc2 933
934 /*
e1d6a774 935 * Find legacy charset map in cache...
ef416fc2 936 */
ef416fc2 937
d6ae789d 938 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
e1d6a774 939 return (-1);
ef416fc2 940
941 /*
e1d6a774 942 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 943 */
e1d6a774 944
945 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
946 return (-1);
ef416fc2 947
948 /*
e1d6a774 949 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
ef416fc2 950 */
e1d6a774 951
952 for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
ef416fc2 953 {
e1d6a774 954 unichar = *workptr++;
955 if (!unichar)
ef416fc2 956 break;
ef416fc2 957
958 /*
e1d6a774 959 * Convert ASCII verbatim (optimization)...
ef416fc2 960 */
e1d6a774 961
962 if (unichar < 0x80)
963 {
b86bc4cf 964 *dest++ = (cups_sbcs_t)unichar;
e1d6a774 965 continue;
966 }
ef416fc2 967
968 /*
e1d6a774 969 * Convert unknown character to visible replacement...
ef416fc2 970 */
e1d6a774 971
972 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
973
974 if (vrow)
975 vrow += (int)(unichar & 0xff);
976
977 if (!vrow || !*vrow)
978 legchar = (cups_vbcs_t)'?';
979 else
980 legchar = (cups_vbcs_t)*vrow;
ef416fc2 981
982 /*
e1d6a774 983 * Save n-byte legacy character...
ef416fc2 984 */
e1d6a774 985
986 if (legchar > 0xffffff)
ef416fc2 987 {
e1d6a774 988 if (maxout < 5)
989 return (-1);
990
991 *dest++ = (cups_sbcs_t)(legchar >> 24);
992 *dest++ = (cups_sbcs_t)(legchar >> 16);
993 *dest++ = (cups_sbcs_t)(legchar >> 8);
994 *dest++ = (cups_sbcs_t)legchar;
995
996 maxout -= 3;
ef416fc2 997 }
e1d6a774 998 else if (legchar > 0xffff)
999 {
1000 if (maxout < 4)
1001 return (-1);
ef416fc2 1002
e1d6a774 1003 *dest++ = (cups_sbcs_t)(legchar >> 16);
1004 *dest++ = (cups_sbcs_t)(legchar >> 8);
1005 *dest++ = (cups_sbcs_t)legchar;
ef416fc2 1006
e1d6a774 1007 maxout -= 2;
1008 }
1009 else if (legchar > 0xff)
1010 {
1011 *dest++ = (cups_sbcs_t)(legchar >> 8);
1012 *dest++ = (cups_sbcs_t)legchar;
1013
1014 maxout --;
1015 }
ef416fc2 1016 }
e1d6a774 1017
1018 *dest = '\0';
1019
d6ae789d 1020 vmap->used --;
e1d6a774 1021
1022 return ((int)(dest - start));
ef416fc2 1023}
1024
e1d6a774 1025
ef416fc2 1026/*
e1d6a774 1027 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
ef416fc2 1028 */
e1d6a774 1029
1030static int /* O - Count or -1 on error */
1031conv_vbcs_to_utf8(
1032 cups_utf8_t *dest, /* O - Target string */
1033 const cups_sbcs_t *src, /* I - Source string */
1034 int maxout, /* I - Max output */
1035 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 1036{
e1d6a774 1037 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1038 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1039 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1040 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
1041 cups_vbcs_t legchar; /* Legacy character value */
1042 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1043 *workptr; /* Pointer into string */
ef416fc2 1044
ef416fc2 1045
1046 /*
e1d6a774 1047 * Find legacy charset map in cache...
ef416fc2 1048 */
ef416fc2 1049
d6ae789d 1050 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
e1d6a774 1051 return (-1);
ef416fc2 1052
1053 /*
e1d6a774 1054 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 1055 */
ef416fc2 1056
e1d6a774 1057 work[0] = 0xfeff;
1058 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 1059 {
e1d6a774 1060 legchar = *src++;
1061 leadchar = (cups_sbcs_t)legchar;
ef416fc2 1062
1063 /*
e1d6a774 1064 * Convert ASCII verbatim (optimization)...
ef416fc2 1065 */
ef416fc2 1066
e1d6a774 1067 if (legchar < 0x80)
ef416fc2 1068 {
e1d6a774 1069 *workptr++ = (cups_utf32_t)legchar;
1070 continue;
ef416fc2 1071 }
1072
1073 /*
e1d6a774 1074 * Convert 2-byte legacy character...
ef416fc2 1075 */
e1d6a774 1076
1077 if (vmap->lead2char[(int)leadchar] == leadchar)
ef416fc2 1078 {
e1d6a774 1079 if (!*src)
1080 return (-1);
1081
1082 legchar = (legchar << 8) | *src++;
1083
ef416fc2 1084 /*
e1d6a774 1085 * Convert unknown character to Replacement Character...
ef416fc2 1086 */
e1d6a774 1087
1088 crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1089 if (crow)
1090 crow += (int) (legchar & 0xff);
1091
1092 if (!crow || !*crow)
1093 *workptr++ = 0xfffd;
1094 else
1095 *workptr++ = (cups_utf32_t)*crow;
1096 continue;
ef416fc2 1097 }
1098
1099 /*
e1d6a774 1100 * Fetch 3-byte or 4-byte legacy character...
ef416fc2 1101 */
e1d6a774 1102
1103 if (vmap->lead3char[(int)leadchar] == leadchar)
ef416fc2 1104 {
e1d6a774 1105 if (!*src || !src[1])
1106 return (-1);
1107
1108 legchar = (legchar << 8) | *src++;
1109 legchar = (legchar << 8) | *src++;
ef416fc2 1110 }
e1d6a774 1111 else if (vmap->lead4char[(int)leadchar] == leadchar)
1112 {
1113 if (!*src || !src[1] || !src[2])
1114 return (-1);
1115
1116 legchar = (legchar << 8) | *src++;
1117 legchar = (legchar << 8) | *src++;
1118 legchar = (legchar << 8) | *src++;
1119 }
1120 else
1121 return (-1);
ef416fc2 1122
1123 /*
e1d6a774 1124 * Find 3-byte or 4-byte legacy character...
ef416fc2 1125 */
e1d6a774 1126
1127 wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1128 vmap->wide2uni,
1129 vmap->widecount,
1130 sizeof(_cups_wide2uni_t),
1131 compare_wide);
ef416fc2 1132
1133 /*
e1d6a774 1134 * Convert unknown character to Replacement Character...
ef416fc2 1135 */
e1d6a774 1136
1137 if (!wide2uni || !wide2uni->unichar)
1138 *workptr++ = 0xfffd;
1139 else
1140 *workptr++ = wide2uni->unichar;
ef416fc2 1141 }
e1d6a774 1142
1143 *workptr = 0;
1144
d6ae789d 1145 vmap->used --;
e1d6a774 1146
1147 /*
1148 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1149 */
1150
1151 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 1152}
1153
e1d6a774 1154
ef416fc2 1155/*
e1d6a774 1156 * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
ef416fc2 1157 */
e1d6a774 1158
1159static void
1160free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
ef416fc2 1161{
e1d6a774 1162 int i; /* Looping variable */
ef416fc2 1163
ef416fc2 1164
e1d6a774 1165 for (i = 0; i < 256; i ++)
1166 if (cmap->uni2char[i])
1167 free(cmap->uni2char[i]);
1168
1169 free(cmap);
1170}
1171
1172
1173/*
1174 * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1175 */
1176
1177static void
1178free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
1179{
1180 int i; /* Looping variable */
1181
1182
1183 for (i = 0; i < 256; i ++)
1184 if (vmap->char2uni[i])
1185 free(vmap->char2uni[i]);
1186
1187 for (i = 0; i < 256; i ++)
1188 if (vmap->uni2char[i])
1189 free(vmap->uni2char[i]);
1190
1191 if (vmap->wide2uni)
1192 free(vmap->wide2uni);
1193
1194 free(vmap);
1195}
1196
1197
d6ae789d 1198/*
1199 * 'get_charmap()' - Lookup or get a character set map (private).
1200 *
1201 * This code handles single-byte (SBCS), double-byte (DBCS), and
1202 * variable-byte (VBCS) character sets _without_ charset escapes...
1203 * This code does not handle multiple-byte character sets (MBCS)
1204 * (such as ISO-2022-JP) with charset switching via escapes...
1205 */
1206
1207
d09495fa 1208static void * /* O - Charset map pointer */
d6ae789d 1209get_charmap(
1210 const cups_encoding_t encoding) /* I - Encoding */
1211{
1212 char filename[1024]; /* Filename for charset map file */
1213 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1214
1215
1216 /*
1217 * Get the data directory and charset map name...
1218 */
1219
1220 snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1221 cg->cups_datadir, _cupsEncodingName(encoding));
1222
1223 DEBUG_printf((" filename=\"%s\"\n", filename));
1224
1225 /*
1226 * Read charset map input file into cache...
1227 */
1228
1229 if (encoding < CUPS_ENCODING_SBCS_END)
1230 return (get_sbcs_charmap(encoding, filename));
1231 else if (encoding < CUPS_ENCODING_VBCS_END)
1232 return (get_vbcs_charmap(encoding, filename));
1233 else
1234 return (NULL);
1235}
1236
1237
e1d6a774 1238/*
1239 * 'get_charmap_count()' - Count lines in a charmap file.
1240 */
1241
1242static int /* O - Count or -1 on error */
1243get_charmap_count(cups_file_t *fp) /* I - File to read from */
1244{
1245 int count; /* Number of lines */
1246 char line[256]; /* Line from input map file */
ef416fc2 1247
ef416fc2 1248
1249 /*
e1d6a774 1250 * Count lines in map input file...
ef416fc2 1251 */
ef416fc2 1252
e1d6a774 1253 count = 0;
ef416fc2 1254
e1d6a774 1255 while (cupsFileGets(fp, line, sizeof(line)))
1256 if (line[0] == '0')
1257 count ++;
ef416fc2 1258
e1d6a774 1259 /*
1260 * Return the number of lines...
1261 */
1262
1263 if (count > 0)
1264 return (count);
1265 else
1266 return (-1);
ef416fc2 1267}
1268
e1d6a774 1269
ef416fc2 1270/*
e1d6a774 1271 * 'get_sbcs_charmap()' - Get SBCS Charmap.
ef416fc2 1272 */
e1d6a774 1273
1274static _cups_cmap_t * /* O - Charmap or 0 on error */
1275get_sbcs_charmap(
1276 const cups_encoding_t encoding, /* I - Charmap Encoding */
1277 const char *filename) /* I - Charmap Filename */
ef416fc2 1278{
e1d6a774 1279 unsigned long legchar; /* Legacy character value */
1280 cups_utf32_t unichar; /* Unicode character value */
1281 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1282 cups_file_t *fp; /* Charset map file pointer */
1283 char *s; /* Line parsing pointer */
1284 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1285 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1286 char line[256]; /* Line from charset map file */
e1d6a774 1287
ef416fc2 1288
1289 /*
e1d6a774 1290 * See if we already have this SBCS charset map loaded...
ef416fc2 1291 */
e1d6a774 1292
d6ae789d 1293 for (cmap = cmap_cache; cmap; cmap = cmap->next)
e1d6a774 1294 {
1295 if (cmap->encoding == encoding)
1296 {
1297 cmap->used ++;
1298 DEBUG_printf((" returning existing cmap=%p\n", cmap));
d6ae789d 1299
e1d6a774 1300 return ((void *)cmap);
1301 }
1302 }
ef416fc2 1303
1304 /*
e1d6a774 1305 * Open SBCS charset map input file...
ef416fc2 1306 */
e1d6a774 1307
1308 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1309 return (NULL);
ef416fc2 1310
1311 /*
e1d6a774 1312 * Allocate memory for SBCS charset map...
ef416fc2 1313 */
e1d6a774 1314
1315 if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1316 {
1317 cupsFileClose(fp);
1318 DEBUG_puts(" Unable to allocate memory!");
d6ae789d 1319
e1d6a774 1320 return (NULL);
1321 }
1322
1323 cmap->used ++;
1324 cmap->encoding = encoding;
ef416fc2 1325
1326 /*
e1d6a774 1327 * Save SBCS charset map into memory for transcoding...
ef416fc2 1328 */
e1d6a774 1329
1330 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1331 {
e1d6a774 1332 if (line[0] != '0')
1333 continue;
1334
1335 legchar = strtol(line, &s, 16);
1336 if (legchar < 0 || legchar > 0xff)
1337 goto sbcs_error;
1338
1339 unichar = strtol(s, NULL, 16);
1340 if (unichar < 0 || unichar > 0xffff)
1341 goto sbcs_error;
ef416fc2 1342
1343 /*
e1d6a774 1344 * Save legacy to Unicode mapping in direct lookup table...
ef416fc2 1345 */
e1d6a774 1346
1347 crow = cmap->char2uni + legchar;
1348 *crow = (cups_ucs2_t)(unichar & 0xffff);
ef416fc2 1349
1350 /*
e1d6a774 1351 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1352 */
e1d6a774 1353
1354 srow = cmap->uni2char[(unichar >> 8) & 0xff];
1355 if (!srow)
ef416fc2 1356 {
e1d6a774 1357 srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1358 if (!srow)
1359 goto sbcs_error;
1360
1361 cmap->uni2char[(unichar >> 8) & 0xff] = srow;
ef416fc2 1362 }
1363
e1d6a774 1364 srow += unichar & 0xff;
1365
ef416fc2 1366 /*
e1d6a774 1367 * Convert Replacement Character to visible replacement...
ef416fc2 1368 */
e1d6a774 1369
1370 if (unichar == 0xfffd)
1371 legchar = (unsigned long)'?';
ef416fc2 1372
1373 /*
e1d6a774 1374 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1375 */
ef416fc2 1376
e1d6a774 1377 if (!*srow)
1378 *srow = (cups_sbcs_t)legchar;
1379 }
ef416fc2 1380
e1d6a774 1381 cupsFileClose(fp);
1382
ef416fc2 1383 /*
e1d6a774 1384 * Add it to the cache and return...
ef416fc2 1385 */
e1d6a774 1386
d6ae789d 1387 cmap->next = cmap_cache;
1388 cmap_cache = cmap;
e1d6a774 1389
1390 DEBUG_printf((" returning new cmap=%p\n", cmap));
1391
1392 return (cmap);
ef416fc2 1393
1394 /*
e1d6a774 1395 * If we get here, there was an error in the cmap file...
ef416fc2 1396 */
e1d6a774 1397
1398 sbcs_error:
1399
1400 free_sbcs_charmap(cmap);
1401
1402 cupsFileClose(fp);
1403
1404 DEBUG_puts(" Error, returning NULL!");
1405
1406 return (NULL);
1407}
1408
1409
1410/*
1411 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1412 */
1413
1414static _cups_vmap_t * /* O - Charmap or 0 on error */
1415get_vbcs_charmap(
1416 const cups_encoding_t encoding, /* I - Charmap Encoding */
1417 const char *filename) /* I - Charmap Filename */
1418{
1419 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1420 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1421 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1422 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1423 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1424 unsigned long legchar; /* Legacy character value */
1425 cups_utf32_t unichar; /* Unicode character value */
1426 int mapcount; /* Count of lines in charmap file */
1427 cups_file_t *fp; /* Charset map file pointer */
1428 char *s; /* Line parsing pointer */
1429 char line[256]; /* Line from charset map file */
1430 int i; /* Loop variable */
09a101d6 1431 int legacy; /* 32-bit legacy char */
e1d6a774 1432
1433
1434 DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1435 encoding, filename));
ef416fc2 1436
1437 /*
e1d6a774 1438 * See if we already have this DBCS/VBCS charset map loaded...
ef416fc2 1439 */
ef416fc2 1440
d6ae789d 1441 for (vmap = vmap_cache; vmap; vmap = vmap->next)
e1d6a774 1442 {
1443 if (vmap->encoding == encoding)
ef416fc2 1444 {
e1d6a774 1445 vmap->used ++;
1446 DEBUG_printf((" returning existing vmap=%p\n", vmap));
d6ae789d 1447
e1d6a774 1448 return ((void *)vmap);
ef416fc2 1449 }
ef416fc2 1450 }
ef416fc2 1451
1452 /*
e1d6a774 1453 * Open VBCS charset map input file...
ef416fc2 1454 */
ef416fc2 1455
e1d6a774 1456 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1457 {
1458 DEBUG_printf((" Unable to open file: %s\n", strerror(errno)));
d6ae789d 1459
e1d6a774 1460 return (NULL);
1461 }
ef416fc2 1462
1463 /*
e1d6a774 1464 * Count lines in charmap file...
ef416fc2 1465 */
e1d6a774 1466
1467 if ((mapcount = get_charmap_count(fp)) <= 0)
1468 {
1469 DEBUG_puts(" Unable to get charmap count!");
d6ae789d 1470
e1d6a774 1471 return (NULL);
1472 }
1473
1474 DEBUG_printf((" mapcount=%d\n", mapcount));
ef416fc2 1475
1476 /*
e1d6a774 1477 * Allocate memory for DBCS/VBCS charset map...
ef416fc2 1478 */
e1d6a774 1479
1480 if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1481 {
1482 cupsFileClose(fp);
1483 DEBUG_puts(" Unable to allocate memory!");
d6ae789d 1484
e1d6a774 1485 return (NULL);
1486 }
1487
1488 vmap->used ++;
1489 vmap->encoding = encoding;
ef416fc2 1490
1491 /*
e1d6a774 1492 * Save DBCS/VBCS charset map into memory for transcoding...
ef416fc2 1493 */
e1d6a774 1494
1495 leadchar = 0;
1496 wide2uni = NULL;
1497
1498 cupsFileRewind(fp);
1499
09a101d6 1500 i = 0;
1501 legacy = 0;
e1d6a774 1502
1503 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1504 {
e1d6a774 1505 if (line[0] != '0')
1506 continue;
1507
1508 legchar = strtoul(line, &s, 16);
1509 if (legchar == ULONG_MAX)
1510 goto vbcs_error;
1511
1512 unichar = strtol(s, NULL, 16);
1513 if (unichar < 0 || unichar > 0xffff)
1514 goto vbcs_error;
1515
1516 i ++;
1517
1518/* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1519 legchar, (unsigned)unichar)); */
ef416fc2 1520
1521 /*
e1d6a774 1522 * Save lead char of 2/3/4-byte legacy char...
ef416fc2 1523 */
e1d6a774 1524
1525 if (legchar > 0xff && legchar <= 0xffff)
ef416fc2 1526 {
e1d6a774 1527 leadchar = (cups_sbcs_t)(legchar >> 8);
1528 vmap->lead2char[leadchar] = leadchar;
1529 }
1530
1531 if (legchar > 0xffff && legchar <= 0xffffff)
1532 {
1533 leadchar = (cups_sbcs_t)(legchar >> 16);
1534 vmap->lead3char[leadchar] = leadchar;
1535 }
1536
1537 if (legchar > 0xffffff)
1538 {
1539 leadchar = (cups_sbcs_t)(legchar >> 24);
1540 vmap->lead4char[leadchar] = leadchar;
ef416fc2 1541 }
1542
1543 /*
e1d6a774 1544 * Save Legacy to Unicode mapping...
ef416fc2 1545 */
e1d6a774 1546
1547 if (legchar <= 0xffff)
ef416fc2 1548 {
ef416fc2 1549 /*
e1d6a774 1550 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
ef416fc2 1551 */
e1d6a774 1552
1553 crow = vmap->char2uni[(int)leadchar];
1554 if (!crow)
1555 {
1556 crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1557 if (!crow)
1558 goto vbcs_error;
1559
1560 vmap->char2uni[(int)leadchar] = crow;
1561 }
1562
1563 crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1564 }
1565 else
1566 {
1567 /*
1568 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1569 */
1570
09a101d6 1571 if (!legacy)
e1d6a774 1572 {
09a101d6 1573 legacy = 1;
e1d6a774 1574 vmap->widecount = (mapcount - i + 1);
1575 wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
1576 sizeof(_cups_wide2uni_t));
1577 if (!wide2uni)
1578 goto vbcs_error;
1579
1580 vmap->wide2uni = wide2uni;
1581 }
1582
1583 wide2uni->widechar = (cups_vbcs_t)legchar;
1584 wide2uni->unichar = (cups_ucs2_t)unichar;
1585 wide2uni ++;
ef416fc2 1586 }
1587
1588 /*
e1d6a774 1589 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1590 */
e1d6a774 1591
1592 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1593 if (!vrow)
ef416fc2 1594 {
e1d6a774 1595 vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1596 if (!vrow)
1597 goto vbcs_error;
1598
1599 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
ef416fc2 1600 }
e1d6a774 1601
1602 vrow += (int)(unichar & 0xff);
ef416fc2 1603
1604 /*
e1d6a774 1605 * Convert Replacement Character to visible replacement...
ef416fc2 1606 */
e1d6a774 1607
1608 if (unichar == 0xfffd)
1609 legchar = (unsigned long)'?';
ef416fc2 1610
1611 /*
e1d6a774 1612 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1613 */
e1d6a774 1614
1615 if (!*vrow)
1616 *vrow = (cups_vbcs_t)legchar;
ef416fc2 1617 }
e1d6a774 1618
1619 vmap->charcount = (i - vmap->widecount);
1620
1621 cupsFileClose(fp);
ef416fc2 1622
1623 /*
e1d6a774 1624 * Add it to the cache and return...
ef416fc2 1625 */
ef416fc2 1626
d6ae789d 1627 vmap->next = vmap_cache;
1628 vmap_cache = vmap;
e1d6a774 1629
1630 DEBUG_printf((" returning new vmap=%p\n", vmap));
1631
1632 return (vmap);
1633
1634 /*
1635 * If we get here, the file contains errors...
1636 */
1637
1638 vbcs_error:
1639
1640 free_vbcs_charmap(vmap);
1641
1642 cupsFileClose(fp);
1643
1644 DEBUG_puts(" Error, returning NULL!");
1645
1646 return (NULL);
ef416fc2 1647}
1648
1649
1650/*
bc44d920 1651 * End of "$Id: transcode.c 6649 2007-07-11 21:46:42Z mike $"
ef416fc2 1652 */