]> git.ipfire.org Git - thirdparty/cups.git/blame - cups/transcode.c
Load cups into easysw/current.
[thirdparty/cups.git] / cups / transcode.c
CommitLineData
ef416fc2 1/*
d09495fa 2 * "$Id: transcode.c 5838 2006-08-17 14:41:42Z mike $"
ef416fc2 3 *
4 * Transcoding support for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 1997-2006 by Easy Software Products.
7 *
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
13 * Products at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
fa73b229 26 * _cupsCharmapFlush() - Flush all character set maps out of cache.
e1d6a774 27 * _cupsCharmapFree() - Free a character set map.
28 * _cupsCharmapGet() - Get a character set map.
ef416fc2 29 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
e1d6a774 30 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
ef416fc2 31 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
32 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
e1d6a774 33 * compare_wide() - Compare key for wide (VBCS) match.
34 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
ef416fc2 35 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
36 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
ef416fc2 37 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
e1d6a774 38 * free_sbcs_charmap() - Free memory used by a single byte character set.
39 * free_vbcs_charmap() - Free memory used by a variable byte character set.
d6ae789d 40 * get_charmap() - Lookup or get a character set map (private).
e1d6a774 41 * get_charmap_count() - Count lines in a charmap file.
42 * get_sbcs_charmap() - Get SBCS Charmap.
43 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
ef416fc2 44 */
45
46/*
47 * Include necessary headers...
48 */
49
50#include "globals.h"
e1d6a774 51#include "debug.h"
e53920b9 52#include <limits.h>
ef416fc2 53#include <stdlib.h>
54#include <errno.h>
55#include <time.h>
56
57
d6ae789d 58/*
59 * Local globals...
60 */
61
62#ifdef HAVE_PTHREAD_H
63static pthread_mutex_t map_mutex = PTHREAD_MUTEX_INITIALIZER;
64 /* Mutex to control access to maps */
65#endif /* HAVE_PTHREAD_H */
66static _cups_cmap_t *cmap_cache = NULL;
67 /* SBCS Charmap Cache */
68static _cups_vmap_t *vmap_cache = NULL;
69 /* VBCS Charmap Cache */
70
71
ef416fc2 72/*
e1d6a774 73 * Local functions...
ef416fc2 74 */
75
e1d6a774 76static int compare_wide(const void *k1, const void *k2);
77static int conv_sbcs_to_utf8(cups_utf8_t *dest,
78 const cups_sbcs_t *src,
79 int maxout,
80 const cups_encoding_t encoding);
81static int conv_utf8_to_sbcs(cups_sbcs_t *dest,
82 const cups_utf8_t *src,
83 int maxout,
84 const cups_encoding_t encoding);
85static int conv_utf8_to_vbcs(cups_sbcs_t *dest,
86 const cups_utf8_t *src,
87 int maxout,
88 const cups_encoding_t encoding);
89static int conv_vbcs_to_utf8(cups_utf8_t *dest,
90 const cups_sbcs_t *src,
91 int maxout,
92 const cups_encoding_t encoding);
93static void free_sbcs_charmap(_cups_cmap_t *sbcs);
94static void free_vbcs_charmap(_cups_vmap_t *vbcs);
d6ae789d 95static void *get_charmap(const cups_encoding_t encoding);
e1d6a774 96static int get_charmap_count(cups_file_t *fp);
97static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
98 const char *filename);
99static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
100 const char *filename);
101
ef416fc2 102
103/*
e1d6a774 104 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
ef416fc2 105 */
106
e1d6a774 107void
d6ae789d 108_cupsCharmapFlush(void)
ef416fc2 109{
e1d6a774 110 _cups_cmap_t *cmap, /* Legacy SBCS / Unicode Charset Map */
111 *cnext; /* Next Legacy SBCS Charset Map */
112 _cups_vmap_t *vmap, /* Legacy VBCS / Unicode Charset Map */
113 *vnext; /* Next Legacy VBCS Charset Map */
ef416fc2 114
115
d6ae789d 116#ifdef HAVE_PTHREAD_H
117 pthread_mutex_lock(&map_mutex);
118#endif /* HAVE_PTHREAD_H */
119
ef416fc2 120 /*
e1d6a774 121 * Loop through SBCS charset map cache, free all memory...
ef416fc2 122 */
123
d6ae789d 124 for (cmap = cmap_cache; cmap; cmap = cnext)
e1d6a774 125 {
126 cnext = cmap->next;
ef416fc2 127
e1d6a774 128 free_sbcs_charmap(cmap);
129 }
ef416fc2 130
d6ae789d 131 cmap_cache = NULL;
ef416fc2 132
133 /*
e1d6a774 134 * Loop through DBCS/VBCS charset map cache, free all memory...
ef416fc2 135 */
136
d6ae789d 137 for (vmap = vmap_cache; vmap; vmap = vnext)
e1d6a774 138 {
139 vnext = vmap->next;
140
141 free_vbcs_charmap(vmap);
142
143 free(vmap);
144 }
145
d6ae789d 146 vmap_cache = NULL;
147
148#ifdef HAVE_PTHREAD_H
149 pthread_mutex_unlock(&map_mutex);
150#endif /* HAVE_PTHREAD_H */
ef416fc2 151}
152
e1d6a774 153
ef416fc2 154/*
e1d6a774 155 * '_cupsCharmapFree()' - Free a character set map.
ef416fc2 156 *
e1d6a774 157 * This does not actually free; use '_cupsCharmapFlush()' for that.
ef416fc2 158 */
e1d6a774 159
ef416fc2 160void
e1d6a774 161_cupsCharmapFree(
162 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 163{
e1d6a774 164 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
165 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
e1d6a774 166
ef416fc2 167
168 /*
169 * See if we already have this SBCS charset map loaded...
170 */
e1d6a774 171
d6ae789d 172#ifdef HAVE_PTHREAD_H
173 pthread_mutex_lock(&map_mutex);
174#endif /* HAVE_PTHREAD_H */
175
176 for (cmap = cmap_cache; cmap; cmap = cmap->next)
ef416fc2 177 {
178 if (cmap->encoding == encoding)
179 {
180 if (cmap->used > 0)
181 cmap->used --;
d6ae789d 182 break;
ef416fc2 183 }
184 }
185
186 /*
187 * See if we already have this DBCS/VBCS charset map loaded...
188 */
e1d6a774 189
d6ae789d 190 for (vmap = vmap_cache; vmap; vmap = vmap->next)
ef416fc2 191 {
192 if (vmap->encoding == encoding)
193 {
194 if (vmap->used > 0)
195 vmap->used --;
d6ae789d 196 break;
ef416fc2 197 }
198 }
d6ae789d 199
200#ifdef HAVE_PTHREAD_H
201 pthread_mutex_unlock(&map_mutex);
202#endif /* HAVE_PTHREAD_H */
fa73b229 203}
204
205
206/*
e1d6a774 207 * '_cupsCharmapGet()' - Get a character set map.
208 *
209 * This code handles single-byte (SBCS), double-byte (DBCS), and
210 * variable-byte (VBCS) character sets _without_ charset escapes...
211 * This code does not handle multiple-byte character sets (MBCS)
212 * (such as ISO-2022-JP) with charset switching via escapes...
fa73b229 213 */
214
e1d6a774 215void * /* O - Charset map pointer */
216_cupsCharmapGet(
217 const cups_encoding_t encoding) /* I - Encoding */
fa73b229 218{
d6ae789d 219 void *charmap; /* Charset map pointer */
e1d6a774 220
fa73b229 221
e1d6a774 222 DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
ef416fc2 223
224 /*
e1d6a774 225 * Check for valid arguments...
ef416fc2 226 */
e1d6a774 227
228 if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
ef416fc2 229 {
e1d6a774 230 DEBUG_puts(" Bad encoding, returning NULL!");
231 return (NULL);
ef416fc2 232 }
ef416fc2 233
234 /*
d6ae789d 235 * Lookup or get the charset map pointer and return...
ef416fc2 236 */
e1d6a774 237
d6ae789d 238#ifdef HAVE_PTHREAD_H
239 pthread_mutex_lock(&map_mutex);
240#endif /* HAVE_PTHREAD_H */
e1d6a774 241
d6ae789d 242 charmap = get_charmap(encoding);
e1d6a774 243
d6ae789d 244#ifdef HAVE_PTHREAD_H
245 pthread_mutex_unlock(&map_mutex);
246#endif /* HAVE_PTHREAD_H */
e1d6a774 247
d6ae789d 248 return (charmap);
ef416fc2 249}
250
e1d6a774 251
ef416fc2 252/*
e1d6a774 253 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
ef416fc2 254 *
255 * This code handles single-byte (SBCS), double-byte (DBCS), and
256 * variable-byte (VBCS) character sets _without_ charset escapes...
257 * This code does not handle multiple-byte character sets (MBCS)
258 * (such as ISO-2022-JP) with charset switching via escapes...
259 */
e1d6a774 260
261int /* O - Count or -1 on error */
262cupsCharsetToUTF8(
263 cups_utf8_t *dest, /* O - Target string */
264 const char *src, /* I - Source string */
265 const int maxout, /* I - Max output */
266 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 267{
d6ae789d 268 int bytes; /* Number of bytes converted */
269
270
ef416fc2 271 /*
272 * Check for valid arguments...
273 */
274
e1d6a774 275 DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
276 dest, src, maxout, encoding));
277
278 if (dest)
279 *dest = '\0';
280
ef416fc2 281 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
e1d6a774 282 {
283 DEBUG_puts(" Bad arguments, returning -1");
ef416fc2 284 return (-1);
e1d6a774 285 }
ef416fc2 286
287 /*
288 * Handle identity conversions...
289 */
290
291 if (encoding == CUPS_UTF8 ||
292 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
293 {
e1d6a774 294 strlcpy((char *)dest, src, maxout);
295 return (strlen((char *)dest));
ef416fc2 296 }
297
298 /*
e1d6a774 299 * Convert input legacy charset to UTF-8...
ef416fc2 300 */
e1d6a774 301
d6ae789d 302#ifdef HAVE_PTHREAD_H
303 pthread_mutex_lock(&map_mutex);
304#endif /* HAVE_PTHREAD_H */
305
ef416fc2 306 if (encoding < CUPS_ENCODING_SBCS_END)
d6ae789d 307 bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
ef416fc2 308 else if (encoding < CUPS_ENCODING_VBCS_END)
d6ae789d 309 bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
ef416fc2 310 else
e1d6a774 311 {
d6ae789d 312 DEBUG_puts(" Bad encoding, returning -1");
313 bytes = -1;
e1d6a774 314 }
d6ae789d 315
316#ifdef HAVE_PTHREAD_H
317 pthread_mutex_unlock(&map_mutex);
318#endif /* HAVE_PTHREAD_H */
319
320 return (bytes);
ef416fc2 321}
322
e1d6a774 323
ef416fc2 324/*
e1d6a774 325 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
ef416fc2 326 *
327 * This code handles single-byte (SBCS), double-byte (DBCS), and
328 * variable-byte (VBCS) character sets _without_ charset escapes...
329 * This code does not handle multiple-byte character sets (MBCS)
330 * (such as ISO-2022-JP) with charset switching via escapes...
331 */
e1d6a774 332
333int /* O - Count or -1 on error */
334cupsUTF8ToCharset(
335 char *dest, /* O - Target string */
336 const cups_utf8_t *src, /* I - Source string */
337 const int maxout, /* I - Max output */
338 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 339{
d6ae789d 340 int bytes; /* Number of bytes converted */
341
342
ef416fc2 343 /*
344 * Check for valid arguments...
345 */
346
347 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
e1d6a774 348 {
349 if (dest)
350 *dest = '\0';
351
ef416fc2 352 return (-1);
e1d6a774 353 }
ef416fc2 354
355 /*
356 * Handle identity conversions...
357 */
358
359 if (encoding == CUPS_UTF8 ||
360 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
361 {
e1d6a774 362 strlcpy(dest, (char *)src, maxout);
363 return (strlen(dest));
ef416fc2 364 }
365
366 /*
e1d6a774 367 * Convert input UTF-8 to legacy charset...
ef416fc2 368 */
e1d6a774 369
d6ae789d 370#ifdef HAVE_PTHREAD_H
371 pthread_mutex_lock(&map_mutex);
372#endif /* HAVE_PTHREAD_H */
373
ef416fc2 374 if (encoding < CUPS_ENCODING_SBCS_END)
d6ae789d 375 bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
ef416fc2 376 else if (encoding < CUPS_ENCODING_VBCS_END)
d6ae789d 377 bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
ef416fc2 378 else
d6ae789d 379 bytes = -1;
380
381#ifdef HAVE_PTHREAD_H
382 pthread_mutex_unlock(&map_mutex);
383#endif /* HAVE_PTHREAD_H */
384
385 return (bytes);
ef416fc2 386}
387
ef416fc2 388
389/*
390 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
391 *
392 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
393 *
394 * UTF-32 char UTF-8 char(s)
395 * --------------------------------------------------
e1d6a774 396 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 397 * 128 to 2047 = 110xxxxx 10yyyyyy
398 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 399 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 400 *
401 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
402 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 403 */
e1d6a774 404
405int /* O - Count or -1 on error */
406cupsUTF8ToUTF32(
407 cups_utf32_t *dest, /* O - Target string */
408 const cups_utf8_t *src, /* I - Source string */
409 const int maxout) /* I - Max output */
ef416fc2 410{
e1d6a774 411 int i; /* Looping variable */
412 cups_utf8_t ch; /* Character value */
413 cups_utf8_t next; /* Next character value */
414 cups_utf32_t ch32; /* UTF-32 character value */
415
ef416fc2 416
417 /*
418 * Check for valid arguments and clear output...
419 */
e1d6a774 420
421 if (dest)
422 *dest = 0;
423
424 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
ef416fc2 425 return (-1);
ef416fc2 426
427 /*
428 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
429 */
e1d6a774 430
431 *dest++ = 0xfeff;
e1d6a774 432
433 for (i = maxout - 1; *src && i > 0; i --)
ef416fc2 434 {
e1d6a774 435 ch = *src++;
ef416fc2 436
437 /*
438 * Convert UTF-8 character(s) to UTF-32 character...
439 */
e1d6a774 440
441 if (!(ch & 0x80))
ef416fc2 442 {
443 /*
444 * One-octet UTF-8 <= 127 (US-ASCII)...
445 */
e1d6a774 446
447 *dest++ = ch;
ef416fc2 448 }
449 else if ((ch & 0xe0) == 0xc0)
450 {
451 /*
452 * Two-octet UTF-8 <= 2047 (Latin-x)...
453 */
e1d6a774 454
455 next = *src++;
456 if (!next)
ef416fc2 457 return (-1);
e1d6a774 458
ef416fc2 459 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
460
461 /*
462 * Check for non-shortest form (invalid UTF-8)...
463 */
e1d6a774 464
465 if (ch32 < 0x80)
ef416fc2 466 return (-1);
e1d6a774 467
468 *dest++ = ch32;
ef416fc2 469 }
470 else if ((ch & 0xf0) == 0xe0)
471 {
472 /*
473 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
474 */
e1d6a774 475
476 next = *src++;
477 if (!next)
ef416fc2 478 return (-1);
e1d6a774 479
480 ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
481
482 next = *src++;
483 if (!next)
ef416fc2 484 return (-1);
e1d6a774 485
486 ch32 = (ch32 << 6) | (next & 0x3f);
ef416fc2 487
488 /*
489 * Check for non-shortest form (invalid UTF-8)...
490 */
e1d6a774 491
492 if (ch32 < 0x800)
ef416fc2 493 return (-1);
e1d6a774 494
495 *dest++ = ch32;
ef416fc2 496 }
497 else if ((ch & 0xf8) == 0xf0)
498 {
499 /*
e1d6a774 500 * Four-octet UTF-8...
ef416fc2 501 */
e1d6a774 502
503 next = *src++;
504 if (!next)
ef416fc2 505 return (-1);
e1d6a774 506
507 ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
508
509 next = *src++;
510 if (!next)
511 return (-1);
512
513 ch32 = (ch32 << 6) | (next & 0x3f);
514
515 next = *src++;
516 if (!next)
517 return (-1);
518
519 ch32 = (ch32 << 6) | (next & 0x3f);
520
ef416fc2 521 /*
e1d6a774 522 * Check for non-shortest form (invalid UTF-8)...
ef416fc2 523 */
e1d6a774 524
525 if (ch32 < 0x10000)
526 return (-1);
527
528 *dest++ = ch32;
ef416fc2 529 }
530 else
531 {
532 /*
e1d6a774 533 * More than 4-octet (invalid UTF-8 sequence)...
ef416fc2 534 */
e1d6a774 535
ef416fc2 536 return (-1);
537 }
538
539 /*
540 * Check for UTF-16 surrogate (illegal UTF-8)...
541 */
ef416fc2 542
e1d6a774 543 if (*dest >= 0xd800 && *dest <= 0xdfff)
ef416fc2 544 return (-1);
545 }
e1d6a774 546
ef416fc2 547 *dest = 0;
e1d6a774 548
ef416fc2 549 return (i);
550}
551
e1d6a774 552
ef416fc2 553/*
554 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
555 *
556 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
557 *
558 * UTF-32 char UTF-8 char(s)
559 * --------------------------------------------------
e1d6a774 560 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 561 * 128 to 2047 = 110xxxxx 10yyyyyy
562 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 563 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 564 *
565 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
566 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 567 */
e1d6a774 568
569int /* O - Count or -1 on error */
570cupsUTF32ToUTF8(
571 cups_utf8_t *dest, /* O - Target string */
572 const cups_utf32_t *src, /* I - Source string */
573 const int maxout) /* I - Max output */
ef416fc2 574{
e1d6a774 575 cups_utf8_t *start; /* Start of destination string */
576 int i; /* Looping variable */
577 int swap; /* Byte-swap input to output */
578 cups_utf32_t ch; /* Character value */
579
ef416fc2 580
581 /*
582 * Check for valid arguments and clear output...
583 */
e1d6a774 584
585 if (dest)
586 *dest = '\0';
587
588 if (!dest || !src || maxout < 1)
ef416fc2 589 return (-1);
ef416fc2 590
591 /*
592 * Check for leading BOM in UTF-32 and inverted BOM...
593 */
e1d6a774 594
595 start = dest;
596 swap = *src == 0xfffe0000;
597
598 if (*src == 0xfffe0000 || *src == 0xfeff)
599 src ++;
ef416fc2 600
601 /*
602 * Convert input UTF-32 to output UTF-8...
603 */
e1d6a774 604
605 for (i = maxout - 1; *src && i > 0;)
ef416fc2 606 {
e1d6a774 607 ch = *src++;
ef416fc2 608
609 /*
610 * Byte swap input UTF-32, if necessary...
e1d6a774 611 * (only byte-swapping 24 of 32 bits)
ef416fc2 612 */
e1d6a774 613
ef416fc2 614 if (swap)
615 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
616
617 /*
e1d6a774 618 * Check for beyond Plane 16 (invalid UTF-32)...
ef416fc2 619 */
ef416fc2 620
ef416fc2 621 if (ch > 0x10ffff)
622 return (-1);
623
ef416fc2 624 /*
625 * Convert UTF-32 character to UTF-8 character(s)...
626 */
e1d6a774 627
628 if (ch < 0x80)
ef416fc2 629 {
630 /*
631 * One-octet UTF-8 <= 127 (US-ASCII)...
632 */
e1d6a774 633
634 *dest++ = (cups_utf8_t)ch;
635 i --;
ef416fc2 636 }
e1d6a774 637 else if (ch < 0x800)
ef416fc2 638 {
639 /*
640 * Two-octet UTF-8 <= 2047 (Latin-x)...
641 */
e1d6a774 642
643 if (i < 2)
644 return (-1);
645
646 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
647 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
648 i -= 2;
ef416fc2 649 }
e1d6a774 650 else if (ch < 0x10000)
ef416fc2 651 {
652 /*
653 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
654 */
e1d6a774 655
656 if (i < 3)
657 return (-1);
658
659 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
660 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
661 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
662 i -= 3;
663 }
664 else
665 {
666 /*
667 * Four-octet UTF-8...
668 */
669
670 if (i < 4)
671 return (-1);
672
673 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
674 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
675 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
676 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
677 i -= 4;
ef416fc2 678 }
679 }
e1d6a774 680
ef416fc2 681 *dest = '\0';
e1d6a774 682
683 return ((int)(dest - start));
ef416fc2 684}
685
e1d6a774 686
ef416fc2 687/*
e1d6a774 688 * 'compare_wide()' - Compare key for wide (VBCS) match.
689 */
690
691static int
692compare_wide(const void *k1, /* I - Key char */
693 const void *k2) /* I - Map char */
694{
695 cups_vbcs_t key; /* Legacy key character */
696 cups_vbcs_t map; /* Legacy map character */
697
698
699 key = *((cups_vbcs_t *)k1);
700 map = ((_cups_wide2uni_t *)k2)->widechar;
701
702 return ((int)(key - map));
703}
704
705
706/*
707 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
ef416fc2 708 */
e1d6a774 709
710static int /* O - Count or -1 on error */
711conv_sbcs_to_utf8(
712 cups_utf8_t *dest, /* O - Target string */
713 const cups_sbcs_t *src, /* I - Source string */
714 int maxout, /* I - Max output */
715 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 716{
e1d6a774 717 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
718 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
719 cups_sbcs_t legchar; /* Legacy character value */
720 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
721 *workptr; /* Pointer into string */
722
ef416fc2 723
724 /*
e1d6a774 725 * Find legacy charset map in cache...
ef416fc2 726 */
e1d6a774 727
d6ae789d 728 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
ef416fc2 729 return (-1);
ef416fc2 730
731 /*
e1d6a774 732 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 733 */
ef416fc2 734
e1d6a774 735 work[0] = 0xfeff;
736 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 737 {
e1d6a774 738 legchar = *src++;
ef416fc2 739
740 /*
e1d6a774 741 * Convert ASCII verbatim (optimization)...
ef416fc2 742 */
ef416fc2 743
e1d6a774 744 if (legchar < 0x80)
745 *workptr++ = (cups_utf32_t)legchar;
746 else
ef416fc2 747 {
e1d6a774 748 /*
749 * Convert unknown character to Replacement Character...
750 */
ef416fc2 751
e1d6a774 752 crow = cmap->char2uni + legchar;
753
754 if (!*crow)
755 *workptr++ = 0xfffd;
756 else
757 *workptr++ = (cups_utf32_t)*crow;
ef416fc2 758 }
ef416fc2 759 }
e1d6a774 760
761 *workptr = 0;
762
763 /*
764 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
765 */
766
d6ae789d 767 cmap->used --;
e1d6a774 768
769 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 770}
771
e1d6a774 772
ef416fc2 773/*
e1d6a774 774 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
ef416fc2 775 */
e1d6a774 776
777static int /* O - Count or -1 on error */
778conv_utf8_to_sbcs(
779 cups_sbcs_t *dest, /* O - Target string */
780 const cups_utf8_t *src, /* I - Source string */
781 int maxout, /* I - Max output */
782 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 783{
e1d6a774 784 cups_sbcs_t *start; /* Start of destination string */
785 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
786 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
787 cups_utf32_t unichar; /* Character value */
788 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
789 *workptr; /* Pointer into string */
790
ef416fc2 791
792 /*
e1d6a774 793 * Find legacy charset map in cache...
ef416fc2 794 */
e1d6a774 795
d6ae789d 796 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
ef416fc2 797 return (-1);
ef416fc2 798
799 /*
e1d6a774 800 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 801 */
e1d6a774 802
803 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
804 return (-1);
ef416fc2 805
806 /*
e1d6a774 807 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
ef416fc2 808 */
e1d6a774 809
810 for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
ef416fc2 811 {
e1d6a774 812 unichar = *workptr++;
813 if (!unichar)
ef416fc2 814 break;
ef416fc2 815
816 /*
e1d6a774 817 * Convert ASCII verbatim (optimization)...
ef416fc2 818 */
ef416fc2 819
e1d6a774 820 if (unichar < 0x80)
821 {
822 *dest++ = (cups_sbcs_t)unichar;
823 continue;
824 }
ef416fc2 825
826 /*
e1d6a774 827 * Convert unknown character to visible replacement...
ef416fc2 828 */
ef416fc2 829
e1d6a774 830 srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
ef416fc2 831
e1d6a774 832 if (srow)
833 srow += (int)(unichar & 0xff);
ef416fc2 834
e1d6a774 835 if (!srow || !*srow)
836 *dest++ = '?';
837 else
838 *dest++ = *srow;
ef416fc2 839 }
ef416fc2 840
e1d6a774 841 *dest = '\0';
842
d6ae789d 843 cmap->used --;
e1d6a774 844
845 return ((int)(dest - start));
ef416fc2 846}
847
e1d6a774 848
ef416fc2 849/*
e1d6a774 850 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
ef416fc2 851 */
e1d6a774 852
853static int /* O - Count or -1 on error */
854conv_utf8_to_vbcs(
855 cups_sbcs_t *dest, /* O - Target string */
856 const cups_utf8_t *src, /* I - Source string */
857 int maxout, /* I - Max output */
858 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 859{
e1d6a774 860 cups_sbcs_t *start; /* Start of destination string */
861 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
862 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
863 cups_utf32_t unichar; /* Character value */
864 cups_vbcs_t legchar; /* Legacy character value */
865 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
866 *workptr; /* Pointer into string */
ef416fc2 867
ef416fc2 868
869 /*
e1d6a774 870 * Find legacy charset map in cache...
ef416fc2 871 */
ef416fc2 872
d6ae789d 873 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
e1d6a774 874 return (-1);
ef416fc2 875
876 /*
e1d6a774 877 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 878 */
e1d6a774 879
880 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
881 return (-1);
ef416fc2 882
883 /*
e1d6a774 884 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
ef416fc2 885 */
e1d6a774 886
887 for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
ef416fc2 888 {
e1d6a774 889 unichar = *workptr++;
890 if (!unichar)
ef416fc2 891 break;
ef416fc2 892
893 /*
e1d6a774 894 * Convert ASCII verbatim (optimization)...
ef416fc2 895 */
e1d6a774 896
897 if (unichar < 0x80)
898 {
899 *dest++ = (cups_vbcs_t)unichar;
900 continue;
901 }
ef416fc2 902
903 /*
e1d6a774 904 * Convert unknown character to visible replacement...
ef416fc2 905 */
e1d6a774 906
907 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
908
909 if (vrow)
910 vrow += (int)(unichar & 0xff);
911
912 if (!vrow || !*vrow)
913 legchar = (cups_vbcs_t)'?';
914 else
915 legchar = (cups_vbcs_t)*vrow;
ef416fc2 916
917 /*
e1d6a774 918 * Save n-byte legacy character...
ef416fc2 919 */
e1d6a774 920
921 if (legchar > 0xffffff)
ef416fc2 922 {
e1d6a774 923 if (maxout < 5)
924 return (-1);
925
926 *dest++ = (cups_sbcs_t)(legchar >> 24);
927 *dest++ = (cups_sbcs_t)(legchar >> 16);
928 *dest++ = (cups_sbcs_t)(legchar >> 8);
929 *dest++ = (cups_sbcs_t)legchar;
930
931 maxout -= 3;
ef416fc2 932 }
e1d6a774 933 else if (legchar > 0xffff)
934 {
935 if (maxout < 4)
936 return (-1);
ef416fc2 937
e1d6a774 938 *dest++ = (cups_sbcs_t)(legchar >> 16);
939 *dest++ = (cups_sbcs_t)(legchar >> 8);
940 *dest++ = (cups_sbcs_t)legchar;
ef416fc2 941
e1d6a774 942 maxout -= 2;
943 }
944 else if (legchar > 0xff)
945 {
946 *dest++ = (cups_sbcs_t)(legchar >> 8);
947 *dest++ = (cups_sbcs_t)legchar;
948
949 maxout --;
950 }
ef416fc2 951 }
e1d6a774 952
953 *dest = '\0';
954
d6ae789d 955 vmap->used --;
e1d6a774 956
957 return ((int)(dest - start));
ef416fc2 958}
959
e1d6a774 960
ef416fc2 961/*
e1d6a774 962 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
ef416fc2 963 */
e1d6a774 964
965static int /* O - Count or -1 on error */
966conv_vbcs_to_utf8(
967 cups_utf8_t *dest, /* O - Target string */
968 const cups_sbcs_t *src, /* I - Source string */
969 int maxout, /* I - Max output */
970 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 971{
e1d6a774 972 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
973 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
974 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
975 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
976 cups_vbcs_t legchar; /* Legacy character value */
977 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
978 *workptr; /* Pointer into string */
ef416fc2 979
ef416fc2 980
981 /*
e1d6a774 982 * Find legacy charset map in cache...
ef416fc2 983 */
ef416fc2 984
d6ae789d 985 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
e1d6a774 986 return (-1);
ef416fc2 987
988 /*
e1d6a774 989 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 990 */
ef416fc2 991
e1d6a774 992 work[0] = 0xfeff;
993 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 994 {
e1d6a774 995 legchar = *src++;
996 leadchar = (cups_sbcs_t)legchar;
ef416fc2 997
998 /*
e1d6a774 999 * Convert ASCII verbatim (optimization)...
ef416fc2 1000 */
ef416fc2 1001
e1d6a774 1002 if (legchar < 0x80)
ef416fc2 1003 {
e1d6a774 1004 *workptr++ = (cups_utf32_t)legchar;
1005 continue;
ef416fc2 1006 }
1007
1008 /*
e1d6a774 1009 * Convert 2-byte legacy character...
ef416fc2 1010 */
e1d6a774 1011
1012 if (vmap->lead2char[(int)leadchar] == leadchar)
ef416fc2 1013 {
e1d6a774 1014 if (!*src)
1015 return (-1);
1016
1017 legchar = (legchar << 8) | *src++;
1018
ef416fc2 1019 /*
e1d6a774 1020 * Convert unknown character to Replacement Character...
ef416fc2 1021 */
e1d6a774 1022
1023 crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1024 if (crow)
1025 crow += (int) (legchar & 0xff);
1026
1027 if (!crow || !*crow)
1028 *workptr++ = 0xfffd;
1029 else
1030 *workptr++ = (cups_utf32_t)*crow;
1031 continue;
ef416fc2 1032 }
1033
1034 /*
e1d6a774 1035 * Fetch 3-byte or 4-byte legacy character...
ef416fc2 1036 */
e1d6a774 1037
1038 if (vmap->lead3char[(int)leadchar] == leadchar)
ef416fc2 1039 {
e1d6a774 1040 if (!*src || !src[1])
1041 return (-1);
1042
1043 legchar = (legchar << 8) | *src++;
1044 legchar = (legchar << 8) | *src++;
ef416fc2 1045 }
e1d6a774 1046 else if (vmap->lead4char[(int)leadchar] == leadchar)
1047 {
1048 if (!*src || !src[1] || !src[2])
1049 return (-1);
1050
1051 legchar = (legchar << 8) | *src++;
1052 legchar = (legchar << 8) | *src++;
1053 legchar = (legchar << 8) | *src++;
1054 }
1055 else
1056 return (-1);
ef416fc2 1057
1058 /*
e1d6a774 1059 * Find 3-byte or 4-byte legacy character...
ef416fc2 1060 */
e1d6a774 1061
1062 wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1063 vmap->wide2uni,
1064 vmap->widecount,
1065 sizeof(_cups_wide2uni_t),
1066 compare_wide);
ef416fc2 1067
1068 /*
e1d6a774 1069 * Convert unknown character to Replacement Character...
ef416fc2 1070 */
e1d6a774 1071
1072 if (!wide2uni || !wide2uni->unichar)
1073 *workptr++ = 0xfffd;
1074 else
1075 *workptr++ = wide2uni->unichar;
ef416fc2 1076 }
e1d6a774 1077
1078 *workptr = 0;
1079
d6ae789d 1080 vmap->used --;
e1d6a774 1081
1082 /*
1083 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1084 */
1085
1086 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 1087}
1088
e1d6a774 1089
ef416fc2 1090/*
e1d6a774 1091 * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
ef416fc2 1092 */
e1d6a774 1093
1094static void
1095free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
ef416fc2 1096{
e1d6a774 1097 int i; /* Looping variable */
ef416fc2 1098
ef416fc2 1099
e1d6a774 1100 for (i = 0; i < 256; i ++)
1101 if (cmap->uni2char[i])
1102 free(cmap->uni2char[i]);
1103
1104 free(cmap);
1105}
1106
1107
1108/*
1109 * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1110 */
1111
1112static void
1113free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
1114{
1115 int i; /* Looping variable */
1116
1117
1118 for (i = 0; i < 256; i ++)
1119 if (vmap->char2uni[i])
1120 free(vmap->char2uni[i]);
1121
1122 for (i = 0; i < 256; i ++)
1123 if (vmap->uni2char[i])
1124 free(vmap->uni2char[i]);
1125
1126 if (vmap->wide2uni)
1127 free(vmap->wide2uni);
1128
1129 free(vmap);
1130}
1131
1132
d6ae789d 1133/*
1134 * 'get_charmap()' - Lookup or get a character set map (private).
1135 *
1136 * This code handles single-byte (SBCS), double-byte (DBCS), and
1137 * variable-byte (VBCS) character sets _without_ charset escapes...
1138 * This code does not handle multiple-byte character sets (MBCS)
1139 * (such as ISO-2022-JP) with charset switching via escapes...
1140 */
1141
1142
d09495fa 1143static void * /* O - Charset map pointer */
d6ae789d 1144get_charmap(
1145 const cups_encoding_t encoding) /* I - Encoding */
1146{
1147 char filename[1024]; /* Filename for charset map file */
1148 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1149
1150
1151 /*
1152 * Get the data directory and charset map name...
1153 */
1154
1155 snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1156 cg->cups_datadir, _cupsEncodingName(encoding));
1157
1158 DEBUG_printf((" filename=\"%s\"\n", filename));
1159
1160 /*
1161 * Read charset map input file into cache...
1162 */
1163
1164 if (encoding < CUPS_ENCODING_SBCS_END)
1165 return (get_sbcs_charmap(encoding, filename));
1166 else if (encoding < CUPS_ENCODING_VBCS_END)
1167 return (get_vbcs_charmap(encoding, filename));
1168 else
1169 return (NULL);
1170}
1171
1172
e1d6a774 1173/*
1174 * 'get_charmap_count()' - Count lines in a charmap file.
1175 */
1176
1177static int /* O - Count or -1 on error */
1178get_charmap_count(cups_file_t *fp) /* I - File to read from */
1179{
1180 int count; /* Number of lines */
1181 char line[256]; /* Line from input map file */
ef416fc2 1182
ef416fc2 1183
1184 /*
e1d6a774 1185 * Count lines in map input file...
ef416fc2 1186 */
ef416fc2 1187
e1d6a774 1188 count = 0;
ef416fc2 1189
e1d6a774 1190 while (cupsFileGets(fp, line, sizeof(line)))
1191 if (line[0] == '0')
1192 count ++;
ef416fc2 1193
e1d6a774 1194 /*
1195 * Return the number of lines...
1196 */
1197
1198 if (count > 0)
1199 return (count);
1200 else
1201 return (-1);
ef416fc2 1202}
1203
e1d6a774 1204
ef416fc2 1205/*
e1d6a774 1206 * 'get_sbcs_charmap()' - Get SBCS Charmap.
ef416fc2 1207 */
e1d6a774 1208
1209static _cups_cmap_t * /* O - Charmap or 0 on error */
1210get_sbcs_charmap(
1211 const cups_encoding_t encoding, /* I - Charmap Encoding */
1212 const char *filename) /* I - Charmap Filename */
ef416fc2 1213{
e1d6a774 1214 unsigned long legchar; /* Legacy character value */
1215 cups_utf32_t unichar; /* Unicode character value */
1216 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1217 cups_file_t *fp; /* Charset map file pointer */
1218 char *s; /* Line parsing pointer */
1219 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1220 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1221 char line[256]; /* Line from charset map file */
e1d6a774 1222
ef416fc2 1223
1224 /*
e1d6a774 1225 * See if we already have this SBCS charset map loaded...
ef416fc2 1226 */
e1d6a774 1227
d6ae789d 1228 for (cmap = cmap_cache; cmap; cmap = cmap->next)
e1d6a774 1229 {
1230 if (cmap->encoding == encoding)
1231 {
1232 cmap->used ++;
1233 DEBUG_printf((" returning existing cmap=%p\n", cmap));
d6ae789d 1234
e1d6a774 1235 return ((void *)cmap);
1236 }
1237 }
ef416fc2 1238
1239 /*
e1d6a774 1240 * Open SBCS charset map input file...
ef416fc2 1241 */
e1d6a774 1242
1243 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1244 return (NULL);
ef416fc2 1245
1246 /*
e1d6a774 1247 * Allocate memory for SBCS charset map...
ef416fc2 1248 */
e1d6a774 1249
1250 if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1251 {
1252 cupsFileClose(fp);
1253 DEBUG_puts(" Unable to allocate memory!");
d6ae789d 1254
e1d6a774 1255 return (NULL);
1256 }
1257
1258 cmap->used ++;
1259 cmap->encoding = encoding;
ef416fc2 1260
1261 /*
e1d6a774 1262 * Save SBCS charset map into memory for transcoding...
ef416fc2 1263 */
e1d6a774 1264
1265 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1266 {
e1d6a774 1267 if (line[0] != '0')
1268 continue;
1269
1270 legchar = strtol(line, &s, 16);
1271 if (legchar < 0 || legchar > 0xff)
1272 goto sbcs_error;
1273
1274 unichar = strtol(s, NULL, 16);
1275 if (unichar < 0 || unichar > 0xffff)
1276 goto sbcs_error;
ef416fc2 1277
1278 /*
e1d6a774 1279 * Save legacy to Unicode mapping in direct lookup table...
ef416fc2 1280 */
e1d6a774 1281
1282 crow = cmap->char2uni + legchar;
1283 *crow = (cups_ucs2_t)(unichar & 0xffff);
ef416fc2 1284
1285 /*
e1d6a774 1286 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1287 */
e1d6a774 1288
1289 srow = cmap->uni2char[(unichar >> 8) & 0xff];
1290 if (!srow)
ef416fc2 1291 {
e1d6a774 1292 srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1293 if (!srow)
1294 goto sbcs_error;
1295
1296 cmap->uni2char[(unichar >> 8) & 0xff] = srow;
ef416fc2 1297 }
1298
e1d6a774 1299 srow += unichar & 0xff;
1300
ef416fc2 1301 /*
e1d6a774 1302 * Convert Replacement Character to visible replacement...
ef416fc2 1303 */
e1d6a774 1304
1305 if (unichar == 0xfffd)
1306 legchar = (unsigned long)'?';
ef416fc2 1307
1308 /*
e1d6a774 1309 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1310 */
ef416fc2 1311
e1d6a774 1312 if (!*srow)
1313 *srow = (cups_sbcs_t)legchar;
1314 }
ef416fc2 1315
e1d6a774 1316 cupsFileClose(fp);
1317
ef416fc2 1318 /*
e1d6a774 1319 * Add it to the cache and return...
ef416fc2 1320 */
e1d6a774 1321
d6ae789d 1322 cmap->next = cmap_cache;
1323 cmap_cache = cmap;
e1d6a774 1324
1325 DEBUG_printf((" returning new cmap=%p\n", cmap));
1326
1327 return (cmap);
ef416fc2 1328
1329 /*
e1d6a774 1330 * If we get here, there was an error in the cmap file...
ef416fc2 1331 */
e1d6a774 1332
1333 sbcs_error:
1334
1335 free_sbcs_charmap(cmap);
1336
1337 cupsFileClose(fp);
1338
1339 DEBUG_puts(" Error, returning NULL!");
1340
1341 return (NULL);
1342}
1343
1344
1345/*
1346 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1347 */
1348
1349static _cups_vmap_t * /* O - Charmap or 0 on error */
1350get_vbcs_charmap(
1351 const cups_encoding_t encoding, /* I - Charmap Encoding */
1352 const char *filename) /* I - Charmap Filename */
1353{
1354 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1355 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1356 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1357 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1358 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1359 unsigned long legchar; /* Legacy character value */
1360 cups_utf32_t unichar; /* Unicode character value */
1361 int mapcount; /* Count of lines in charmap file */
1362 cups_file_t *fp; /* Charset map file pointer */
1363 char *s; /* Line parsing pointer */
1364 char line[256]; /* Line from charset map file */
1365 int i; /* Loop variable */
1366 int wide; /* 32-bit legacy char */
e1d6a774 1367
1368
1369 DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1370 encoding, filename));
ef416fc2 1371
1372 /*
e1d6a774 1373 * See if we already have this DBCS/VBCS charset map loaded...
ef416fc2 1374 */
ef416fc2 1375
d6ae789d 1376 for (vmap = vmap_cache; vmap; vmap = vmap->next)
e1d6a774 1377 {
1378 if (vmap->encoding == encoding)
ef416fc2 1379 {
e1d6a774 1380 vmap->used ++;
1381 DEBUG_printf((" returning existing vmap=%p\n", vmap));
d6ae789d 1382
e1d6a774 1383 return ((void *)vmap);
ef416fc2 1384 }
ef416fc2 1385 }
ef416fc2 1386
1387 /*
e1d6a774 1388 * Open VBCS charset map input file...
ef416fc2 1389 */
ef416fc2 1390
e1d6a774 1391 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1392 {
1393 DEBUG_printf((" Unable to open file: %s\n", strerror(errno)));
d6ae789d 1394
e1d6a774 1395 return (NULL);
1396 }
ef416fc2 1397
1398 /*
e1d6a774 1399 * Count lines in charmap file...
ef416fc2 1400 */
e1d6a774 1401
1402 if ((mapcount = get_charmap_count(fp)) <= 0)
1403 {
1404 DEBUG_puts(" Unable to get charmap count!");
d6ae789d 1405
e1d6a774 1406 return (NULL);
1407 }
1408
1409 DEBUG_printf((" mapcount=%d\n", mapcount));
ef416fc2 1410
1411 /*
e1d6a774 1412 * Allocate memory for DBCS/VBCS charset map...
ef416fc2 1413 */
e1d6a774 1414
1415 if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1416 {
1417 cupsFileClose(fp);
1418 DEBUG_puts(" Unable to allocate memory!");
d6ae789d 1419
e1d6a774 1420 return (NULL);
1421 }
1422
1423 vmap->used ++;
1424 vmap->encoding = encoding;
ef416fc2 1425
1426 /*
e1d6a774 1427 * Save DBCS/VBCS charset map into memory for transcoding...
ef416fc2 1428 */
e1d6a774 1429
1430 leadchar = 0;
1431 wide2uni = NULL;
1432
1433 cupsFileRewind(fp);
1434
1435 i = 0;
1436 wide = 0;
1437
1438 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1439 {
e1d6a774 1440 if (line[0] != '0')
1441 continue;
1442
1443 legchar = strtoul(line, &s, 16);
1444 if (legchar == ULONG_MAX)
1445 goto vbcs_error;
1446
1447 unichar = strtol(s, NULL, 16);
1448 if (unichar < 0 || unichar > 0xffff)
1449 goto vbcs_error;
1450
1451 i ++;
1452
1453/* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1454 legchar, (unsigned)unichar)); */
ef416fc2 1455
1456 /*
e1d6a774 1457 * Save lead char of 2/3/4-byte legacy char...
ef416fc2 1458 */
e1d6a774 1459
1460 if (legchar > 0xff && legchar <= 0xffff)
ef416fc2 1461 {
e1d6a774 1462 leadchar = (cups_sbcs_t)(legchar >> 8);
1463 vmap->lead2char[leadchar] = leadchar;
1464 }
1465
1466 if (legchar > 0xffff && legchar <= 0xffffff)
1467 {
1468 leadchar = (cups_sbcs_t)(legchar >> 16);
1469 vmap->lead3char[leadchar] = leadchar;
1470 }
1471
1472 if (legchar > 0xffffff)
1473 {
1474 leadchar = (cups_sbcs_t)(legchar >> 24);
1475 vmap->lead4char[leadchar] = leadchar;
ef416fc2 1476 }
1477
1478 /*
e1d6a774 1479 * Save Legacy to Unicode mapping...
ef416fc2 1480 */
e1d6a774 1481
1482 if (legchar <= 0xffff)
ef416fc2 1483 {
ef416fc2 1484 /*
e1d6a774 1485 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
ef416fc2 1486 */
e1d6a774 1487
1488 crow = vmap->char2uni[(int)leadchar];
1489 if (!crow)
1490 {
1491 crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1492 if (!crow)
1493 goto vbcs_error;
1494
1495 vmap->char2uni[(int)leadchar] = crow;
1496 }
1497
1498 crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1499 }
1500 else
1501 {
1502 /*
1503 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1504 */
1505
1506 if (!wide)
1507 {
1508 wide = 1;
1509 vmap->widecount = (mapcount - i + 1);
1510 wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
1511 sizeof(_cups_wide2uni_t));
1512 if (!wide2uni)
1513 goto vbcs_error;
1514
1515 vmap->wide2uni = wide2uni;
1516 }
1517
1518 wide2uni->widechar = (cups_vbcs_t)legchar;
1519 wide2uni->unichar = (cups_ucs2_t)unichar;
1520 wide2uni ++;
ef416fc2 1521 }
1522
1523 /*
e1d6a774 1524 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1525 */
e1d6a774 1526
1527 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1528 if (!vrow)
ef416fc2 1529 {
e1d6a774 1530 vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1531 if (!vrow)
1532 goto vbcs_error;
1533
1534 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
ef416fc2 1535 }
e1d6a774 1536
1537 vrow += (int)(unichar & 0xff);
ef416fc2 1538
1539 /*
e1d6a774 1540 * Convert Replacement Character to visible replacement...
ef416fc2 1541 */
e1d6a774 1542
1543 if (unichar == 0xfffd)
1544 legchar = (unsigned long)'?';
ef416fc2 1545
1546 /*
e1d6a774 1547 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1548 */
e1d6a774 1549
1550 if (!*vrow)
1551 *vrow = (cups_vbcs_t)legchar;
ef416fc2 1552 }
e1d6a774 1553
1554 vmap->charcount = (i - vmap->widecount);
1555
1556 cupsFileClose(fp);
ef416fc2 1557
1558 /*
e1d6a774 1559 * Add it to the cache and return...
ef416fc2 1560 */
ef416fc2 1561
d6ae789d 1562 vmap->next = vmap_cache;
1563 vmap_cache = vmap;
e1d6a774 1564
1565 DEBUG_printf((" returning new vmap=%p\n", vmap));
1566
1567 return (vmap);
1568
1569 /*
1570 * If we get here, the file contains errors...
1571 */
1572
1573 vbcs_error:
1574
1575 free_vbcs_charmap(vmap);
1576
1577 cupsFileClose(fp);
1578
1579 DEBUG_puts(" Error, returning NULL!");
1580
1581 return (NULL);
ef416fc2 1582}
1583
1584
1585/*
d09495fa 1586 * End of "$Id: transcode.c 5838 2006-08-17 14:41:42Z mike $"
ef416fc2 1587 */