]> git.ipfire.org Git - thirdparty/cups.git/blame - cups/transcode.c
Remove svn:keywords since they cause svn_load_dirs.pl to complain about every file.
[thirdparty/cups.git] / cups / transcode.c
CommitLineData
ef416fc2 1/*
c07d5b2d 2 * "$Id: transcode.c 177 2006-06-21 00:20:03Z jlovell $"
ef416fc2 3 *
4 * Transcoding support for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 1997-2006 by Easy Software Products.
7 *
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
13 * Products at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
fa73b229 26 * _cupsCharmapFlush() - Flush all character set maps out of cache.
e1d6a774 27 * _cupsCharmapFree() - Free a character set map.
28 * _cupsCharmapGet() - Get a character set map.
ef416fc2 29 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
e1d6a774 30 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
ef416fc2 31 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
32 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
e1d6a774 33 * compare_wide() - Compare key for wide (VBCS) match.
34 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
ef416fc2 35 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
36 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
ef416fc2 37 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
e1d6a774 38 * free_sbcs_charmap() - Free memory used by a single byte character set.
39 * free_vbcs_charmap() - Free memory used by a variable byte character set.
d6ae789d 40 * get_charmap() - Lookup or get a character set map (private).
e1d6a774 41 * get_charmap_count() - Count lines in a charmap file.
42 * get_sbcs_charmap() - Get SBCS Charmap.
43 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
ef416fc2 44 */
45
46/*
47 * Include necessary headers...
48 */
49
50#include "globals.h"
e1d6a774 51#include "debug.h"
e53920b9 52#include <limits.h>
ef416fc2 53#include <stdlib.h>
54#include <errno.h>
55#include <time.h>
56
57
d6ae789d 58/*
59 * Local globals...
60 */
61
62#ifdef HAVE_PTHREAD_H
63static pthread_mutex_t map_mutex = PTHREAD_MUTEX_INITIALIZER;
64 /* Mutex to control access to maps */
65#endif /* HAVE_PTHREAD_H */
66static _cups_cmap_t *cmap_cache = NULL;
67 /* SBCS Charmap Cache */
68static _cups_vmap_t *vmap_cache = NULL;
69 /* VBCS Charmap Cache */
70
71
ef416fc2 72/*
e1d6a774 73 * Local functions...
ef416fc2 74 */
75
e1d6a774 76static int compare_wide(const void *k1, const void *k2);
77static int conv_sbcs_to_utf8(cups_utf8_t *dest,
78 const cups_sbcs_t *src,
79 int maxout,
80 const cups_encoding_t encoding);
81static int conv_utf8_to_sbcs(cups_sbcs_t *dest,
82 const cups_utf8_t *src,
83 int maxout,
84 const cups_encoding_t encoding);
85static int conv_utf8_to_vbcs(cups_sbcs_t *dest,
86 const cups_utf8_t *src,
87 int maxout,
88 const cups_encoding_t encoding);
89static int conv_vbcs_to_utf8(cups_utf8_t *dest,
90 const cups_sbcs_t *src,
91 int maxout,
92 const cups_encoding_t encoding);
93static void free_sbcs_charmap(_cups_cmap_t *sbcs);
94static void free_vbcs_charmap(_cups_vmap_t *vbcs);
d6ae789d 95static void *get_charmap(const cups_encoding_t encoding);
e1d6a774 96static int get_charmap_count(cups_file_t *fp);
97static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
98 const char *filename);
99static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
100 const char *filename);
101
ef416fc2 102
103/*
e1d6a774 104 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
ef416fc2 105 */
106
e1d6a774 107void
d6ae789d 108_cupsCharmapFlush(void)
ef416fc2 109{
e1d6a774 110 _cups_cmap_t *cmap, /* Legacy SBCS / Unicode Charset Map */
111 *cnext; /* Next Legacy SBCS Charset Map */
112 _cups_vmap_t *vmap, /* Legacy VBCS / Unicode Charset Map */
113 *vnext; /* Next Legacy VBCS Charset Map */
ef416fc2 114
115
d6ae789d 116#ifdef HAVE_PTHREAD_H
117 pthread_mutex_lock(&map_mutex);
118#endif /* HAVE_PTHREAD_H */
119
ef416fc2 120 /*
e1d6a774 121 * Loop through SBCS charset map cache, free all memory...
ef416fc2 122 */
123
d6ae789d 124 for (cmap = cmap_cache; cmap; cmap = cnext)
e1d6a774 125 {
126 cnext = cmap->next;
ef416fc2 127
e1d6a774 128 free_sbcs_charmap(cmap);
129 }
ef416fc2 130
d6ae789d 131 cmap_cache = NULL;
ef416fc2 132
133 /*
e1d6a774 134 * Loop through DBCS/VBCS charset map cache, free all memory...
ef416fc2 135 */
136
d6ae789d 137 for (vmap = vmap_cache; vmap; vmap = vnext)
e1d6a774 138 {
139 vnext = vmap->next;
140
141 free_vbcs_charmap(vmap);
142
143 free(vmap);
144 }
145
d6ae789d 146 vmap_cache = NULL;
147
148#ifdef HAVE_PTHREAD_H
149 pthread_mutex_unlock(&map_mutex);
150#endif /* HAVE_PTHREAD_H */
ef416fc2 151}
152
e1d6a774 153
ef416fc2 154/*
e1d6a774 155 * '_cupsCharmapFree()' - Free a character set map.
ef416fc2 156 *
e1d6a774 157 * This does not actually free; use '_cupsCharmapFlush()' for that.
ef416fc2 158 */
e1d6a774 159
ef416fc2 160void
e1d6a774 161_cupsCharmapFree(
162 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 163{
e1d6a774 164 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
165 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
e1d6a774 166
ef416fc2 167
168 /*
169 * See if we already have this SBCS charset map loaded...
170 */
e1d6a774 171
d6ae789d 172#ifdef HAVE_PTHREAD_H
173 pthread_mutex_lock(&map_mutex);
174#endif /* HAVE_PTHREAD_H */
175
176 for (cmap = cmap_cache; cmap; cmap = cmap->next)
ef416fc2 177 {
178 if (cmap->encoding == encoding)
179 {
180 if (cmap->used > 0)
181 cmap->used --;
d6ae789d 182 break;
ef416fc2 183 }
184 }
185
186 /*
187 * See if we already have this DBCS/VBCS charset map loaded...
188 */
e1d6a774 189
d6ae789d 190 for (vmap = vmap_cache; vmap; vmap = vmap->next)
ef416fc2 191 {
192 if (vmap->encoding == encoding)
193 {
194 if (vmap->used > 0)
195 vmap->used --;
d6ae789d 196 break;
ef416fc2 197 }
198 }
d6ae789d 199
200#ifdef HAVE_PTHREAD_H
201 pthread_mutex_unlock(&map_mutex);
202#endif /* HAVE_PTHREAD_H */
fa73b229 203}
204
205
206/*
e1d6a774 207 * '_cupsCharmapGet()' - Get a character set map.
208 *
209 * This code handles single-byte (SBCS), double-byte (DBCS), and
210 * variable-byte (VBCS) character sets _without_ charset escapes...
211 * This code does not handle multiple-byte character sets (MBCS)
212 * (such as ISO-2022-JP) with charset switching via escapes...
fa73b229 213 */
214
e1d6a774 215void * /* O - Charset map pointer */
216_cupsCharmapGet(
217 const cups_encoding_t encoding) /* I - Encoding */
fa73b229 218{
d6ae789d 219 void *charmap; /* Charset map pointer */
e1d6a774 220
fa73b229 221
e1d6a774 222 DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
ef416fc2 223
224 /*
e1d6a774 225 * Check for valid arguments...
ef416fc2 226 */
e1d6a774 227
228 if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
ef416fc2 229 {
e1d6a774 230 DEBUG_puts(" Bad encoding, returning NULL!");
231 return (NULL);
ef416fc2 232 }
ef416fc2 233
234 /*
d6ae789d 235 * Lookup or get the charset map pointer and return...
ef416fc2 236 */
e1d6a774 237
d6ae789d 238#ifdef HAVE_PTHREAD_H
239 pthread_mutex_lock(&map_mutex);
240#endif /* HAVE_PTHREAD_H */
e1d6a774 241
d6ae789d 242 charmap = get_charmap(encoding);
e1d6a774 243
d6ae789d 244#ifdef HAVE_PTHREAD_H
245 pthread_mutex_unlock(&map_mutex);
246#endif /* HAVE_PTHREAD_H */
e1d6a774 247
d6ae789d 248 return (charmap);
ef416fc2 249}
250
e1d6a774 251
ef416fc2 252/*
e1d6a774 253 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
ef416fc2 254 *
255 * This code handles single-byte (SBCS), double-byte (DBCS), and
256 * variable-byte (VBCS) character sets _without_ charset escapes...
257 * This code does not handle multiple-byte character sets (MBCS)
258 * (such as ISO-2022-JP) with charset switching via escapes...
259 */
e1d6a774 260
261int /* O - Count or -1 on error */
262cupsCharsetToUTF8(
263 cups_utf8_t *dest, /* O - Target string */
264 const char *src, /* I - Source string */
265 const int maxout, /* I - Max output */
266 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 267{
d6ae789d 268 int bytes; /* Number of bytes converted */
269
270
ef416fc2 271 /*
272 * Check for valid arguments...
273 */
274
e1d6a774 275 DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
276 dest, src, maxout, encoding));
277
278 if (dest)
279 *dest = '\0';
280
ef416fc2 281 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
e1d6a774 282 {
283 DEBUG_puts(" Bad arguments, returning -1");
ef416fc2 284 return (-1);
e1d6a774 285 }
ef416fc2 286
287 /*
288 * Handle identity conversions...
289 */
290
291 if (encoding == CUPS_UTF8 ||
292 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
293 {
e1d6a774 294 strlcpy((char *)dest, src, maxout);
295 return (strlen((char *)dest));
ef416fc2 296 }
297
298 /*
e1d6a774 299 * Convert input legacy charset to UTF-8...
ef416fc2 300 */
e1d6a774 301
d6ae789d 302#ifdef HAVE_PTHREAD_H
303 pthread_mutex_lock(&map_mutex);
304#endif /* HAVE_PTHREAD_H */
305
ef416fc2 306 if (encoding < CUPS_ENCODING_SBCS_END)
d6ae789d 307 bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
ef416fc2 308 else if (encoding < CUPS_ENCODING_VBCS_END)
d6ae789d 309 bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
ef416fc2 310 else
e1d6a774 311 {
d6ae789d 312 DEBUG_puts(" Bad encoding, returning -1");
313 bytes = -1;
e1d6a774 314 }
d6ae789d 315
316#ifdef HAVE_PTHREAD_H
317 pthread_mutex_unlock(&map_mutex);
318#endif /* HAVE_PTHREAD_H */
319
320 return (bytes);
ef416fc2 321}
322
e1d6a774 323
ef416fc2 324/*
e1d6a774 325 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
ef416fc2 326 *
327 * This code handles single-byte (SBCS), double-byte (DBCS), and
328 * variable-byte (VBCS) character sets _without_ charset escapes...
329 * This code does not handle multiple-byte character sets (MBCS)
330 * (such as ISO-2022-JP) with charset switching via escapes...
331 */
e1d6a774 332
333int /* O - Count or -1 on error */
334cupsUTF8ToCharset(
335 char *dest, /* O - Target string */
336 const cups_utf8_t *src, /* I - Source string */
337 const int maxout, /* I - Max output */
338 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 339{
d6ae789d 340 int bytes; /* Number of bytes converted */
341
342
ef416fc2 343 /*
344 * Check for valid arguments...
345 */
346
347 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
e1d6a774 348 {
349 if (dest)
350 *dest = '\0';
351
ef416fc2 352 return (-1);
e1d6a774 353 }
ef416fc2 354
355 /*
356 * Handle identity conversions...
357 */
358
359 if (encoding == CUPS_UTF8 ||
360 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
361 {
e1d6a774 362 strlcpy(dest, (char *)src, maxout);
363 return (strlen(dest));
ef416fc2 364 }
365
366 /*
e1d6a774 367 * Convert input UTF-8 to legacy charset...
ef416fc2 368 */
e1d6a774 369
d6ae789d 370#ifdef HAVE_PTHREAD_H
371 pthread_mutex_lock(&map_mutex);
372#endif /* HAVE_PTHREAD_H */
373
ef416fc2 374 if (encoding < CUPS_ENCODING_SBCS_END)
d6ae789d 375 bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
ef416fc2 376 else if (encoding < CUPS_ENCODING_VBCS_END)
d6ae789d 377 bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
ef416fc2 378 else
d6ae789d 379 bytes = -1;
380
381#ifdef HAVE_PTHREAD_H
382 pthread_mutex_unlock(&map_mutex);
383#endif /* HAVE_PTHREAD_H */
384
385 return (bytes);
ef416fc2 386}
387
ef416fc2 388
389/*
390 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
391 *
392 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
393 *
394 * UTF-32 char UTF-8 char(s)
395 * --------------------------------------------------
e1d6a774 396 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 397 * 128 to 2047 = 110xxxxx 10yyyyyy
398 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 399 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 400 *
401 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
402 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 403 */
e1d6a774 404
405int /* O - Count or -1 on error */
406cupsUTF8ToUTF32(
407 cups_utf32_t *dest, /* O - Target string */
408 const cups_utf8_t *src, /* I - Source string */
409 const int maxout) /* I - Max output */
ef416fc2 410{
e1d6a774 411 size_t srclen; /* Source string length */
412 int i; /* Looping variable */
413 cups_utf8_t ch; /* Character value */
414 cups_utf8_t next; /* Next character value */
415 cups_utf32_t ch32; /* UTF-32 character value */
416
ef416fc2 417
418 /*
419 * Check for valid arguments and clear output...
420 */
e1d6a774 421
422 if (dest)
423 *dest = 0;
424
425 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
ef416fc2 426 return (-1);
ef416fc2 427
428 /*
429 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
430 */
e1d6a774 431
432 *dest++ = 0xfeff;
433 srclen = strlen((char *)src);
434
435 for (i = maxout - 1; *src && i > 0; i --)
ef416fc2 436 {
e1d6a774 437 ch = *src++;
ef416fc2 438
439 /*
440 * Convert UTF-8 character(s) to UTF-32 character...
441 */
e1d6a774 442
443 if (!(ch & 0x80))
ef416fc2 444 {
445 /*
446 * One-octet UTF-8 <= 127 (US-ASCII)...
447 */
e1d6a774 448
449 *dest++ = ch;
ef416fc2 450 }
451 else if ((ch & 0xe0) == 0xc0)
452 {
453 /*
454 * Two-octet UTF-8 <= 2047 (Latin-x)...
455 */
e1d6a774 456
457 next = *src++;
458 if (!next)
ef416fc2 459 return (-1);
e1d6a774 460
ef416fc2 461 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
462
463 /*
464 * Check for non-shortest form (invalid UTF-8)...
465 */
e1d6a774 466
467 if (ch32 < 0x80)
ef416fc2 468 return (-1);
e1d6a774 469
470 *dest++ = ch32;
ef416fc2 471 }
472 else if ((ch & 0xf0) == 0xe0)
473 {
474 /*
475 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
476 */
e1d6a774 477
478 next = *src++;
479 if (!next)
ef416fc2 480 return (-1);
e1d6a774 481
482 ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
483
484 next = *src++;
485 if (!next)
ef416fc2 486 return (-1);
e1d6a774 487
488 ch32 = (ch32 << 6) | (next & 0x3f);
ef416fc2 489
490 /*
491 * Check for non-shortest form (invalid UTF-8)...
492 */
e1d6a774 493
494 if (ch32 < 0x800)
ef416fc2 495 return (-1);
e1d6a774 496
497 *dest++ = ch32;
ef416fc2 498 }
499 else if ((ch & 0xf8) == 0xf0)
500 {
501 /*
e1d6a774 502 * Four-octet UTF-8...
ef416fc2 503 */
e1d6a774 504
505 next = *src++;
506 if (!next)
ef416fc2 507 return (-1);
e1d6a774 508
509 ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
510
511 next = *src++;
512 if (!next)
513 return (-1);
514
515 ch32 = (ch32 << 6) | (next & 0x3f);
516
517 next = *src++;
518 if (!next)
519 return (-1);
520
521 ch32 = (ch32 << 6) | (next & 0x3f);
522
ef416fc2 523 /*
e1d6a774 524 * Check for non-shortest form (invalid UTF-8)...
ef416fc2 525 */
e1d6a774 526
527 if (ch32 < 0x10000)
528 return (-1);
529
530 *dest++ = ch32;
ef416fc2 531 }
532 else
533 {
534 /*
e1d6a774 535 * More than 4-octet (invalid UTF-8 sequence)...
ef416fc2 536 */
e1d6a774 537
ef416fc2 538 return (-1);
539 }
540
541 /*
542 * Check for UTF-16 surrogate (illegal UTF-8)...
543 */
ef416fc2 544
e1d6a774 545 if (*dest >= 0xd800 && *dest <= 0xdfff)
ef416fc2 546 return (-1);
547 }
e1d6a774 548
ef416fc2 549 *dest = 0;
e1d6a774 550
ef416fc2 551 return (i);
552}
553
e1d6a774 554
ef416fc2 555/*
556 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
557 *
558 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
559 *
560 * UTF-32 char UTF-8 char(s)
561 * --------------------------------------------------
e1d6a774 562 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 563 * 128 to 2047 = 110xxxxx 10yyyyyy
564 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 565 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 566 *
567 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
568 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 569 */
e1d6a774 570
571int /* O - Count or -1 on error */
572cupsUTF32ToUTF8(
573 cups_utf8_t *dest, /* O - Target string */
574 const cups_utf32_t *src, /* I - Source string */
575 const int maxout) /* I - Max output */
ef416fc2 576{
e1d6a774 577 cups_utf8_t *start; /* Start of destination string */
578 int i; /* Looping variable */
579 int swap; /* Byte-swap input to output */
580 cups_utf32_t ch; /* Character value */
581
ef416fc2 582
583 /*
584 * Check for valid arguments and clear output...
585 */
e1d6a774 586
587 if (dest)
588 *dest = '\0';
589
590 if (!dest || !src || maxout < 1)
ef416fc2 591 return (-1);
ef416fc2 592
593 /*
594 * Check for leading BOM in UTF-32 and inverted BOM...
595 */
e1d6a774 596
597 start = dest;
598 swap = *src == 0xfffe0000;
599
600 if (*src == 0xfffe0000 || *src == 0xfeff)
601 src ++;
ef416fc2 602
603 /*
604 * Convert input UTF-32 to output UTF-8...
605 */
e1d6a774 606
607 for (i = maxout - 1; *src && i > 0;)
ef416fc2 608 {
e1d6a774 609 ch = *src++;
ef416fc2 610
611 /*
612 * Byte swap input UTF-32, if necessary...
e1d6a774 613 * (only byte-swapping 24 of 32 bits)
ef416fc2 614 */
e1d6a774 615
ef416fc2 616 if (swap)
617 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
618
619 /*
e1d6a774 620 * Check for beyond Plane 16 (invalid UTF-32)...
ef416fc2 621 */
ef416fc2 622
ef416fc2 623 if (ch > 0x10ffff)
624 return (-1);
625
ef416fc2 626 /*
627 * Convert UTF-32 character to UTF-8 character(s)...
628 */
e1d6a774 629
630 if (ch < 0x80)
ef416fc2 631 {
632 /*
633 * One-octet UTF-8 <= 127 (US-ASCII)...
634 */
e1d6a774 635
636 *dest++ = (cups_utf8_t)ch;
637 i --;
ef416fc2 638 }
e1d6a774 639 else if (ch < 0x800)
ef416fc2 640 {
641 /*
642 * Two-octet UTF-8 <= 2047 (Latin-x)...
643 */
e1d6a774 644
645 if (i < 2)
646 return (-1);
647
648 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
649 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
650 i -= 2;
ef416fc2 651 }
e1d6a774 652 else if (ch < 0x10000)
ef416fc2 653 {
654 /*
655 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
656 */
e1d6a774 657
658 if (i < 3)
659 return (-1);
660
661 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
662 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
663 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
664 i -= 3;
665 }
666 else
667 {
668 /*
669 * Four-octet UTF-8...
670 */
671
672 if (i < 4)
673 return (-1);
674
675 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
676 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
677 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
678 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
679 i -= 4;
ef416fc2 680 }
681 }
e1d6a774 682
ef416fc2 683 *dest = '\0';
e1d6a774 684
685 return ((int)(dest - start));
ef416fc2 686}
687
e1d6a774 688
ef416fc2 689/*
e1d6a774 690 * 'compare_wide()' - Compare key for wide (VBCS) match.
691 */
692
693static int
694compare_wide(const void *k1, /* I - Key char */
695 const void *k2) /* I - Map char */
696{
697 cups_vbcs_t key; /* Legacy key character */
698 cups_vbcs_t map; /* Legacy map character */
699
700
701 key = *((cups_vbcs_t *)k1);
702 map = ((_cups_wide2uni_t *)k2)->widechar;
703
704 return ((int)(key - map));
705}
706
707
708/*
709 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
ef416fc2 710 */
e1d6a774 711
712static int /* O - Count or -1 on error */
713conv_sbcs_to_utf8(
714 cups_utf8_t *dest, /* O - Target string */
715 const cups_sbcs_t *src, /* I - Source string */
716 int maxout, /* I - Max output */
717 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 718{
e1d6a774 719 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
720 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
721 cups_sbcs_t legchar; /* Legacy character value */
722 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
723 *workptr; /* Pointer into string */
724
ef416fc2 725
726 /*
e1d6a774 727 * Find legacy charset map in cache...
ef416fc2 728 */
e1d6a774 729
d6ae789d 730 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
ef416fc2 731 return (-1);
ef416fc2 732
733 /*
e1d6a774 734 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 735 */
ef416fc2 736
e1d6a774 737 work[0] = 0xfeff;
738 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 739 {
e1d6a774 740 legchar = *src++;
ef416fc2 741
742 /*
e1d6a774 743 * Convert ASCII verbatim (optimization)...
ef416fc2 744 */
ef416fc2 745
e1d6a774 746 if (legchar < 0x80)
747 *workptr++ = (cups_utf32_t)legchar;
748 else
ef416fc2 749 {
e1d6a774 750 /*
751 * Convert unknown character to Replacement Character...
752 */
ef416fc2 753
e1d6a774 754 crow = cmap->char2uni + legchar;
755
756 if (!*crow)
757 *workptr++ = 0xfffd;
758 else
759 *workptr++ = (cups_utf32_t)*crow;
ef416fc2 760 }
ef416fc2 761 }
e1d6a774 762
763 *workptr = 0;
764
765 /*
766 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
767 */
768
d6ae789d 769 cmap->used --;
e1d6a774 770
771 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 772}
773
e1d6a774 774
ef416fc2 775/*
e1d6a774 776 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
ef416fc2 777 */
e1d6a774 778
779static int /* O - Count or -1 on error */
780conv_utf8_to_sbcs(
781 cups_sbcs_t *dest, /* O - Target string */
782 const cups_utf8_t *src, /* I - Source string */
783 int maxout, /* I - Max output */
784 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 785{
e1d6a774 786 cups_sbcs_t *start; /* Start of destination string */
787 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
788 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
789 cups_utf32_t unichar; /* Character value */
790 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
791 *workptr; /* Pointer into string */
792
ef416fc2 793
794 /*
e1d6a774 795 * Find legacy charset map in cache...
ef416fc2 796 */
e1d6a774 797
d6ae789d 798 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
ef416fc2 799 return (-1);
ef416fc2 800
801 /*
e1d6a774 802 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 803 */
e1d6a774 804
805 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
806 return (-1);
ef416fc2 807
808 /*
e1d6a774 809 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
ef416fc2 810 */
e1d6a774 811
812 for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
ef416fc2 813 {
e1d6a774 814 unichar = *workptr++;
815 if (!unichar)
ef416fc2 816 break;
ef416fc2 817
818 /*
e1d6a774 819 * Convert ASCII verbatim (optimization)...
ef416fc2 820 */
ef416fc2 821
e1d6a774 822 if (unichar < 0x80)
823 {
824 *dest++ = (cups_sbcs_t)unichar;
825 continue;
826 }
ef416fc2 827
828 /*
e1d6a774 829 * Convert unknown character to visible replacement...
ef416fc2 830 */
ef416fc2 831
e1d6a774 832 srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
ef416fc2 833
e1d6a774 834 if (srow)
835 srow += (int)(unichar & 0xff);
ef416fc2 836
e1d6a774 837 if (!srow || !*srow)
838 *dest++ = '?';
839 else
840 *dest++ = *srow;
ef416fc2 841 }
ef416fc2 842
e1d6a774 843 *dest = '\0';
844
d6ae789d 845 cmap->used --;
e1d6a774 846
847 return ((int)(dest - start));
ef416fc2 848}
849
e1d6a774 850
ef416fc2 851/*
e1d6a774 852 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
ef416fc2 853 */
e1d6a774 854
855static int /* O - Count or -1 on error */
856conv_utf8_to_vbcs(
857 cups_sbcs_t *dest, /* O - Target string */
858 const cups_utf8_t *src, /* I - Source string */
859 int maxout, /* I - Max output */
860 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 861{
e1d6a774 862 cups_sbcs_t *start; /* Start of destination string */
863 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
864 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
865 cups_utf32_t unichar; /* Character value */
866 cups_vbcs_t legchar; /* Legacy character value */
867 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
868 *workptr; /* Pointer into string */
ef416fc2 869
ef416fc2 870
871 /*
e1d6a774 872 * Find legacy charset map in cache...
ef416fc2 873 */
ef416fc2 874
d6ae789d 875 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
e1d6a774 876 return (-1);
ef416fc2 877
878 /*
e1d6a774 879 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 880 */
e1d6a774 881
882 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
883 return (-1);
ef416fc2 884
885 /*
e1d6a774 886 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
ef416fc2 887 */
e1d6a774 888
889 for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
ef416fc2 890 {
e1d6a774 891 unichar = *workptr++;
892 if (!unichar)
ef416fc2 893 break;
ef416fc2 894
895 /*
e1d6a774 896 * Convert ASCII verbatim (optimization)...
ef416fc2 897 */
e1d6a774 898
899 if (unichar < 0x80)
900 {
901 *dest++ = (cups_vbcs_t)unichar;
902 continue;
903 }
ef416fc2 904
905 /*
e1d6a774 906 * Convert unknown character to visible replacement...
ef416fc2 907 */
e1d6a774 908
909 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
910
911 if (vrow)
912 vrow += (int)(unichar & 0xff);
913
914 if (!vrow || !*vrow)
915 legchar = (cups_vbcs_t)'?';
916 else
917 legchar = (cups_vbcs_t)*vrow;
ef416fc2 918
919 /*
e1d6a774 920 * Save n-byte legacy character...
ef416fc2 921 */
e1d6a774 922
923 if (legchar > 0xffffff)
ef416fc2 924 {
e1d6a774 925 if (maxout < 5)
926 return (-1);
927
928 *dest++ = (cups_sbcs_t)(legchar >> 24);
929 *dest++ = (cups_sbcs_t)(legchar >> 16);
930 *dest++ = (cups_sbcs_t)(legchar >> 8);
931 *dest++ = (cups_sbcs_t)legchar;
932
933 maxout -= 3;
ef416fc2 934 }
e1d6a774 935 else if (legchar > 0xffff)
936 {
937 if (maxout < 4)
938 return (-1);
ef416fc2 939
e1d6a774 940 *dest++ = (cups_sbcs_t)(legchar >> 16);
941 *dest++ = (cups_sbcs_t)(legchar >> 8);
942 *dest++ = (cups_sbcs_t)legchar;
ef416fc2 943
e1d6a774 944 maxout -= 2;
945 }
946 else if (legchar > 0xff)
947 {
948 *dest++ = (cups_sbcs_t)(legchar >> 8);
949 *dest++ = (cups_sbcs_t)legchar;
950
951 maxout --;
952 }
ef416fc2 953 }
e1d6a774 954
955 *dest = '\0';
956
d6ae789d 957 vmap->used --;
e1d6a774 958
959 return ((int)(dest - start));
ef416fc2 960}
961
e1d6a774 962
ef416fc2 963/*
e1d6a774 964 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
ef416fc2 965 */
e1d6a774 966
967static int /* O - Count or -1 on error */
968conv_vbcs_to_utf8(
969 cups_utf8_t *dest, /* O - Target string */
970 const cups_sbcs_t *src, /* I - Source string */
971 int maxout, /* I - Max output */
972 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 973{
e1d6a774 974 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
975 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
976 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
977 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
978 cups_vbcs_t legchar; /* Legacy character value */
979 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
980 *workptr; /* Pointer into string */
ef416fc2 981
ef416fc2 982
983 /*
e1d6a774 984 * Find legacy charset map in cache...
ef416fc2 985 */
ef416fc2 986
d6ae789d 987 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
e1d6a774 988 return (-1);
ef416fc2 989
990 /*
e1d6a774 991 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 992 */
ef416fc2 993
e1d6a774 994 work[0] = 0xfeff;
995 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 996 {
e1d6a774 997 legchar = *src++;
998 leadchar = (cups_sbcs_t)legchar;
ef416fc2 999
1000 /*
e1d6a774 1001 * Convert ASCII verbatim (optimization)...
ef416fc2 1002 */
ef416fc2 1003
e1d6a774 1004 if (legchar < 0x80)
ef416fc2 1005 {
e1d6a774 1006 *workptr++ = (cups_utf32_t)legchar;
1007 continue;
ef416fc2 1008 }
1009
1010 /*
e1d6a774 1011 * Convert 2-byte legacy character...
ef416fc2 1012 */
e1d6a774 1013
1014 if (vmap->lead2char[(int)leadchar] == leadchar)
ef416fc2 1015 {
e1d6a774 1016 if (!*src)
1017 return (-1);
1018
1019 legchar = (legchar << 8) | *src++;
1020
ef416fc2 1021 /*
e1d6a774 1022 * Convert unknown character to Replacement Character...
ef416fc2 1023 */
e1d6a774 1024
1025 crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1026 if (crow)
1027 crow += (int) (legchar & 0xff);
1028
1029 if (!crow || !*crow)
1030 *workptr++ = 0xfffd;
1031 else
1032 *workptr++ = (cups_utf32_t)*crow;
1033 continue;
ef416fc2 1034 }
1035
1036 /*
e1d6a774 1037 * Fetch 3-byte or 4-byte legacy character...
ef416fc2 1038 */
e1d6a774 1039
1040 if (vmap->lead3char[(int)leadchar] == leadchar)
ef416fc2 1041 {
e1d6a774 1042 if (!*src || !src[1])
1043 return (-1);
1044
1045 legchar = (legchar << 8) | *src++;
1046 legchar = (legchar << 8) | *src++;
ef416fc2 1047 }
e1d6a774 1048 else if (vmap->lead4char[(int)leadchar] == leadchar)
1049 {
1050 if (!*src || !src[1] || !src[2])
1051 return (-1);
1052
1053 legchar = (legchar << 8) | *src++;
1054 legchar = (legchar << 8) | *src++;
1055 legchar = (legchar << 8) | *src++;
1056 }
1057 else
1058 return (-1);
ef416fc2 1059
1060 /*
e1d6a774 1061 * Find 3-byte or 4-byte legacy character...
ef416fc2 1062 */
e1d6a774 1063
1064 wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1065 vmap->wide2uni,
1066 vmap->widecount,
1067 sizeof(_cups_wide2uni_t),
1068 compare_wide);
ef416fc2 1069
1070 /*
e1d6a774 1071 * Convert unknown character to Replacement Character...
ef416fc2 1072 */
e1d6a774 1073
1074 if (!wide2uni || !wide2uni->unichar)
1075 *workptr++ = 0xfffd;
1076 else
1077 *workptr++ = wide2uni->unichar;
ef416fc2 1078 }
e1d6a774 1079
1080 *workptr = 0;
1081
d6ae789d 1082 vmap->used --;
e1d6a774 1083
1084 /*
1085 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1086 */
1087
1088 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 1089}
1090
e1d6a774 1091
ef416fc2 1092/*
e1d6a774 1093 * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
ef416fc2 1094 */
e1d6a774 1095
1096static void
1097free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
ef416fc2 1098{
e1d6a774 1099 int i; /* Looping variable */
ef416fc2 1100
ef416fc2 1101
e1d6a774 1102 for (i = 0; i < 256; i ++)
1103 if (cmap->uni2char[i])
1104 free(cmap->uni2char[i]);
1105
1106 free(cmap);
1107}
1108
1109
1110/*
1111 * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1112 */
1113
1114static void
1115free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
1116{
1117 int i; /* Looping variable */
1118
1119
1120 for (i = 0; i < 256; i ++)
1121 if (vmap->char2uni[i])
1122 free(vmap->char2uni[i]);
1123
1124 for (i = 0; i < 256; i ++)
1125 if (vmap->uni2char[i])
1126 free(vmap->uni2char[i]);
1127
1128 if (vmap->wide2uni)
1129 free(vmap->wide2uni);
1130
1131 free(vmap);
1132}
1133
1134
d6ae789d 1135/*
1136 * 'get_charmap()' - Lookup or get a character set map (private).
1137 *
1138 * This code handles single-byte (SBCS), double-byte (DBCS), and
1139 * variable-byte (VBCS) character sets _without_ charset escapes...
1140 * This code does not handle multiple-byte character sets (MBCS)
1141 * (such as ISO-2022-JP) with charset switching via escapes...
1142 */
1143
1144
1145void * /* O - Charset map pointer */
1146get_charmap(
1147 const cups_encoding_t encoding) /* I - Encoding */
1148{
1149 char filename[1024]; /* Filename for charset map file */
1150 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1151
1152
1153 /*
1154 * Get the data directory and charset map name...
1155 */
1156
1157 snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1158 cg->cups_datadir, _cupsEncodingName(encoding));
1159
1160 DEBUG_printf((" filename=\"%s\"\n", filename));
1161
1162 /*
1163 * Read charset map input file into cache...
1164 */
1165
1166 if (encoding < CUPS_ENCODING_SBCS_END)
1167 return (get_sbcs_charmap(encoding, filename));
1168 else if (encoding < CUPS_ENCODING_VBCS_END)
1169 return (get_vbcs_charmap(encoding, filename));
1170 else
1171 return (NULL);
1172}
1173
1174
e1d6a774 1175/*
1176 * 'get_charmap_count()' - Count lines in a charmap file.
1177 */
1178
1179static int /* O - Count or -1 on error */
1180get_charmap_count(cups_file_t *fp) /* I - File to read from */
1181{
1182 int count; /* Number of lines */
1183 char line[256]; /* Line from input map file */
ef416fc2 1184
ef416fc2 1185
1186 /*
e1d6a774 1187 * Count lines in map input file...
ef416fc2 1188 */
ef416fc2 1189
e1d6a774 1190 count = 0;
ef416fc2 1191
e1d6a774 1192 while (cupsFileGets(fp, line, sizeof(line)))
1193 if (line[0] == '0')
1194 count ++;
ef416fc2 1195
e1d6a774 1196 /*
1197 * Return the number of lines...
1198 */
1199
1200 if (count > 0)
1201 return (count);
1202 else
1203 return (-1);
ef416fc2 1204}
1205
e1d6a774 1206
ef416fc2 1207/*
e1d6a774 1208 * 'get_sbcs_charmap()' - Get SBCS Charmap.
ef416fc2 1209 */
e1d6a774 1210
1211static _cups_cmap_t * /* O - Charmap or 0 on error */
1212get_sbcs_charmap(
1213 const cups_encoding_t encoding, /* I - Charmap Encoding */
1214 const char *filename) /* I - Charmap Filename */
ef416fc2 1215{
e1d6a774 1216 unsigned long legchar; /* Legacy character value */
1217 cups_utf32_t unichar; /* Unicode character value */
1218 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1219 cups_file_t *fp; /* Charset map file pointer */
1220 char *s; /* Line parsing pointer */
1221 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1222 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1223 char line[256]; /* Line from charset map file */
e1d6a774 1224
ef416fc2 1225
1226 /*
e1d6a774 1227 * See if we already have this SBCS charset map loaded...
ef416fc2 1228 */
e1d6a774 1229
d6ae789d 1230 for (cmap = cmap_cache; cmap; cmap = cmap->next)
e1d6a774 1231 {
1232 if (cmap->encoding == encoding)
1233 {
1234 cmap->used ++;
1235 DEBUG_printf((" returning existing cmap=%p\n", cmap));
d6ae789d 1236
e1d6a774 1237 return ((void *)cmap);
1238 }
1239 }
ef416fc2 1240
1241 /*
e1d6a774 1242 * Open SBCS charset map input file...
ef416fc2 1243 */
e1d6a774 1244
1245 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1246 return (NULL);
ef416fc2 1247
1248 /*
e1d6a774 1249 * Allocate memory for SBCS charset map...
ef416fc2 1250 */
e1d6a774 1251
1252 if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1253 {
1254 cupsFileClose(fp);
1255 DEBUG_puts(" Unable to allocate memory!");
d6ae789d 1256
e1d6a774 1257 return (NULL);
1258 }
1259
1260 cmap->used ++;
1261 cmap->encoding = encoding;
ef416fc2 1262
1263 /*
e1d6a774 1264 * Save SBCS charset map into memory for transcoding...
ef416fc2 1265 */
e1d6a774 1266
1267 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1268 {
e1d6a774 1269 if (line[0] != '0')
1270 continue;
1271
1272 legchar = strtol(line, &s, 16);
1273 if (legchar < 0 || legchar > 0xff)
1274 goto sbcs_error;
1275
1276 unichar = strtol(s, NULL, 16);
1277 if (unichar < 0 || unichar > 0xffff)
1278 goto sbcs_error;
ef416fc2 1279
1280 /*
e1d6a774 1281 * Save legacy to Unicode mapping in direct lookup table...
ef416fc2 1282 */
e1d6a774 1283
1284 crow = cmap->char2uni + legchar;
1285 *crow = (cups_ucs2_t)(unichar & 0xffff);
ef416fc2 1286
1287 /*
e1d6a774 1288 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1289 */
e1d6a774 1290
1291 srow = cmap->uni2char[(unichar >> 8) & 0xff];
1292 if (!srow)
ef416fc2 1293 {
e1d6a774 1294 srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1295 if (!srow)
1296 goto sbcs_error;
1297
1298 cmap->uni2char[(unichar >> 8) & 0xff] = srow;
ef416fc2 1299 }
1300
e1d6a774 1301 srow += unichar & 0xff;
1302
ef416fc2 1303 /*
e1d6a774 1304 * Convert Replacement Character to visible replacement...
ef416fc2 1305 */
e1d6a774 1306
1307 if (unichar == 0xfffd)
1308 legchar = (unsigned long)'?';
ef416fc2 1309
1310 /*
e1d6a774 1311 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1312 */
ef416fc2 1313
e1d6a774 1314 if (!*srow)
1315 *srow = (cups_sbcs_t)legchar;
1316 }
ef416fc2 1317
e1d6a774 1318 cupsFileClose(fp);
1319
ef416fc2 1320 /*
e1d6a774 1321 * Add it to the cache and return...
ef416fc2 1322 */
e1d6a774 1323
d6ae789d 1324 cmap->next = cmap_cache;
1325 cmap_cache = cmap;
e1d6a774 1326
1327 DEBUG_printf((" returning new cmap=%p\n", cmap));
1328
1329 return (cmap);
ef416fc2 1330
1331 /*
e1d6a774 1332 * If we get here, there was an error in the cmap file...
ef416fc2 1333 */
e1d6a774 1334
1335 sbcs_error:
1336
1337 free_sbcs_charmap(cmap);
1338
1339 cupsFileClose(fp);
1340
1341 DEBUG_puts(" Error, returning NULL!");
1342
1343 return (NULL);
1344}
1345
1346
1347/*
1348 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1349 */
1350
1351static _cups_vmap_t * /* O - Charmap or 0 on error */
1352get_vbcs_charmap(
1353 const cups_encoding_t encoding, /* I - Charmap Encoding */
1354 const char *filename) /* I - Charmap Filename */
1355{
1356 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1357 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1358 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1359 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1360 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1361 unsigned long legchar; /* Legacy character value */
1362 cups_utf32_t unichar; /* Unicode character value */
1363 int mapcount; /* Count of lines in charmap file */
1364 cups_file_t *fp; /* Charset map file pointer */
1365 char *s; /* Line parsing pointer */
1366 char line[256]; /* Line from charset map file */
1367 int i; /* Loop variable */
1368 int wide; /* 32-bit legacy char */
e1d6a774 1369
1370
1371 DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1372 encoding, filename));
ef416fc2 1373
1374 /*
e1d6a774 1375 * See if we already have this DBCS/VBCS charset map loaded...
ef416fc2 1376 */
ef416fc2 1377
d6ae789d 1378 for (vmap = vmap_cache; vmap; vmap = vmap->next)
e1d6a774 1379 {
1380 if (vmap->encoding == encoding)
ef416fc2 1381 {
e1d6a774 1382 vmap->used ++;
1383 DEBUG_printf((" returning existing vmap=%p\n", vmap));
d6ae789d 1384
e1d6a774 1385 return ((void *)vmap);
ef416fc2 1386 }
ef416fc2 1387 }
ef416fc2 1388
1389 /*
e1d6a774 1390 * Open VBCS charset map input file...
ef416fc2 1391 */
ef416fc2 1392
e1d6a774 1393 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1394 {
1395 DEBUG_printf((" Unable to open file: %s\n", strerror(errno)));
d6ae789d 1396
e1d6a774 1397 return (NULL);
1398 }
ef416fc2 1399
1400 /*
e1d6a774 1401 * Count lines in charmap file...
ef416fc2 1402 */
e1d6a774 1403
1404 if ((mapcount = get_charmap_count(fp)) <= 0)
1405 {
1406 DEBUG_puts(" Unable to get charmap count!");
d6ae789d 1407
e1d6a774 1408 return (NULL);
1409 }
1410
1411 DEBUG_printf((" mapcount=%d\n", mapcount));
ef416fc2 1412
1413 /*
e1d6a774 1414 * Allocate memory for DBCS/VBCS charset map...
ef416fc2 1415 */
e1d6a774 1416
1417 if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1418 {
1419 cupsFileClose(fp);
1420 DEBUG_puts(" Unable to allocate memory!");
d6ae789d 1421
e1d6a774 1422 return (NULL);
1423 }
1424
1425 vmap->used ++;
1426 vmap->encoding = encoding;
ef416fc2 1427
1428 /*
e1d6a774 1429 * Save DBCS/VBCS charset map into memory for transcoding...
ef416fc2 1430 */
e1d6a774 1431
1432 leadchar = 0;
1433 wide2uni = NULL;
1434
1435 cupsFileRewind(fp);
1436
1437 i = 0;
1438 wide = 0;
1439
1440 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1441 {
e1d6a774 1442 if (line[0] != '0')
1443 continue;
1444
1445 legchar = strtoul(line, &s, 16);
1446 if (legchar == ULONG_MAX)
1447 goto vbcs_error;
1448
1449 unichar = strtol(s, NULL, 16);
1450 if (unichar < 0 || unichar > 0xffff)
1451 goto vbcs_error;
1452
1453 i ++;
1454
1455/* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1456 legchar, (unsigned)unichar)); */
ef416fc2 1457
1458 /*
e1d6a774 1459 * Save lead char of 2/3/4-byte legacy char...
ef416fc2 1460 */
e1d6a774 1461
1462 if (legchar > 0xff && legchar <= 0xffff)
ef416fc2 1463 {
e1d6a774 1464 leadchar = (cups_sbcs_t)(legchar >> 8);
1465 vmap->lead2char[leadchar] = leadchar;
1466 }
1467
1468 if (legchar > 0xffff && legchar <= 0xffffff)
1469 {
1470 leadchar = (cups_sbcs_t)(legchar >> 16);
1471 vmap->lead3char[leadchar] = leadchar;
1472 }
1473
1474 if (legchar > 0xffffff)
1475 {
1476 leadchar = (cups_sbcs_t)(legchar >> 24);
1477 vmap->lead4char[leadchar] = leadchar;
ef416fc2 1478 }
1479
1480 /*
e1d6a774 1481 * Save Legacy to Unicode mapping...
ef416fc2 1482 */
e1d6a774 1483
1484 if (legchar <= 0xffff)
ef416fc2 1485 {
ef416fc2 1486 /*
e1d6a774 1487 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
ef416fc2 1488 */
e1d6a774 1489
1490 crow = vmap->char2uni[(int)leadchar];
1491 if (!crow)
1492 {
1493 crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1494 if (!crow)
1495 goto vbcs_error;
1496
1497 vmap->char2uni[(int)leadchar] = crow;
1498 }
1499
1500 crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1501 }
1502 else
1503 {
1504 /*
1505 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1506 */
1507
1508 if (!wide)
1509 {
1510 wide = 1;
1511 vmap->widecount = (mapcount - i + 1);
1512 wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
1513 sizeof(_cups_wide2uni_t));
1514 if (!wide2uni)
1515 goto vbcs_error;
1516
1517 vmap->wide2uni = wide2uni;
1518 }
1519
1520 wide2uni->widechar = (cups_vbcs_t)legchar;
1521 wide2uni->unichar = (cups_ucs2_t)unichar;
1522 wide2uni ++;
ef416fc2 1523 }
1524
1525 /*
e1d6a774 1526 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1527 */
e1d6a774 1528
1529 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1530 if (!vrow)
ef416fc2 1531 {
e1d6a774 1532 vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1533 if (!vrow)
1534 goto vbcs_error;
1535
1536 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
ef416fc2 1537 }
e1d6a774 1538
1539 vrow += (int)(unichar & 0xff);
ef416fc2 1540
1541 /*
e1d6a774 1542 * Convert Replacement Character to visible replacement...
ef416fc2 1543 */
e1d6a774 1544
1545 if (unichar == 0xfffd)
1546 legchar = (unsigned long)'?';
ef416fc2 1547
1548 /*
e1d6a774 1549 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1550 */
e1d6a774 1551
1552 if (!*vrow)
1553 *vrow = (cups_vbcs_t)legchar;
ef416fc2 1554 }
e1d6a774 1555
1556 vmap->charcount = (i - vmap->widecount);
1557
1558 cupsFileClose(fp);
ef416fc2 1559
1560 /*
e1d6a774 1561 * Add it to the cache and return...
ef416fc2 1562 */
ef416fc2 1563
d6ae789d 1564 vmap->next = vmap_cache;
1565 vmap_cache = vmap;
e1d6a774 1566
1567 DEBUG_printf((" returning new vmap=%p\n", vmap));
1568
1569 return (vmap);
1570
1571 /*
1572 * If we get here, the file contains errors...
1573 */
1574
1575 vbcs_error:
1576
1577 free_vbcs_charmap(vmap);
1578
1579 cupsFileClose(fp);
1580
1581 DEBUG_puts(" Error, returning NULL!");
1582
1583 return (NULL);
ef416fc2 1584}
1585
1586
1587/*
c07d5b2d 1588 * End of "$Id: transcode.c 177 2006-06-21 00:20:03Z jlovell $"
ef416fc2 1589 */