]> git.ipfire.org Git - thirdparty/cups.git/blame - cups/transcode.c
Load cups into easysw/current.
[thirdparty/cups.git] / cups / transcode.c
CommitLineData
ef416fc2 1/*
e1d6a774 2 * "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $"
ef416fc2 3 *
4 * Transcoding support for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 1997-2006 by Easy Software Products.
7 *
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
13 * Products at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
fa73b229 26 * _cupsCharmapFlush() - Flush all character set maps out of cache.
e1d6a774 27 * _cupsCharmapFree() - Free a character set map.
28 * _cupsCharmapGet() - Get a character set map.
ef416fc2 29 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
e1d6a774 30 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
ef416fc2 31 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
32 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
e1d6a774 33 * compare_wide() - Compare key for wide (VBCS) match.
34 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
ef416fc2 35 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
36 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
ef416fc2 37 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
e1d6a774 38 * free_sbcs_charmap() - Free memory used by a single byte character set.
39 * free_vbcs_charmap() - Free memory used by a variable byte character set.
40 * get_charmap_count() - Count lines in a charmap file.
41 * get_sbcs_charmap() - Get SBCS Charmap.
42 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
ef416fc2 43 */
44
45/*
46 * Include necessary headers...
47 */
48
49#include "globals.h"
e1d6a774 50#include "debug.h"
ef416fc2 51#include <stdlib.h>
52#include <errno.h>
53#include <time.h>
54
55
56/*
e1d6a774 57 * Local functions...
ef416fc2 58 */
59
e1d6a774 60static int compare_wide(const void *k1, const void *k2);
61static int conv_sbcs_to_utf8(cups_utf8_t *dest,
62 const cups_sbcs_t *src,
63 int maxout,
64 const cups_encoding_t encoding);
65static int conv_utf8_to_sbcs(cups_sbcs_t *dest,
66 const cups_utf8_t *src,
67 int maxout,
68 const cups_encoding_t encoding);
69static int conv_utf8_to_vbcs(cups_sbcs_t *dest,
70 const cups_utf8_t *src,
71 int maxout,
72 const cups_encoding_t encoding);
73static int conv_vbcs_to_utf8(cups_utf8_t *dest,
74 const cups_sbcs_t *src,
75 int maxout,
76 const cups_encoding_t encoding);
77static void free_sbcs_charmap(_cups_cmap_t *sbcs);
78static void free_vbcs_charmap(_cups_vmap_t *vbcs);
79static int get_charmap_count(cups_file_t *fp);
80static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
81 const char *filename);
82static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
83 const char *filename);
84
ef416fc2 85
86/*
e1d6a774 87 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
ef416fc2 88 */
89
e1d6a774 90void
91_cupsCharmapFlush(_cups_globals_t *cg) /* I - Global data */
ef416fc2 92{
e1d6a774 93 _cups_cmap_t *cmap, /* Legacy SBCS / Unicode Charset Map */
94 *cnext; /* Next Legacy SBCS Charset Map */
95 _cups_vmap_t *vmap, /* Legacy VBCS / Unicode Charset Map */
96 *vnext; /* Next Legacy VBCS Charset Map */
ef416fc2 97
98
99 /*
e1d6a774 100 * Loop through SBCS charset map cache, free all memory...
ef416fc2 101 */
102
e1d6a774 103 for (cmap = cg->cmap_cache; cmap; cmap = cnext)
104 {
105 cnext = cmap->next;
ef416fc2 106
e1d6a774 107 free_sbcs_charmap(cmap);
108 }
ef416fc2 109
e1d6a774 110 cg->cmap_cache = NULL;
ef416fc2 111
112 /*
e1d6a774 113 * Loop through DBCS/VBCS charset map cache, free all memory...
ef416fc2 114 */
115
e1d6a774 116 for (vmap = cg->vmap_cache; vmap; vmap = vnext)
117 {
118 vnext = vmap->next;
119
120 free_vbcs_charmap(vmap);
121
122 free(vmap);
123 }
124
125 cg->vmap_cache = NULL;
ef416fc2 126}
127
e1d6a774 128
ef416fc2 129/*
e1d6a774 130 * '_cupsCharmapFree()' - Free a character set map.
ef416fc2 131 *
e1d6a774 132 * This does not actually free; use '_cupsCharmapFlush()' for that.
ef416fc2 133 */
e1d6a774 134
ef416fc2 135void
e1d6a774 136_cupsCharmapFree(
137 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 138{
e1d6a774 139 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
140 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
141 _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */
142
ef416fc2 143
144 /*
145 * See if we already have this SBCS charset map loaded...
146 */
e1d6a774 147
148 for (cmap = cg->cmap_cache; cmap; cmap = cmap->next)
ef416fc2 149 {
150 if (cmap->encoding == encoding)
151 {
152 if (cmap->used > 0)
153 cmap->used --;
e1d6a774 154
ef416fc2 155 return;
156 }
157 }
158
159 /*
160 * See if we already have this DBCS/VBCS charset map loaded...
161 */
e1d6a774 162
163 for (vmap = cg->vmap_cache; vmap; vmap = vmap->next)
ef416fc2 164 {
165 if (vmap->encoding == encoding)
166 {
167 if (vmap->used > 0)
168 vmap->used --;
169 return;
170 }
171 }
fa73b229 172}
173
174
175/*
e1d6a774 176 * '_cupsCharmapGet()' - Get a character set map.
177 *
178 * This code handles single-byte (SBCS), double-byte (DBCS), and
179 * variable-byte (VBCS) character sets _without_ charset escapes...
180 * This code does not handle multiple-byte character sets (MBCS)
181 * (such as ISO-2022-JP) with charset switching via escapes...
fa73b229 182 */
183
e1d6a774 184void * /* O - Charset map pointer */
185_cupsCharmapGet(
186 const cups_encoding_t encoding) /* I - Encoding */
fa73b229 187{
e1d6a774 188 char filename[1024]; /* Filename for charset map file */
189 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
190
fa73b229 191
e1d6a774 192 DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
ef416fc2 193
194 /*
e1d6a774 195 * Check for valid arguments...
ef416fc2 196 */
e1d6a774 197
198 if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
ef416fc2 199 {
e1d6a774 200 DEBUG_puts(" Bad encoding, returning NULL!");
201 return (NULL);
ef416fc2 202 }
ef416fc2 203
204 /*
e1d6a774 205 * Get the data directory and charset map name...
ef416fc2 206 */
e1d6a774 207
208 snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
209 cg->cups_datadir, _cupsEncodingName(encoding));
210
211 DEBUG_printf((" filename=\"%s\"\n", filename));
212
213 /*
214 * Read charset map input file into cache...
215 */
216
217 if (encoding < CUPS_ENCODING_SBCS_END)
218 return (get_sbcs_charmap(encoding, filename));
219 else if (encoding < CUPS_ENCODING_VBCS_END)
220 return (get_vbcs_charmap(encoding, filename));
221 else
222 return (NULL);
ef416fc2 223}
224
e1d6a774 225
ef416fc2 226/*
e1d6a774 227 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
ef416fc2 228 *
229 * This code handles single-byte (SBCS), double-byte (DBCS), and
230 * variable-byte (VBCS) character sets _without_ charset escapes...
231 * This code does not handle multiple-byte character sets (MBCS)
232 * (such as ISO-2022-JP) with charset switching via escapes...
233 */
e1d6a774 234
235int /* O - Count or -1 on error */
236cupsCharsetToUTF8(
237 cups_utf8_t *dest, /* O - Target string */
238 const char *src, /* I - Source string */
239 const int maxout, /* I - Max output */
240 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 241{
242 /*
243 * Check for valid arguments...
244 */
245
e1d6a774 246 DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
247 dest, src, maxout, encoding));
248
249 if (dest)
250 *dest = '\0';
251
ef416fc2 252 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
e1d6a774 253 {
254 DEBUG_puts(" Bad arguments, returning -1");
ef416fc2 255 return (-1);
e1d6a774 256 }
ef416fc2 257
258 /*
259 * Handle identity conversions...
260 */
261
262 if (encoding == CUPS_UTF8 ||
263 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
264 {
e1d6a774 265 strlcpy((char *)dest, src, maxout);
266 return (strlen((char *)dest));
ef416fc2 267 }
268
269 /*
e1d6a774 270 * Convert input legacy charset to UTF-8...
ef416fc2 271 */
e1d6a774 272
ef416fc2 273 if (encoding < CUPS_ENCODING_SBCS_END)
e1d6a774 274 return (conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding));
ef416fc2 275 else if (encoding < CUPS_ENCODING_VBCS_END)
e1d6a774 276 return (conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding));
ef416fc2 277 else
e1d6a774 278 {
279 puts(" Bad encoding, returning -1");
ef416fc2 280 return (-1);
e1d6a774 281 }
ef416fc2 282}
283
e1d6a774 284
ef416fc2 285/*
e1d6a774 286 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
ef416fc2 287 *
288 * This code handles single-byte (SBCS), double-byte (DBCS), and
289 * variable-byte (VBCS) character sets _without_ charset escapes...
290 * This code does not handle multiple-byte character sets (MBCS)
291 * (such as ISO-2022-JP) with charset switching via escapes...
292 */
e1d6a774 293
294int /* O - Count or -1 on error */
295cupsUTF8ToCharset(
296 char *dest, /* O - Target string */
297 const cups_utf8_t *src, /* I - Source string */
298 const int maxout, /* I - Max output */
299 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 300{
301 /*
302 * Check for valid arguments...
303 */
304
305 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
e1d6a774 306 {
307 if (dest)
308 *dest = '\0';
309
ef416fc2 310 return (-1);
e1d6a774 311 }
ef416fc2 312
313 /*
314 * Handle identity conversions...
315 */
316
317 if (encoding == CUPS_UTF8 ||
318 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
319 {
e1d6a774 320 strlcpy(dest, (char *)src, maxout);
321 return (strlen(dest));
ef416fc2 322 }
323
324 /*
e1d6a774 325 * Convert input UTF-8 to legacy charset...
ef416fc2 326 */
e1d6a774 327
ef416fc2 328 if (encoding < CUPS_ENCODING_SBCS_END)
e1d6a774 329 return (conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding));
ef416fc2 330 else if (encoding < CUPS_ENCODING_VBCS_END)
e1d6a774 331 return (conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding));
ef416fc2 332 else
333 return (-1);
334}
335
ef416fc2 336
337/*
338 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
339 *
340 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
341 *
342 * UTF-32 char UTF-8 char(s)
343 * --------------------------------------------------
e1d6a774 344 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 345 * 128 to 2047 = 110xxxxx 10yyyyyy
346 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 347 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 348 *
349 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
350 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 351 */
e1d6a774 352
353int /* O - Count or -1 on error */
354cupsUTF8ToUTF32(
355 cups_utf32_t *dest, /* O - Target string */
356 const cups_utf8_t *src, /* I - Source string */
357 const int maxout) /* I - Max output */
ef416fc2 358{
e1d6a774 359 size_t srclen; /* Source string length */
360 int i; /* Looping variable */
361 cups_utf8_t ch; /* Character value */
362 cups_utf8_t next; /* Next character value */
363 cups_utf32_t ch32; /* UTF-32 character value */
364
ef416fc2 365
366 /*
367 * Check for valid arguments and clear output...
368 */
e1d6a774 369
370 if (dest)
371 *dest = 0;
372
373 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
ef416fc2 374 return (-1);
ef416fc2 375
376 /*
377 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
378 */
e1d6a774 379
380 *dest++ = 0xfeff;
381 srclen = strlen((char *)src);
382
383 for (i = maxout - 1; *src && i > 0; i --)
ef416fc2 384 {
e1d6a774 385 ch = *src++;
ef416fc2 386
387 /*
388 * Convert UTF-8 character(s) to UTF-32 character...
389 */
e1d6a774 390
391 if (!(ch & 0x80))
ef416fc2 392 {
393 /*
394 * One-octet UTF-8 <= 127 (US-ASCII)...
395 */
e1d6a774 396
397 *dest++ = ch;
ef416fc2 398 }
399 else if ((ch & 0xe0) == 0xc0)
400 {
401 /*
402 * Two-octet UTF-8 <= 2047 (Latin-x)...
403 */
e1d6a774 404
405 next = *src++;
406 if (!next)
ef416fc2 407 return (-1);
e1d6a774 408
ef416fc2 409 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
410
411 /*
412 * Check for non-shortest form (invalid UTF-8)...
413 */
e1d6a774 414
415 if (ch32 < 0x80)
ef416fc2 416 return (-1);
e1d6a774 417
418 *dest++ = ch32;
ef416fc2 419 }
420 else if ((ch & 0xf0) == 0xe0)
421 {
422 /*
423 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
424 */
e1d6a774 425
426 next = *src++;
427 if (!next)
ef416fc2 428 return (-1);
e1d6a774 429
430 ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
431
432 next = *src++;
433 if (!next)
ef416fc2 434 return (-1);
e1d6a774 435
436 ch32 = (ch32 << 6) | (next & 0x3f);
ef416fc2 437
438 /*
439 * Check for non-shortest form (invalid UTF-8)...
440 */
e1d6a774 441
442 if (ch32 < 0x800)
ef416fc2 443 return (-1);
e1d6a774 444
445 *dest++ = ch32;
ef416fc2 446 }
447 else if ((ch & 0xf8) == 0xf0)
448 {
449 /*
e1d6a774 450 * Four-octet UTF-8...
ef416fc2 451 */
e1d6a774 452
453 next = *src++;
454 if (!next)
ef416fc2 455 return (-1);
e1d6a774 456
457 ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
458
459 next = *src++;
460 if (!next)
461 return (-1);
462
463 ch32 = (ch32 << 6) | (next & 0x3f);
464
465 next = *src++;
466 if (!next)
467 return (-1);
468
469 ch32 = (ch32 << 6) | (next & 0x3f);
470
ef416fc2 471 /*
e1d6a774 472 * Check for non-shortest form (invalid UTF-8)...
ef416fc2 473 */
e1d6a774 474
475 if (ch32 < 0x10000)
476 return (-1);
477
478 *dest++ = ch32;
ef416fc2 479 }
480 else
481 {
482 /*
e1d6a774 483 * More than 4-octet (invalid UTF-8 sequence)...
ef416fc2 484 */
e1d6a774 485
ef416fc2 486 return (-1);
487 }
488
489 /*
490 * Check for UTF-16 surrogate (illegal UTF-8)...
491 */
ef416fc2 492
e1d6a774 493 if (*dest >= 0xd800 && *dest <= 0xdfff)
ef416fc2 494 return (-1);
495 }
e1d6a774 496
ef416fc2 497 *dest = 0;
e1d6a774 498
ef416fc2 499 return (i);
500}
501
e1d6a774 502
ef416fc2 503/*
504 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
505 *
506 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
507 *
508 * UTF-32 char UTF-8 char(s)
509 * --------------------------------------------------
e1d6a774 510 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 511 * 128 to 2047 = 110xxxxx 10yyyyyy
512 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 513 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 514 *
515 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
516 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 517 */
e1d6a774 518
519int /* O - Count or -1 on error */
520cupsUTF32ToUTF8(
521 cups_utf8_t *dest, /* O - Target string */
522 const cups_utf32_t *src, /* I - Source string */
523 const int maxout) /* I - Max output */
ef416fc2 524{
e1d6a774 525 cups_utf8_t *start; /* Start of destination string */
526 int i; /* Looping variable */
527 int swap; /* Byte-swap input to output */
528 cups_utf32_t ch; /* Character value */
529
ef416fc2 530
531 /*
532 * Check for valid arguments and clear output...
533 */
e1d6a774 534
535 if (dest)
536 *dest = '\0';
537
538 if (!dest || !src || maxout < 1)
ef416fc2 539 return (-1);
ef416fc2 540
541 /*
542 * Check for leading BOM in UTF-32 and inverted BOM...
543 */
e1d6a774 544
545 start = dest;
546 swap = *src == 0xfffe0000;
547
548 if (*src == 0xfffe0000 || *src == 0xfeff)
549 src ++;
ef416fc2 550
551 /*
552 * Convert input UTF-32 to output UTF-8...
553 */
e1d6a774 554
555 for (i = maxout - 1; *src && i > 0;)
ef416fc2 556 {
e1d6a774 557 ch = *src++;
ef416fc2 558
559 /*
560 * Byte swap input UTF-32, if necessary...
e1d6a774 561 * (only byte-swapping 24 of 32 bits)
ef416fc2 562 */
e1d6a774 563
ef416fc2 564 if (swap)
565 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
566
567 /*
e1d6a774 568 * Check for beyond Plane 16 (invalid UTF-32)...
ef416fc2 569 */
ef416fc2 570
ef416fc2 571 if (ch > 0x10ffff)
572 return (-1);
573
ef416fc2 574 /*
575 * Convert UTF-32 character to UTF-8 character(s)...
576 */
e1d6a774 577
578 if (ch < 0x80)
ef416fc2 579 {
580 /*
581 * One-octet UTF-8 <= 127 (US-ASCII)...
582 */
e1d6a774 583
584 *dest++ = (cups_utf8_t)ch;
585 i --;
ef416fc2 586 }
e1d6a774 587 else if (ch < 0x800)
ef416fc2 588 {
589 /*
590 * Two-octet UTF-8 <= 2047 (Latin-x)...
591 */
e1d6a774 592
593 if (i < 2)
594 return (-1);
595
596 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
597 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
598 i -= 2;
ef416fc2 599 }
e1d6a774 600 else if (ch < 0x10000)
ef416fc2 601 {
602 /*
603 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
604 */
e1d6a774 605
606 if (i < 3)
607 return (-1);
608
609 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
610 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
611 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
612 i -= 3;
613 }
614 else
615 {
616 /*
617 * Four-octet UTF-8...
618 */
619
620 if (i < 4)
621 return (-1);
622
623 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
624 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
625 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
626 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
627 i -= 4;
ef416fc2 628 }
629 }
e1d6a774 630
ef416fc2 631 *dest = '\0';
e1d6a774 632
633 return ((int)(dest - start));
ef416fc2 634}
635
e1d6a774 636
ef416fc2 637/*
e1d6a774 638 * 'compare_wide()' - Compare key for wide (VBCS) match.
639 */
640
641static int
642compare_wide(const void *k1, /* I - Key char */
643 const void *k2) /* I - Map char */
644{
645 cups_vbcs_t key; /* Legacy key character */
646 cups_vbcs_t map; /* Legacy map character */
647
648
649 key = *((cups_vbcs_t *)k1);
650 map = ((_cups_wide2uni_t *)k2)->widechar;
651
652 return ((int)(key - map));
653}
654
655
656/*
657 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
ef416fc2 658 */
e1d6a774 659
660static int /* O - Count or -1 on error */
661conv_sbcs_to_utf8(
662 cups_utf8_t *dest, /* O - Target string */
663 const cups_sbcs_t *src, /* I - Source string */
664 int maxout, /* I - Max output */
665 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 666{
e1d6a774 667 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
668 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
669 cups_sbcs_t legchar; /* Legacy character value */
670 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
671 *workptr; /* Pointer into string */
672
ef416fc2 673
674 /*
e1d6a774 675 * Find legacy charset map in cache...
ef416fc2 676 */
e1d6a774 677
678 if ((cmap = (_cups_cmap_t *)_cupsCharmapGet(encoding)) == NULL)
ef416fc2 679 return (-1);
ef416fc2 680
681 /*
e1d6a774 682 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 683 */
ef416fc2 684
e1d6a774 685 work[0] = 0xfeff;
686 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 687 {
e1d6a774 688 legchar = *src++;
ef416fc2 689
690 /*
e1d6a774 691 * Convert ASCII verbatim (optimization)...
ef416fc2 692 */
ef416fc2 693
e1d6a774 694 if (legchar < 0x80)
695 *workptr++ = (cups_utf32_t)legchar;
696 else
ef416fc2 697 {
e1d6a774 698 /*
699 * Convert unknown character to Replacement Character...
700 */
ef416fc2 701
e1d6a774 702 crow = cmap->char2uni + legchar;
703
704 if (!*crow)
705 *workptr++ = 0xfffd;
706 else
707 *workptr++ = (cups_utf32_t)*crow;
ef416fc2 708 }
ef416fc2 709 }
e1d6a774 710
711 *workptr = 0;
712
713 /*
714 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
715 */
716
717 _cupsCharmapFree(encoding);
718
719 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 720}
721
e1d6a774 722
ef416fc2 723/*
e1d6a774 724 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
ef416fc2 725 */
e1d6a774 726
727static int /* O - Count or -1 on error */
728conv_utf8_to_sbcs(
729 cups_sbcs_t *dest, /* O - Target string */
730 const cups_utf8_t *src, /* I - Source string */
731 int maxout, /* I - Max output */
732 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 733{
e1d6a774 734 cups_sbcs_t *start; /* Start of destination string */
735 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
736 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
737 cups_utf32_t unichar; /* Character value */
738 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
739 *workptr; /* Pointer into string */
740
ef416fc2 741
742 /*
e1d6a774 743 * Find legacy charset map in cache...
ef416fc2 744 */
e1d6a774 745
746 if ((cmap = (_cups_cmap_t *) _cupsCharmapGet(encoding)) == NULL)
ef416fc2 747 return (-1);
ef416fc2 748
749 /*
e1d6a774 750 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 751 */
e1d6a774 752
753 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
754 return (-1);
ef416fc2 755
756 /*
e1d6a774 757 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
ef416fc2 758 */
e1d6a774 759
760 for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
ef416fc2 761 {
e1d6a774 762 unichar = *workptr++;
763 if (!unichar)
ef416fc2 764 break;
ef416fc2 765
766 /*
e1d6a774 767 * Convert ASCII verbatim (optimization)...
ef416fc2 768 */
ef416fc2 769
e1d6a774 770 if (unichar < 0x80)
771 {
772 *dest++ = (cups_sbcs_t)unichar;
773 continue;
774 }
ef416fc2 775
776 /*
e1d6a774 777 * Convert unknown character to visible replacement...
ef416fc2 778 */
ef416fc2 779
e1d6a774 780 srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
ef416fc2 781
e1d6a774 782 if (srow)
783 srow += (int)(unichar & 0xff);
ef416fc2 784
e1d6a774 785 if (!srow || !*srow)
786 *dest++ = '?';
787 else
788 *dest++ = *srow;
ef416fc2 789 }
ef416fc2 790
e1d6a774 791 *dest = '\0';
792
793 _cupsCharmapFree(encoding);
794
795 return ((int)(dest - start));
ef416fc2 796}
797
e1d6a774 798
ef416fc2 799/*
e1d6a774 800 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
ef416fc2 801 */
e1d6a774 802
803static int /* O - Count or -1 on error */
804conv_utf8_to_vbcs(
805 cups_sbcs_t *dest, /* O - Target string */
806 const cups_utf8_t *src, /* I - Source string */
807 int maxout, /* I - Max output */
808 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 809{
e1d6a774 810 cups_sbcs_t *start; /* Start of destination string */
811 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
812 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
813 cups_utf32_t unichar; /* Character value */
814 cups_vbcs_t legchar; /* Legacy character value */
815 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
816 *workptr; /* Pointer into string */
ef416fc2 817
ef416fc2 818
819 /*
e1d6a774 820 * Find legacy charset map in cache...
ef416fc2 821 */
ef416fc2 822
e1d6a774 823 if ((vmap = (_cups_vmap_t *)_cupsCharmapGet(encoding)) == NULL)
824 return (-1);
ef416fc2 825
826 /*
e1d6a774 827 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 828 */
e1d6a774 829
830 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
831 return (-1);
ef416fc2 832
833 /*
e1d6a774 834 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
ef416fc2 835 */
e1d6a774 836
837 for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
ef416fc2 838 {
e1d6a774 839 unichar = *workptr++;
840 if (!unichar)
ef416fc2 841 break;
ef416fc2 842
843 /*
e1d6a774 844 * Convert ASCII verbatim (optimization)...
ef416fc2 845 */
e1d6a774 846
847 if (unichar < 0x80)
848 {
849 *dest++ = (cups_vbcs_t)unichar;
850 continue;
851 }
ef416fc2 852
853 /*
e1d6a774 854 * Convert unknown character to visible replacement...
ef416fc2 855 */
e1d6a774 856
857 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
858
859 if (vrow)
860 vrow += (int)(unichar & 0xff);
861
862 if (!vrow || !*vrow)
863 legchar = (cups_vbcs_t)'?';
864 else
865 legchar = (cups_vbcs_t)*vrow;
ef416fc2 866
867 /*
e1d6a774 868 * Save n-byte legacy character...
ef416fc2 869 */
e1d6a774 870
871 if (legchar > 0xffffff)
ef416fc2 872 {
e1d6a774 873 if (maxout < 5)
874 return (-1);
875
876 *dest++ = (cups_sbcs_t)(legchar >> 24);
877 *dest++ = (cups_sbcs_t)(legchar >> 16);
878 *dest++ = (cups_sbcs_t)(legchar >> 8);
879 *dest++ = (cups_sbcs_t)legchar;
880
881 maxout -= 3;
ef416fc2 882 }
e1d6a774 883 else if (legchar > 0xffff)
884 {
885 if (maxout < 4)
886 return (-1);
ef416fc2 887
e1d6a774 888 *dest++ = (cups_sbcs_t)(legchar >> 16);
889 *dest++ = (cups_sbcs_t)(legchar >> 8);
890 *dest++ = (cups_sbcs_t)legchar;
ef416fc2 891
e1d6a774 892 maxout -= 2;
893 }
894 else if (legchar > 0xff)
895 {
896 *dest++ = (cups_sbcs_t)(legchar >> 8);
897 *dest++ = (cups_sbcs_t)legchar;
898
899 maxout --;
900 }
ef416fc2 901 }
e1d6a774 902
903 *dest = '\0';
904
905 _cupsCharmapFree(encoding);
906
907 return ((int)(dest - start));
ef416fc2 908}
909
e1d6a774 910
ef416fc2 911/*
e1d6a774 912 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
ef416fc2 913 */
e1d6a774 914
915static int /* O - Count or -1 on error */
916conv_vbcs_to_utf8(
917 cups_utf8_t *dest, /* O - Target string */
918 const cups_sbcs_t *src, /* I - Source string */
919 int maxout, /* I - Max output */
920 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 921{
e1d6a774 922 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
923 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
924 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
925 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
926 cups_vbcs_t legchar; /* Legacy character value */
927 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
928 *workptr; /* Pointer into string */
ef416fc2 929
ef416fc2 930
931 /*
e1d6a774 932 * Find legacy charset map in cache...
ef416fc2 933 */
ef416fc2 934
e1d6a774 935 if ((vmap = (_cups_vmap_t *)_cupsCharmapGet(encoding)) == NULL)
936 return (-1);
ef416fc2 937
938 /*
e1d6a774 939 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 940 */
ef416fc2 941
e1d6a774 942 work[0] = 0xfeff;
943 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 944 {
e1d6a774 945 legchar = *src++;
946 leadchar = (cups_sbcs_t)legchar;
ef416fc2 947
948 /*
e1d6a774 949 * Convert ASCII verbatim (optimization)...
ef416fc2 950 */
ef416fc2 951
e1d6a774 952 if (legchar < 0x80)
ef416fc2 953 {
e1d6a774 954 *workptr++ = (cups_utf32_t)legchar;
955 continue;
ef416fc2 956 }
957
958 /*
e1d6a774 959 * Convert 2-byte legacy character...
ef416fc2 960 */
e1d6a774 961
962 if (vmap->lead2char[(int)leadchar] == leadchar)
ef416fc2 963 {
e1d6a774 964 if (!*src)
965 return (-1);
966
967 legchar = (legchar << 8) | *src++;
968
ef416fc2 969 /*
e1d6a774 970 * Convert unknown character to Replacement Character...
ef416fc2 971 */
e1d6a774 972
973 crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
974 if (crow)
975 crow += (int) (legchar & 0xff);
976
977 if (!crow || !*crow)
978 *workptr++ = 0xfffd;
979 else
980 *workptr++ = (cups_utf32_t)*crow;
981 continue;
ef416fc2 982 }
983
984 /*
e1d6a774 985 * Fetch 3-byte or 4-byte legacy character...
ef416fc2 986 */
e1d6a774 987
988 if (vmap->lead3char[(int)leadchar] == leadchar)
ef416fc2 989 {
e1d6a774 990 if (!*src || !src[1])
991 return (-1);
992
993 legchar = (legchar << 8) | *src++;
994 legchar = (legchar << 8) | *src++;
ef416fc2 995 }
e1d6a774 996 else if (vmap->lead4char[(int)leadchar] == leadchar)
997 {
998 if (!*src || !src[1] || !src[2])
999 return (-1);
1000
1001 legchar = (legchar << 8) | *src++;
1002 legchar = (legchar << 8) | *src++;
1003 legchar = (legchar << 8) | *src++;
1004 }
1005 else
1006 return (-1);
ef416fc2 1007
1008 /*
e1d6a774 1009 * Find 3-byte or 4-byte legacy character...
ef416fc2 1010 */
e1d6a774 1011
1012 wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1013 vmap->wide2uni,
1014 vmap->widecount,
1015 sizeof(_cups_wide2uni_t),
1016 compare_wide);
ef416fc2 1017
1018 /*
e1d6a774 1019 * Convert unknown character to Replacement Character...
ef416fc2 1020 */
e1d6a774 1021
1022 if (!wide2uni || !wide2uni->unichar)
1023 *workptr++ = 0xfffd;
1024 else
1025 *workptr++ = wide2uni->unichar;
ef416fc2 1026 }
e1d6a774 1027
1028 *workptr = 0;
1029
1030 _cupsCharmapFree(encoding);
1031
1032 /*
1033 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1034 */
1035
1036 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 1037}
1038
e1d6a774 1039
ef416fc2 1040/*
e1d6a774 1041 * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
ef416fc2 1042 */
e1d6a774 1043
1044static void
1045free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
ef416fc2 1046{
e1d6a774 1047 int i; /* Looping variable */
ef416fc2 1048
ef416fc2 1049
e1d6a774 1050 for (i = 0; i < 256; i ++)
1051 if (cmap->uni2char[i])
1052 free(cmap->uni2char[i]);
1053
1054 free(cmap);
1055}
1056
1057
1058/*
1059 * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1060 */
1061
1062static void
1063free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
1064{
1065 int i; /* Looping variable */
1066
1067
1068 for (i = 0; i < 256; i ++)
1069 if (vmap->char2uni[i])
1070 free(vmap->char2uni[i]);
1071
1072 for (i = 0; i < 256; i ++)
1073 if (vmap->uni2char[i])
1074 free(vmap->uni2char[i]);
1075
1076 if (vmap->wide2uni)
1077 free(vmap->wide2uni);
1078
1079 free(vmap);
1080}
1081
1082
1083/*
1084 * 'get_charmap_count()' - Count lines in a charmap file.
1085 */
1086
1087static int /* O - Count or -1 on error */
1088get_charmap_count(cups_file_t *fp) /* I - File to read from */
1089{
1090 int count; /* Number of lines */
1091 char line[256]; /* Line from input map file */
ef416fc2 1092
ef416fc2 1093
1094 /*
e1d6a774 1095 * Count lines in map input file...
ef416fc2 1096 */
ef416fc2 1097
e1d6a774 1098 count = 0;
ef416fc2 1099
e1d6a774 1100 while (cupsFileGets(fp, line, sizeof(line)))
1101 if (line[0] == '0')
1102 count ++;
ef416fc2 1103
e1d6a774 1104 /*
1105 * Return the number of lines...
1106 */
1107
1108 if (count > 0)
1109 return (count);
1110 else
1111 return (-1);
ef416fc2 1112}
1113
e1d6a774 1114
ef416fc2 1115/*
e1d6a774 1116 * 'get_sbcs_charmap()' - Get SBCS Charmap.
ef416fc2 1117 */
e1d6a774 1118
1119static _cups_cmap_t * /* O - Charmap or 0 on error */
1120get_sbcs_charmap(
1121 const cups_encoding_t encoding, /* I - Charmap Encoding */
1122 const char *filename) /* I - Charmap Filename */
ef416fc2 1123{
e1d6a774 1124 unsigned long legchar; /* Legacy character value */
1125 cups_utf32_t unichar; /* Unicode character value */
1126 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1127 cups_file_t *fp; /* Charset map file pointer */
1128 char *s; /* Line parsing pointer */
1129 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1130 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1131 char line[256]; /* Line from charset map file */
1132 _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */
1133
ef416fc2 1134
1135 /*
e1d6a774 1136 * See if we already have this SBCS charset map loaded...
ef416fc2 1137 */
e1d6a774 1138
1139 for (cmap = cg->cmap_cache; cmap; cmap = cmap->next)
1140 {
1141 if (cmap->encoding == encoding)
1142 {
1143 cmap->used ++;
1144 DEBUG_printf((" returning existing cmap=%p\n", cmap));
1145 return ((void *)cmap);
1146 }
1147 }
ef416fc2 1148
1149 /*
e1d6a774 1150 * Open SBCS charset map input file...
ef416fc2 1151 */
e1d6a774 1152
1153 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1154 return (NULL);
ef416fc2 1155
1156 /*
e1d6a774 1157 * Allocate memory for SBCS charset map...
ef416fc2 1158 */
e1d6a774 1159
1160 if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1161 {
1162 cupsFileClose(fp);
1163 DEBUG_puts(" Unable to allocate memory!");
1164 return (NULL);
1165 }
1166
1167 cmap->used ++;
1168 cmap->encoding = encoding;
ef416fc2 1169
1170 /*
e1d6a774 1171 * Save SBCS charset map into memory for transcoding...
ef416fc2 1172 */
e1d6a774 1173
1174 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1175 {
e1d6a774 1176 if (line[0] != '0')
1177 continue;
1178
1179 legchar = strtol(line, &s, 16);
1180 if (legchar < 0 || legchar > 0xff)
1181 goto sbcs_error;
1182
1183 unichar = strtol(s, NULL, 16);
1184 if (unichar < 0 || unichar > 0xffff)
1185 goto sbcs_error;
ef416fc2 1186
1187 /*
e1d6a774 1188 * Save legacy to Unicode mapping in direct lookup table...
ef416fc2 1189 */
e1d6a774 1190
1191 crow = cmap->char2uni + legchar;
1192 *crow = (cups_ucs2_t)(unichar & 0xffff);
ef416fc2 1193
1194 /*
e1d6a774 1195 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1196 */
e1d6a774 1197
1198 srow = cmap->uni2char[(unichar >> 8) & 0xff];
1199 if (!srow)
ef416fc2 1200 {
e1d6a774 1201 srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1202 if (!srow)
1203 goto sbcs_error;
1204
1205 cmap->uni2char[(unichar >> 8) & 0xff] = srow;
ef416fc2 1206 }
1207
e1d6a774 1208 srow += unichar & 0xff;
1209
ef416fc2 1210 /*
e1d6a774 1211 * Convert Replacement Character to visible replacement...
ef416fc2 1212 */
e1d6a774 1213
1214 if (unichar == 0xfffd)
1215 legchar = (unsigned long)'?';
ef416fc2 1216
1217 /*
e1d6a774 1218 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1219 */
ef416fc2 1220
e1d6a774 1221 if (!*srow)
1222 *srow = (cups_sbcs_t)legchar;
1223 }
ef416fc2 1224
e1d6a774 1225 cupsFileClose(fp);
1226
ef416fc2 1227 /*
e1d6a774 1228 * Add it to the cache and return...
ef416fc2 1229 */
e1d6a774 1230
1231 cmap->next = cg->cmap_cache;
1232 cg->cmap_cache = cmap;
1233
1234 DEBUG_printf((" returning new cmap=%p\n", cmap));
1235
1236 return (cmap);
ef416fc2 1237
1238 /*
e1d6a774 1239 * If we get here, there was an error in the cmap file...
ef416fc2 1240 */
e1d6a774 1241
1242 sbcs_error:
1243
1244 free_sbcs_charmap(cmap);
1245
1246 cupsFileClose(fp);
1247
1248 DEBUG_puts(" Error, returning NULL!");
1249
1250 return (NULL);
1251}
1252
1253
1254/*
1255 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1256 */
1257
1258static _cups_vmap_t * /* O - Charmap or 0 on error */
1259get_vbcs_charmap(
1260 const cups_encoding_t encoding, /* I - Charmap Encoding */
1261 const char *filename) /* I - Charmap Filename */
1262{
1263 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1264 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1265 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1266 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1267 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1268 unsigned long legchar; /* Legacy character value */
1269 cups_utf32_t unichar; /* Unicode character value */
1270 int mapcount; /* Count of lines in charmap file */
1271 cups_file_t *fp; /* Charset map file pointer */
1272 char *s; /* Line parsing pointer */
1273 char line[256]; /* Line from charset map file */
1274 int i; /* Loop variable */
1275 int wide; /* 32-bit legacy char */
1276 _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */
1277
1278
1279 DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1280 encoding, filename));
ef416fc2 1281
1282 /*
e1d6a774 1283 * See if we already have this DBCS/VBCS charset map loaded...
ef416fc2 1284 */
ef416fc2 1285
e1d6a774 1286 for (vmap = cg->vmap_cache; vmap; vmap = vmap->next)
1287 {
1288 if (vmap->encoding == encoding)
ef416fc2 1289 {
e1d6a774 1290 vmap->used ++;
1291 DEBUG_printf((" returning existing vmap=%p\n", vmap));
1292 return ((void *)vmap);
ef416fc2 1293 }
ef416fc2 1294 }
ef416fc2 1295
1296 /*
e1d6a774 1297 * Open VBCS charset map input file...
ef416fc2 1298 */
ef416fc2 1299
e1d6a774 1300 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1301 {
1302 DEBUG_printf((" Unable to open file: %s\n", strerror(errno)));
1303 return (NULL);
1304 }
ef416fc2 1305
1306 /*
e1d6a774 1307 * Count lines in charmap file...
ef416fc2 1308 */
e1d6a774 1309
1310 if ((mapcount = get_charmap_count(fp)) <= 0)
1311 {
1312 DEBUG_puts(" Unable to get charmap count!");
1313 return (NULL);
1314 }
1315
1316 DEBUG_printf((" mapcount=%d\n", mapcount));
ef416fc2 1317
1318 /*
e1d6a774 1319 * Allocate memory for DBCS/VBCS charset map...
ef416fc2 1320 */
e1d6a774 1321
1322 if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1323 {
1324 cupsFileClose(fp);
1325 DEBUG_puts(" Unable to allocate memory!");
1326 return (NULL);
1327 }
1328
1329 vmap->used ++;
1330 vmap->encoding = encoding;
ef416fc2 1331
1332 /*
e1d6a774 1333 * Save DBCS/VBCS charset map into memory for transcoding...
ef416fc2 1334 */
e1d6a774 1335
1336 leadchar = 0;
1337 wide2uni = NULL;
1338
1339 cupsFileRewind(fp);
1340
1341 i = 0;
1342 wide = 0;
1343
1344 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1345 {
e1d6a774 1346 if (line[0] != '0')
1347 continue;
1348
1349 legchar = strtoul(line, &s, 16);
1350 if (legchar == ULONG_MAX)
1351 goto vbcs_error;
1352
1353 unichar = strtol(s, NULL, 16);
1354 if (unichar < 0 || unichar > 0xffff)
1355 goto vbcs_error;
1356
1357 i ++;
1358
1359/* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1360 legchar, (unsigned)unichar)); */
ef416fc2 1361
1362 /*
e1d6a774 1363 * Save lead char of 2/3/4-byte legacy char...
ef416fc2 1364 */
e1d6a774 1365
1366 if (legchar > 0xff && legchar <= 0xffff)
ef416fc2 1367 {
e1d6a774 1368 leadchar = (cups_sbcs_t)(legchar >> 8);
1369 vmap->lead2char[leadchar] = leadchar;
1370 }
1371
1372 if (legchar > 0xffff && legchar <= 0xffffff)
1373 {
1374 leadchar = (cups_sbcs_t)(legchar >> 16);
1375 vmap->lead3char[leadchar] = leadchar;
1376 }
1377
1378 if (legchar > 0xffffff)
1379 {
1380 leadchar = (cups_sbcs_t)(legchar >> 24);
1381 vmap->lead4char[leadchar] = leadchar;
ef416fc2 1382 }
1383
1384 /*
e1d6a774 1385 * Save Legacy to Unicode mapping...
ef416fc2 1386 */
e1d6a774 1387
1388 if (legchar <= 0xffff)
ef416fc2 1389 {
ef416fc2 1390 /*
e1d6a774 1391 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
ef416fc2 1392 */
e1d6a774 1393
1394 crow = vmap->char2uni[(int)leadchar];
1395 if (!crow)
1396 {
1397 crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1398 if (!crow)
1399 goto vbcs_error;
1400
1401 vmap->char2uni[(int)leadchar] = crow;
1402 }
1403
1404 crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1405 }
1406 else
1407 {
1408 /*
1409 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1410 */
1411
1412 if (!wide)
1413 {
1414 wide = 1;
1415 vmap->widecount = (mapcount - i + 1);
1416 wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
1417 sizeof(_cups_wide2uni_t));
1418 if (!wide2uni)
1419 goto vbcs_error;
1420
1421 vmap->wide2uni = wide2uni;
1422 }
1423
1424 wide2uni->widechar = (cups_vbcs_t)legchar;
1425 wide2uni->unichar = (cups_ucs2_t)unichar;
1426 wide2uni ++;
ef416fc2 1427 }
1428
1429 /*
e1d6a774 1430 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1431 */
e1d6a774 1432
1433 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1434 if (!vrow)
ef416fc2 1435 {
e1d6a774 1436 vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1437 if (!vrow)
1438 goto vbcs_error;
1439
1440 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
ef416fc2 1441 }
e1d6a774 1442
1443 vrow += (int)(unichar & 0xff);
ef416fc2 1444
1445 /*
e1d6a774 1446 * Convert Replacement Character to visible replacement...
ef416fc2 1447 */
e1d6a774 1448
1449 if (unichar == 0xfffd)
1450 legchar = (unsigned long)'?';
ef416fc2 1451
1452 /*
e1d6a774 1453 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1454 */
e1d6a774 1455
1456 if (!*vrow)
1457 *vrow = (cups_vbcs_t)legchar;
ef416fc2 1458 }
e1d6a774 1459
1460 vmap->charcount = (i - vmap->widecount);
1461
1462 cupsFileClose(fp);
ef416fc2 1463
1464 /*
e1d6a774 1465 * Add it to the cache and return...
ef416fc2 1466 */
ef416fc2 1467
e1d6a774 1468 vmap->next = cg->vmap_cache;
1469 cg->vmap_cache = vmap;
1470
1471 DEBUG_printf((" returning new vmap=%p\n", vmap));
1472
1473 return (vmap);
1474
1475 /*
1476 * If we get here, the file contains errors...
1477 */
1478
1479 vbcs_error:
1480
1481 free_vbcs_charmap(vmap);
1482
1483 cupsFileClose(fp);
1484
1485 DEBUG_puts(" Error, returning NULL!");
1486
1487 return (NULL);
ef416fc2 1488}
1489
1490
1491/*
e1d6a774 1492 * End of "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $"
ef416fc2 1493 */