cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 6649 2007-07-11 21:46:42Z mike $"
   3  *
   4  *   Transcoding support for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 2007-2008 by Apple Inc.
   7  *   Copyright 1997-2007 by Easy Software Products.
   8  *
   9  *   These coded instructions, statements, and computer programs are the
  10  *   property of Apple Inc. and are protected by Federal copyright
  11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  *   which should have been included with this file.  If this
  13  *   file is missing or damaged, see the license at "http://www.cups.org/".
  14  *
  15  *   This file is subject to the Apple OS-Developed Software exception.
  16  *
  17  * Contents:
  18  *
  19  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  20  *   _cupsCharmapFree()  - Free a character set map.
  21  *   _cupsCharmapGet()   - Get a character set map.
  22  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  23  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  24  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  25  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  26  *   compare_wide()      - Compare key for wide (VBCS) match.
  27  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  28  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  29  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  30  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  31  *   free_sbcs_charmap() - Free memory used by a single byte character set.
  32  *   free_vbcs_charmap() - Free memory used by a variable byte character set.
  33  *   get_charmap()       - Lookup or get a character set map (private).
  34  *   get_charmap_count() - Count lines in a charmap file.
  35  *   get_sbcs_charmap()  - Get SBCS Charmap.
  36  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  37  */
  38
  39 /*
  40  * Include necessary headers...
  41  */
  42
  43 #include "globals.h"
  44 #include "debug.h"
  45 #include <limits.h>
  46 #include <stdlib.h>
  47 #include <errno.h>
  48 #include <time.h>
  49
  50
  51 /*
  52  * Local globals...
  53  */
  54
#ifdef HAVE_PTHREAD_H
static pthread_mutex_t  map_mutex = PTHREAD_MUTEX_INITIALIZER;
                                        /* Mutex locked by the public entry
                                           points in this file around every
                                           access to the two caches below */
#endif /* HAVE_PTHREAD_H */
static _cups_cmap_t     *cmap_cache = NULL;
                                        /* Head of the cached SBCS charmap
                                           list (entries linked via 'next') */
static _cups_vmap_t     *vmap_cache = NULL;
                                        /* Head of the cached VBCS charmap
                                           list (entries linked via 'next') */
  63
  64
  65 /*
  66  * Local functions...
  67  */
  68
/* Comparison callback handed to bsearch() by conv_vbcs_to_utf8() */
static int              compare_wide(const void *k1, const void *k2);
/*
 * Table-driven conversion workers.  cupsCharsetToUTF8() and
 * cupsUTF8ToCharset() call these with 'map_mutex' held; each one looks
 * its charmap up through get_charmap().
 */
static int              conv_sbcs_to_utf8(cups_utf8_t *dest,
                                          const cups_sbcs_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static int              conv_utf8_to_sbcs(cups_sbcs_t *dest,
                                          const cups_utf8_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static int              conv_utf8_to_vbcs(cups_sbcs_t *dest,
                                          const cups_utf8_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static int              conv_vbcs_to_utf8(cups_utf8_t *dest,
                                          const cups_sbcs_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
/* Release helpers used by _cupsCharmapFlush() for individual cache entries */
static void             free_sbcs_charmap(_cups_cmap_t *sbcs);
static void             free_vbcs_charmap(_cups_vmap_t *vbcs);
/* Charmap lookup/loading helpers (see the file contents list at the top) */
static void             *get_charmap(const cups_encoding_t encoding);
static int              get_charmap_count(cups_file_t *fp);
static _cups_cmap_t     *get_sbcs_charmap(const cups_encoding_t encoding,
                                          const char *filename);
static _cups_vmap_t     *get_vbcs_charmap(const cups_encoding_t encoding,
                                          const char *filename);
  94
  95
  96 /*
  97  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
  98  */
  99
 100 void
 101 _cupsCharmapFlush(void)
 102 {
 103   _cups_cmap_t  *cmap,                  /* Legacy SBCS / Unicode Charset Map */
 104                 *cnext;                 /* Next Legacy SBCS Charset Map */
 105   _cups_vmap_t  *vmap,                  /* Legacy VBCS / Unicode Charset Map */
 106                 *vnext;                 /* Next Legacy VBCS Charset Map */
 107
 108
 109 #ifdef HAVE_PTHREAD_H
 110   pthread_mutex_lock(&map_mutex);
 111 #endif /* HAVE_PTHREAD_H */
 112
 113  /*
 114   * Loop through SBCS charset map cache, free all memory...
 115   */
 116
 117   for (cmap = cmap_cache; cmap; cmap = cnext)
 118   {
 119     cnext = cmap->next;
 120
 121     free_sbcs_charmap(cmap);
 122   }
 123
 124   cmap_cache = NULL;
 125
 126  /*
 127   * Loop through DBCS/VBCS charset map cache, free all memory...
 128   */
 129
 130   for (vmap = vmap_cache; vmap; vmap = vnext)
 131   {
 132     vnext = vmap->next;
 133
 134     free_vbcs_charmap(vmap);
 135   }
 136
 137   vmap_cache = NULL;
 138
 139 #ifdef HAVE_PTHREAD_H
 140   pthread_mutex_unlock(&map_mutex);
 141 #endif /* HAVE_PTHREAD_H */
 142 }
 143
 144
 145 /*
 146  * '_cupsCharmapFree()' - Free a character set map.
 147  *
 148  * This does not actually free; use '_cupsCharmapFlush()' for that.
 149  */
 150
 151 void
 152 _cupsCharmapFree(
 153     const cups_encoding_t encoding)     /* I - Encoding */
 154 {
 155   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 156   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 157
 158
 159  /*
 160   * See if we already have this SBCS charset map loaded...
 161   */
 162
 163 #ifdef HAVE_PTHREAD_H
 164   pthread_mutex_lock(&map_mutex);
 165 #endif /* HAVE_PTHREAD_H */
 166
 167   for (cmap = cmap_cache; cmap; cmap = cmap->next)
 168   {
 169     if (cmap->encoding == encoding)
 170     {
 171       if (cmap->used > 0)
 172         cmap->used --;
 173       break;
 174     }
 175   }
 176
 177  /*
 178   * See if we already have this DBCS/VBCS charset map loaded...
 179   */
 180
 181   for (vmap = vmap_cache; vmap; vmap = vmap->next)
 182   {
 183     if (vmap->encoding == encoding)
 184     {
 185       if (vmap->used > 0)
 186         vmap->used --;
 187       break;
 188     }
 189   }
 190
 191 #ifdef HAVE_PTHREAD_H
 192   pthread_mutex_unlock(&map_mutex);
 193 #endif /* HAVE_PTHREAD_H */
 194 }
 195
 196
 197 /*
 198  * '_cupsCharmapGet()' - Get a character set map.
 199  *
 200  * This code handles single-byte (SBCS), double-byte (DBCS), and
 201  * variable-byte (VBCS) character sets _without_ charset escapes...
 202  * This code does not handle multiple-byte character sets (MBCS)
 203  * (such as ISO-2022-JP) with charset switching via escapes...
 204  */
 205
 206 void *                                  /* O - Charset map pointer */
 207 _cupsCharmapGet(
 208     const cups_encoding_t encoding)     /* I - Encoding */
 209 {
 210   void  *charmap;                       /* Charset map pointer */
 211
 212
 213   DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
 214
 215  /*
 216   * Check for valid arguments...
 217   */
 218
 219   if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 220   {
 221     DEBUG_puts("    Bad encoding, returning NULL!");
 222     return (NULL);
 223   }
 224
 225  /*
 226   * Lookup or get the charset map pointer and return...
 227   */
 228
 229 #ifdef HAVE_PTHREAD_H
 230   pthread_mutex_lock(&map_mutex);
 231 #endif /* HAVE_PTHREAD_H */
 232
 233   charmap = get_charmap(encoding);
 234
 235 #ifdef HAVE_PTHREAD_H
 236   pthread_mutex_unlock(&map_mutex);
 237 #endif /* HAVE_PTHREAD_H */
 238
 239   return (charmap);
 240 }
 241
 242
 243 /*
 244  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 245  *
 246  * This code handles single-byte (SBCS), double-byte (DBCS), and
 247  * variable-byte (VBCS) character sets _without_ charset escapes...
 248  * This code does not handle multiple-byte character sets (MBCS)
 249  * (such as ISO-2022-JP) with charset switching via escapes...
 250  */
 251
 252 int                                     /* O - Count or -1 on error */
 253 cupsCharsetToUTF8(
 254     cups_utf8_t *dest,                  /* O - Target string */
 255     const char *src,                    /* I - Source string */
 256     const int maxout,                   /* I - Max output */
 257     const cups_encoding_t encoding)     /* I - Encoding */
 258 {
 259   int   bytes;                          /* Number of bytes converted */
 260
 261
 262  /*
 263   * Check for valid arguments...
 264   */
 265
 266   DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
 267                 dest, src, maxout, encoding));
 268
 269   if (dest)
 270     *dest = '\0';
 271
 272   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 273   {
 274     DEBUG_puts("    Bad arguments, returning -1");
 275     return (-1);
 276   }
 277
 278  /*
 279   * Handle identity conversions...
 280   */
 281
 282   if (encoding == CUPS_UTF8 ||
 283       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 284   {
 285     strlcpy((char *)dest, src, maxout);
 286     return ((int)strlen((char *)dest));
 287   }
 288
 289  /*
 290   * Handle ISO-8859-1 to UTF-8 directly...
 291   */
 292
 293   if (encoding == CUPS_ISO8859_1)
 294   {
 295     int         ch;                     /* Character from string */
 296     cups_utf8_t *destptr,               /* Pointer into UTF-8 buffer */
 297                 *destend;               /* End of UTF-8 buffer */
 298
 299
 300     destptr = dest;
 301     destend = dest + maxout - 2;
 302
 303     while (*src && destptr < destend)
 304     {
 305       ch = *src++ & 255;
 306
 307       if (ch & 128)
 308       {
 309         *destptr++ = 0xc0 | (ch >> 6);
 310         *destptr++ = 0x80 | (ch & 0x3f);
 311       }
 312       else
 313         *destptr++ = ch;
 314     }
 315
 316     *destptr = '\0';
 317
 318     return ((int)(destptr - dest));
 319   }
 320
 321  /*
 322   * Convert input legacy charset to UTF-8...
 323   */
 324
 325 #ifdef HAVE_PTHREAD_H
 326   pthread_mutex_lock(&map_mutex);
 327 #endif /* HAVE_PTHREAD_H */
 328
 329   if (encoding < CUPS_ENCODING_SBCS_END)
 330     bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 331   else
 332     bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 333
 334 #ifdef HAVE_PTHREAD_H
 335   pthread_mutex_unlock(&map_mutex);
 336 #endif /* HAVE_PTHREAD_H */
 337
 338   return (bytes);
 339 }
 340
 341
 342 /*
 343  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 344  *
 345  * This code handles single-byte (SBCS), double-byte (DBCS), and
 346  * variable-byte (VBCS) character sets _without_ charset escapes...
 347  * This code does not handle multiple-byte character sets (MBCS)
 348  * (such as ISO-2022-JP) with charset switching via escapes...
 349  */
 350
 351 int                                     /* O - Count or -1 on error */
 352 cupsUTF8ToCharset(
 353     char                  *dest,        /* O - Target string */
 354     const cups_utf8_t     *src,         /* I - Source string */
 355     const int             maxout,       /* I - Max output */
 356     const cups_encoding_t encoding)     /* I - Encoding */
 357 {
 358   int   bytes;                          /* Number of bytes converted */
 359
 360
 361  /*
 362   * Check for valid arguments...
 363   */
 364
 365   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 366   {
 367     if (dest)
 368       *dest = '\0';
 369
 370     return (-1);
 371   }
 372
 373  /*
 374   * Handle identity conversions...
 375   */
 376
 377   if (encoding == CUPS_UTF8 ||
 378       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 379   {
 380     strlcpy(dest, (char *)src, maxout);
 381     return ((int)strlen(dest));
 382   }
 383
 384  /*
 385   * Handle UTF-8 to ISO-8859-1 directly...
 386   */
 387
 388   if (encoding == CUPS_ISO8859_1)
 389   {
 390     int         ch;                     /* Character from string */
 391     char        *destptr,               /* Pointer into ISO-8859-1 buffer */
 392                 *destend;               /* End of ISO-8859-1 buffer */
 393
 394
 395     destptr = dest;
 396     destend = dest + maxout - 1;
 397
 398     while (*src && destptr < destend)
 399     {
 400       ch = *src++;
 401
 402       if ((ch & 0xe0) == 0xc0)
 403       {
 404         ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
 405
 406         if (ch < 256)
 407           *destptr++ = ch;
 408         else
 409           *destptr++ = '?';
 410       }
 411       else if ((ch & 0xf0) == 0xe0 ||
 412                (ch & 0xf8) == 0xf0)
 413         *destptr++ = '?';
 414       else if (!(ch & 0x80))
 415         *destptr++ = ch;
 416     }
 417
 418     *destptr = '\0';
 419
 420     return ((int)(destptr - dest));
 421   }
 422
 423  /*
 424   * Convert input UTF-8 to legacy charset...
 425   */
 426
 427 #ifdef HAVE_PTHREAD_H
 428   pthread_mutex_lock(&map_mutex);
 429 #endif /* HAVE_PTHREAD_H */
 430
 431   if (encoding < CUPS_ENCODING_SBCS_END)
 432     bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 433   else
 434     bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 435
 436 #ifdef HAVE_PTHREAD_H
 437   pthread_mutex_unlock(&map_mutex);
 438 #endif /* HAVE_PTHREAD_H */
 439
 440   return (bytes);
 441 }
 442
 443
 444 /*
 445  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 446  *
 447  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 448  *
 449  *   UTF-32 char     UTF-8 char(s)
 450  *   --------------------------------------------------
 451  *        0 to 127 = 0xxxxxxx (US-ASCII)
 452  *     128 to 2047 = 110xxxxx 10yyyyyy
 453  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 454  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 455  *
 456  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 457  * which would convert to five- or six-octet UTF-8 sequences...
 458  */
 459
 460 int                                     /* O - Count or -1 on error */
 461 cupsUTF8ToUTF32(
 462     cups_utf32_t      *dest,            /* O - Target string */
 463     const cups_utf8_t *src,             /* I - Source string */
 464     const int         maxout)           /* I - Max output */
 465 {
 466   int           i;                      /* Looping variable */
 467   cups_utf8_t   ch;                     /* Character value */
 468   cups_utf8_t   next;                   /* Next character value */
 469   cups_utf32_t  ch32;                   /* UTF-32 character value */
 470
 471
 472  /*
 473   * Check for valid arguments and clear output...
 474   */
 475
 476   if (dest)
 477     *dest = 0;
 478
 479   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 480     return (-1);
 481
 482  /*
 483   * Convert input UTF-8 to output UTF-32 (and insert BOM)...
 484   */
 485
 486   *dest++ = 0xfeff;
 487
 488   for (i = maxout - 1; *src && i > 0; i --)
 489   {
 490     ch = *src++;
 491
 492    /*
 493     * Convert UTF-8 character(s) to UTF-32 character...
 494     */
 495
 496     if (!(ch & 0x80))
 497     {
 498      /*
 499       * One-octet UTF-8 <= 127 (US-ASCII)...
 500       */
 501
 502       *dest++ = ch;
 503       continue;
 504     }
 505     else if ((ch & 0xe0) == 0xc0)
 506     {
 507      /*
 508       * Two-octet UTF-8 <= 2047 (Latin-x)...
 509       */
 510
 511       next = *src++;
 512       if (!next)
 513         return (-1);
 514
 515       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 516
 517      /*
 518       * Check for non-shortest form (invalid UTF-8)...
 519       */
 520
 521       if (ch32 < 0x80)
 522         return (-1);
 523
 524       *dest++ = ch32;
 525     }
 526     else if ((ch & 0xf0) == 0xe0)
 527     {
 528      /*
 529       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 530       */
 531
 532       next = *src++;
 533       if (!next)
 534         return (-1);
 535
 536       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 537
 538       next = *src++;
 539       if (!next)
 540         return (-1);
 541
 542       ch32 = (ch32 << 6) | (next & 0x3f);
 543
 544      /*
 545       * Check for non-shortest form (invalid UTF-8)...
 546       */
 547
 548       if (ch32 < 0x800)
 549         return (-1);
 550
 551       *dest++ = ch32;
 552     }
 553     else if ((ch & 0xf8) == 0xf0)
 554     {
 555      /*
 556       * Four-octet UTF-8...
 557       */
 558
 559       next = *src++;
 560       if (!next)
 561         return (-1);
 562
 563       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 564
 565       next = *src++;
 566       if (!next)
 567         return (-1);
 568
 569       ch32 = (ch32 << 6) | (next & 0x3f);
 570
 571       next = *src++;
 572       if (!next)
 573         return (-1);
 574
 575       ch32 = (ch32 << 6) | (next & 0x3f);
 576
 577      /*
 578       * Check for non-shortest form (invalid UTF-8)...
 579       */
 580
 581       if (ch32 < 0x10000)
 582         return (-1);
 583
 584       *dest++ = ch32;
 585     }
 586     else
 587     {
 588      /*
 589       * More than 4-octet (invalid UTF-8 sequence)...
 590       */
 591
 592       return (-1);
 593     }
 594
 595    /*
 596     * Check for UTF-16 surrogate (illegal UTF-8)...
 597     */
 598
 599     if (ch32 >= 0xd800 && ch32 <= 0xdfff)
 600       return (-1);
 601   }
 602
 603   *dest = 0;
 604
 605   return (i);
 606 }
 607
 608
 609 /*
 610  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 611  *
 612  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 613  *
 614  *   UTF-32 char     UTF-8 char(s)
 615  *   --------------------------------------------------
 616  *        0 to 127 = 0xxxxxxx (US-ASCII)
 617  *     128 to 2047 = 110xxxxx 10yyyyyy
 618  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 619  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 620  *
 621  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 622  * which would convert to five- or six-octet UTF-8 sequences...
 623  */
 624
 625 int                                     /* O - Count or -1 on error */
 626 cupsUTF32ToUTF8(
 627     cups_utf8_t        *dest,           /* O - Target string */
 628     const cups_utf32_t *src,            /* I - Source string */
 629     const int          maxout)          /* I - Max output */
 630 {
 631   cups_utf8_t   *start;                 /* Start of destination string */
 632   int           i;                      /* Looping variable */
 633   int           swap;                   /* Byte-swap input to output */
 634   cups_utf32_t  ch;                     /* Character value */
 635
 636
 637  /*
 638   * Check for valid arguments and clear output...
 639   */
 640
 641   if (dest)
 642     *dest = '\0';
 643
 644   if (!dest || !src || maxout < 1)
 645     return (-1);
 646
 647  /*
 648   * Check for leading BOM in UTF-32 and inverted BOM...
 649   */
 650
 651   start = dest;
 652   swap  = *src == 0xfffe0000;
 653
 654   if (*src == 0xfffe0000 || *src == 0xfeff)
 655     src ++;
 656
 657  /*
 658   * Convert input UTF-32 to output UTF-8...
 659   */
 660
 661   for (i = maxout - 1; *src && i > 0;)
 662   {
 663     ch = *src++;
 664
 665    /*
 666     * Byte swap input UTF-32, if necessary...
 667     * (only byte-swapping 24 of 32 bits)
 668     */
 669
 670     if (swap)
 671       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 672
 673    /*
 674     * Check for beyond Plane 16 (invalid UTF-32)...
 675     */
 676
 677     if (ch > 0x10ffff)
 678       return (-1);
 679
 680    /*
 681     * Convert UTF-32 character to UTF-8 character(s)...
 682     */
 683
 684     if (ch < 0x80)
 685     {
 686      /*
 687       * One-octet UTF-8 <= 127 (US-ASCII)...
 688       */
 689
 690       *dest++ = (cups_utf8_t)ch;
 691       i --;
 692     }
 693     else if (ch < 0x800)
 694     {
 695      /*
 696       * Two-octet UTF-8 <= 2047 (Latin-x)...
 697       */
 698
 699       if (i < 2)
 700         return (-1);
 701
 702       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 703       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 704       i -= 2;
 705     }
 706     else if (ch < 0x10000)
 707     {
 708      /*
 709       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 710       */
 711
 712       if (i < 3)
 713         return (-1);
 714
 715       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 716       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 717       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 718       i -= 3;
 719     }
 720     else
 721     {
 722      /*
 723       * Four-octet UTF-8...
 724       */
 725
 726       if (i < 4)
 727         return (-1);
 728
 729       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 730       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 731       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 732       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 733       i -= 4;
 734     }
 735   }
 736
 737   *dest = '\0';
 738
 739   return ((int)(dest - start));
 740 }
 741
 742
 743 /*
 744  * 'compare_wide()' - Compare key for wide (VBCS) match.
 745  */
 746
 747 static int
 748 compare_wide(const void *k1,            /* I - Key char */
 749              const void *k2)            /* I - Map char */
 750 {
 751   cups_vbcs_t   key;                    /* Legacy key character */
 752   cups_vbcs_t   map;                    /* Legacy map character */
 753
 754
 755   key = *((cups_vbcs_t *)k1);
 756   map = ((_cups_wide2uni_t *)k2)->widechar;
 757
 758   return ((int)(key - map));
 759 }
 760
 761
 762 /*
 763  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 764  */
 765
 766 static int                              /* O - Count or -1 on error */
 767 conv_sbcs_to_utf8(
 768     cups_utf8_t           *dest,        /* O - Target string */
 769     const cups_sbcs_t     *src,         /* I - Source string */
 770     int                   maxout,       /* I - Max output */
 771     const cups_encoding_t encoding)     /* I - Encoding */
 772 {
 773   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 774   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 775   cups_sbcs_t   legchar;                /* Legacy character value */
 776   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 777                 *workptr;               /* Pointer into string */
 778
 779
 780  /*
 781   * Find legacy charset map in cache...
 782   */
 783
 784   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 785     return (-1);
 786
 787  /*
 788   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 789   */
 790
 791   work[0] = 0xfeff;
 792   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 793   {
 794     legchar = *src++;
 795
 796    /*
 797     * Convert ASCII verbatim (optimization)...
 798     */
 799
 800     if (legchar < 0x80)
 801       *workptr++ = (cups_utf32_t)legchar;
 802     else
 803     {
 804      /*
 805       * Convert unknown character to Replacement Character...
 806       */
 807
 808       crow = cmap->char2uni + legchar;
 809
 810       if (!*crow)
 811         *workptr++ = 0xfffd;
 812       else
 813         *workptr++ = (cups_utf32_t)*crow;
 814     }
 815   }
 816
 817   *workptr = 0;
 818
 819  /*
 820   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 821   */
 822
 823   cmap->used --;
 824
 825   return (cupsUTF32ToUTF8(dest, work, maxout));
 826 }
 827
 828
 829 /*
 830  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 831  */
 832
 833 static int                              /* O - Count or -1 on error */
 834 conv_utf8_to_sbcs(
 835     cups_sbcs_t           *dest,        /* O - Target string */
 836     const cups_utf8_t     *src,         /* I - Source string */
 837     int                   maxout,       /* I - Max output */
 838     const cups_encoding_t encoding)     /* I - Encoding */
 839 {
 840   cups_sbcs_t   *start;                 /* Start of destination string */
 841   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 842   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 843   cups_utf32_t  unichar;                /* Character value */
 844   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 845                 *workptr;               /* Pointer into string */
 846
 847
 848  /*
 849   * Find legacy charset map in cache...
 850   */
 851
 852   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 853     return (-1);
 854
 855  /*
 856   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 857   */
 858
 859   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 860     return (-1);
 861
 862  /*
 863   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 864   */
 865
 866   for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
 867   {
 868     unichar = *workptr++;
 869     if (!unichar)
 870       break;
 871
 872    /*
 873     * Convert ASCII verbatim (optimization)...
 874     */
 875
 876     if (unichar < 0x80)
 877     {
 878       *dest++ = (cups_sbcs_t)unichar;
 879       continue;
 880     }
 881
 882    /*
 883     * Convert unknown character to visible replacement...
 884     */
 885
 886     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 887
 888     if (srow)
 889       srow += (int)(unichar & 0xff);
 890
 891     if (!srow || !*srow)
 892       *dest++ = '?';
 893     else
 894       *dest++ = *srow;
 895   }
 896
 897   *dest = '\0';
 898
 899   cmap->used --;
 900
 901   return ((int)(dest - start));
 902 }
 903
 904
 905 /*
 906  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 907  */
 908
 909 static int                              /* O - Count or -1 on error */
 910 conv_utf8_to_vbcs(
 911     cups_sbcs_t           *dest,        /* O - Target string */
 912     const cups_utf8_t     *src,         /* I - Source string */
 913     int                   maxout,       /* I - Max output */
 914     const cups_encoding_t encoding)     /* I - Encoding */
 915 {
 916   cups_sbcs_t   *start;                 /* Start of destination string */
 917   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
 918   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
 919   cups_utf32_t  unichar;                /* Character value */
 920   cups_vbcs_t   legchar;                /* Legacy character value */
 921   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 922                 *workptr;               /* Pointer into string */
 923
 924
 925  /*
 926   * Find legacy charset map in cache...
 927   */
 928
 929   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
 930     return (-1);
 931
 932  /*
 933   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 934   */
 935
 936   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 937     return (-1);
 938
 939  /*
 940   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
 941   */
 942
 943   for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
 944   {
 945     unichar = *workptr++;
 946     if (!unichar)
 947       break;
 948
 949    /*
 950     * Convert ASCII verbatim (optimization)...
 951     */
 952
 953     if (unichar < 0x80)
 954     {
 955       *dest++ = (cups_sbcs_t)unichar;
 956       continue;
 957     }
 958
 959    /*
 960     * Convert unknown character to visible replacement...
 961     */
 962
 963     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
 964
 965     if (vrow)
 966       vrow += (int)(unichar & 0xff);
 967
 968     if (!vrow || !*vrow)
 969       legchar = (cups_vbcs_t)'?';
 970     else
 971       legchar = (cups_vbcs_t)*vrow;
 972
 973    /*
 974     * Save n-byte legacy character...
 975     */
 976
 977     if (legchar > 0xffffff)
 978     {
 979       if (maxout < 5)
 980         return (-1);
 981
 982       *dest++ = (cups_sbcs_t)(legchar >> 24);
 983       *dest++ = (cups_sbcs_t)(legchar >> 16);
 984       *dest++ = (cups_sbcs_t)(legchar >> 8);
 985       *dest++ = (cups_sbcs_t)legchar;
 986
 987       maxout -= 3;
 988     }
 989     else if (legchar > 0xffff)
 990     {
 991       if (maxout < 4)
 992         return (-1);
 993
 994       *dest++ = (cups_sbcs_t)(legchar >> 16);
 995       *dest++ = (cups_sbcs_t)(legchar >> 8);
 996       *dest++ = (cups_sbcs_t)legchar;
 997
 998       maxout -= 2;
 999     }
1000     else if (legchar > 0xff)
1001     {
1002       *dest++ = (cups_sbcs_t)(legchar >> 8);
1003       *dest++ = (cups_sbcs_t)legchar;
1004
1005       maxout --;
1006     }
1007   }
1008
1009   *dest = '\0';
1010
1011   vmap->used --;
1012
1013   return ((int)(dest - start));
1014 }
1015
1016
1017 /*
1018  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1019  */
1020
1021 static int                              /* O - Count or -1 on error */
1022 conv_vbcs_to_utf8(
1023     cups_utf8_t           *dest,        /* O - Target string */
1024     const cups_sbcs_t     *src,         /* I - Source string */
1025     int                   maxout,       /* I - Max output */
1026     const cups_encoding_t encoding)     /* I - Encoding */
1027 {
1028   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1029   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1030   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1031   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
1032   cups_vbcs_t   legchar;                /* Legacy character value */
1033   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1034                 *workptr;               /* Pointer into string */
1035
1036
1037  /*
1038   * Find legacy charset map in cache...
1039   */
1040
1041   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1042     return (-1);
1043
1044  /*
1045   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1046   */
1047
1048   work[0] = 0xfeff;
1049   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1050   {
1051     legchar  = *src++;
1052     leadchar = (cups_sbcs_t)legchar;
1053
1054    /*
1055     * Convert ASCII verbatim (optimization)...
1056     */
1057
1058     if (legchar < 0x80)
1059     {
1060       *workptr++ = (cups_utf32_t)legchar;
1061       continue;
1062     }
1063
1064    /*
1065     * Convert 2-byte legacy character...
1066     */
1067
1068     if (vmap->lead2char[(int)leadchar] == leadchar)
1069     {
1070       if (!*src)
1071         return (-1);
1072
1073       legchar = (legchar << 8) | *src++;
1074
1075      /*
1076       * Convert unknown character to Replacement Character...
1077       */
1078
1079       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1080       if (crow)
1081         crow += (int) (legchar & 0xff);
1082
1083       if (!crow || !*crow)
1084         *workptr++ = 0xfffd;
1085       else
1086         *workptr++ = (cups_utf32_t)*crow;
1087       continue;
1088     }
1089
1090    /*
1091     * Fetch 3-byte or 4-byte legacy character...
1092     */
1093
1094     if (vmap->lead3char[(int)leadchar] == leadchar)
1095     {
1096       if (!*src || !src[1])
1097         return (-1);
1098
1099       legchar = (legchar << 8) | *src++;
1100       legchar = (legchar << 8) | *src++;
1101     }
1102     else if (vmap->lead4char[(int)leadchar] == leadchar)
1103     {
1104       if (!*src || !src[1] || !src[2])
1105         return (-1);
1106
1107       legchar = (legchar << 8) | *src++;
1108       legchar = (legchar << 8) | *src++;
1109       legchar = (legchar << 8) | *src++;
1110     }
1111     else
1112       return (-1);
1113
1114    /*
1115     * Find 3-byte or 4-byte legacy character...
1116     */
1117
1118     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1119                                            vmap->wide2uni,
1120                                            vmap->widecount,
1121                                            sizeof(_cups_wide2uni_t),
1122                                            compare_wide);
1123
1124    /*
1125     * Convert unknown character to Replacement Character...
1126     */
1127
1128     if (!wide2uni || !wide2uni->unichar)
1129       *workptr++ = 0xfffd;
1130     else
1131       *workptr++ = wide2uni->unichar;
1132   }
1133
1134   *workptr = 0;
1135
1136   vmap->used --;
1137
1138  /*
1139   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1140   */
1141
1142   return (cupsUTF32ToUTF8(dest, work, maxout));
1143 }
1144
1145
1146 /*
1147  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1148  */
1149
1150 static void
1151 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1152 {
1153   int           i;                      /* Looping variable */
1154
1155
1156   for (i = 0; i < 256; i ++)
1157     if (cmap->uni2char[i])
1158       free(cmap->uni2char[i]);
1159
1160   free(cmap);
1161 }
1162
1163
1164 /*
1165  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1166  */
1167
1168 static void
1169 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1170 {
1171   int           i;                      /* Looping variable */
1172
1173
1174   for (i = 0; i < 256; i ++)
1175     if (vmap->char2uni[i])
1176       free(vmap->char2uni[i]);
1177
1178   for (i = 0; i < 256; i ++)
1179     if (vmap->uni2char[i])
1180       free(vmap->uni2char[i]);
1181
1182   if (vmap->wide2uni)
1183     free(vmap->wide2uni);
1184
1185   free(vmap);
1186 }
1187
1188
1189 /*
1190  * 'get_charmap()' - Lookup or get a character set map (private).
1191  *
1192  * This code handles single-byte (SBCS), double-byte (DBCS), and
1193  * variable-byte (VBCS) character sets _without_ charset escapes...
1194  * This code does not handle multiple-byte character sets (MBCS)
1195  * (such as ISO-2022-JP) with charset switching via escapes...
1196  */
1197
1198
1199 static void *                           /* O - Charset map pointer */
1200 get_charmap(
1201     const cups_encoding_t encoding)     /* I - Encoding */
1202 {
1203   char          filename[1024];         /* Filename for charset map file */
1204   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1205
1206
1207  /*
1208   * Get the data directory and charset map name...
1209   */
1210
1211   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1212            cg->cups_datadir, _cupsEncodingName(encoding));
1213
1214   DEBUG_printf(("    filename=\"%s\"\n", filename));
1215
1216  /*
1217   * Read charset map input file into cache...
1218   */
1219
1220   if (encoding < CUPS_ENCODING_SBCS_END)
1221     return (get_sbcs_charmap(encoding, filename));
1222   else if (encoding < CUPS_ENCODING_VBCS_END)
1223     return (get_vbcs_charmap(encoding, filename));
1224   else
1225     return (NULL);
1226 }
1227
1228
1229 /*
1230  * 'get_charmap_count()' - Count lines in a charmap file.
1231  */
1232
1233 static int                              /* O - Count or -1 on error */
1234 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1235 {
1236   int   count;                          /* Number of lines */
1237   char  line[256];                      /* Line from input map file */
1238
1239
1240  /*
1241   * Count lines in map input file...
1242   */
1243
1244   count = 0;
1245
1246   while (cupsFileGets(fp, line, sizeof(line)))
1247     if (line[0] == '0')
1248       count ++;
1249
1250  /*
1251   * Return the number of lines...
1252   */
1253
1254   if (count > 0)
1255     return (count);
1256   else
1257     return (-1);
1258 }
1259
1260
1261 /*
1262  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1263  */
1264
1265 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1266 get_sbcs_charmap(
1267     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1268     const char            *filename)    /* I - Charmap Filename */
1269 {
1270   unsigned long legchar;                /* Legacy character value */
1271   cups_utf32_t  unichar;                /* Unicode character value */
1272   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1273   cups_file_t   *fp;                    /* Charset map file pointer */
1274   char          *s;                     /* Line parsing pointer */
1275   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1276   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1277   char          line[256];              /* Line from charset map file */
1278
1279
1280  /*
1281   * See if we already have this SBCS charset map loaded...
1282   */
1283
1284   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1285   {
1286     if (cmap->encoding == encoding)
1287     {
1288       cmap->used ++;
1289       DEBUG_printf(("    returning existing cmap=%p\n", cmap));
1290
1291       return ((void *)cmap);
1292     }
1293   }
1294
1295  /*
1296   * Open SBCS charset map input file...
1297   */
1298
1299   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1300     return (NULL);
1301
1302  /*
1303   * Allocate memory for SBCS charset map...
1304   */
1305
1306   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1307   {
1308     cupsFileClose(fp);
1309     DEBUG_puts("    Unable to allocate memory!");
1310
1311     return (NULL);
1312   }
1313
1314   cmap->used ++;
1315   cmap->encoding = encoding;
1316
1317  /*
1318   * Save SBCS charset map into memory for transcoding...
1319   */
1320
1321   while (cupsFileGets(fp, line, sizeof(line)))
1322   {
1323     if (line[0] != '0')
1324       continue;
1325
1326     legchar = strtol(line, &s, 16);
1327     if (legchar < 0 || legchar > 0xff)
1328       goto sbcs_error;
1329
1330     unichar = strtol(s, NULL, 16);
1331     if (unichar < 0 || unichar > 0xffff)
1332       goto sbcs_error;
1333
1334    /*
1335     * Save legacy to Unicode mapping in direct lookup table...
1336     */
1337
1338     crow  = cmap->char2uni + legchar;
1339     *crow = (cups_ucs2_t)(unichar & 0xffff);
1340
1341    /*
1342     * Save Unicode to legacy mapping in indirect lookup table...
1343     */
1344
1345     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1346     if (!srow)
1347     {
1348       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1349       if (!srow)
1350         goto sbcs_error;
1351
1352       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1353     }
1354
1355     srow += unichar & 0xff;
1356
1357    /*
1358     * Convert Replacement Character to visible replacement...
1359     */
1360
1361     if (unichar == 0xfffd)
1362       legchar = (unsigned long)'?';
1363
1364    /*
1365     * First (oldest) legacy character uses Unicode mapping cell...
1366     */
1367
1368     if (!*srow)
1369       *srow = (cups_sbcs_t)legchar;
1370   }
1371
1372   cupsFileClose(fp);
1373
1374  /*
1375   * Add it to the cache and return...
1376   */
1377
1378   cmap->next = cmap_cache;
1379   cmap_cache = cmap;
1380
1381   DEBUG_printf(("    returning new cmap=%p\n", cmap));
1382
1383   return (cmap);
1384
1385  /*
1386   * If we get here, there was an error in the cmap file...
1387   */
1388
1389   sbcs_error:
1390
1391   free_sbcs_charmap(cmap);
1392
1393   cupsFileClose(fp);
1394
1395   DEBUG_puts("    Error, returning NULL!");
1396
1397   return (NULL);
1398 }
1399
1400
1401 /*
1402  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1403  */
1404
1405 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1406 get_vbcs_charmap(
1407     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1408     const char            *filename)    /* I - Charmap Filename */
1409 {
1410   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1411   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1412   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1413   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1414   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1415   unsigned long legchar;                /* Legacy character value */
1416   cups_utf32_t  unichar;                /* Unicode character value */
1417   int           mapcount;               /* Count of lines in charmap file */
1418   cups_file_t   *fp;                    /* Charset map file pointer */
1419   char          *s;                     /* Line parsing pointer */
1420   char          line[256];              /* Line from charset map file */
1421   int           i;                      /* Loop variable */
1422   int           legacy;                 /* 32-bit legacy char */
1423
1424
1425   DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1426                 encoding, filename));
1427
1428  /*
1429   * See if we already have this DBCS/VBCS charset map loaded...
1430   */
1431
1432   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1433   {
1434     if (vmap->encoding == encoding)
1435     {
1436       vmap->used ++;
1437       DEBUG_printf(("    returning existing vmap=%p\n", vmap));
1438
1439       return ((void *)vmap);
1440     }
1441   }
1442
1443  /*
1444   * Open VBCS charset map input file...
1445   */
1446
1447   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1448   {
1449     DEBUG_printf(("    Unable to open file: %s\n", strerror(errno)));
1450
1451     return (NULL);
1452   }
1453
1454  /*
1455   * Count lines in charmap file...
1456   */
1457
1458   if ((mapcount = get_charmap_count(fp)) <= 0)
1459   {
1460     DEBUG_puts("    Unable to get charmap count!");
1461
1462     cupsFileClose(fp);
1463
1464     return (NULL);
1465   }
1466
1467   DEBUG_printf(("    mapcount=%d\n", mapcount));
1468
1469  /*
1470   * Allocate memory for DBCS/VBCS charset map...
1471   */
1472
1473   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1474   {
1475     DEBUG_puts("    Unable to allocate memory!");
1476
1477     cupsFileClose(fp);
1478
1479     return (NULL);
1480   }
1481
1482   vmap->used ++;
1483   vmap->encoding = encoding;
1484
1485  /*
1486   * Save DBCS/VBCS charset map into memory for transcoding...
1487   */
1488
1489   leadchar = 0;
1490   wide2uni = NULL;
1491
1492   cupsFileRewind(fp);
1493
1494   i      = 0;
1495   legacy = 0;
1496
1497   while (cupsFileGets(fp, line, sizeof(line)))
1498   {
1499     if (line[0] != '0')
1500       continue;
1501
1502     legchar = strtoul(line, &s, 16);
1503     if (legchar == ULONG_MAX)
1504       goto vbcs_error;
1505
1506     unichar = strtol(s, NULL, 16);
1507     if (unichar < 0 || unichar > 0xffff)
1508       goto vbcs_error;
1509
1510     i ++;
1511
1512 /*    DEBUG_printf(("    i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1513                   legchar, (unsigned)unichar)); */
1514
1515    /*
1516     * Save lead char of 2/3/4-byte legacy char...
1517     */
1518
1519     if (legchar > 0xff && legchar <= 0xffff)
1520     {
1521       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1522       vmap->lead2char[leadchar] = leadchar;
1523     }
1524
1525     if (legchar > 0xffff && legchar <= 0xffffff)
1526     {
1527       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1528       vmap->lead3char[leadchar] = leadchar;
1529     }
1530
1531     if (legchar > 0xffffff)
1532     {
1533       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1534       vmap->lead4char[leadchar] = leadchar;
1535     }
1536
1537    /*
1538     * Save Legacy to Unicode mapping...
1539     */
1540
1541     if (legchar <= 0xffff)
1542     {
1543      /*
1544       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1545       */
1546
1547       crow = vmap->char2uni[(int)leadchar];
1548       if (!crow)
1549       {
1550         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1551         if (!crow)
1552           goto vbcs_error;
1553
1554         vmap->char2uni[(int)leadchar] = crow;
1555       }
1556
1557       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1558     }
1559     else
1560     {
1561      /*
1562       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1563       */
1564
1565       if (!legacy)
1566       {
1567         legacy          = 1;
1568         vmap->widecount = (mapcount - i + 1);
1569         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1570                                                      sizeof(_cups_wide2uni_t));
1571         if (!wide2uni)
1572           goto vbcs_error;
1573
1574         vmap->wide2uni = wide2uni;
1575       }
1576
1577       wide2uni->widechar = (cups_vbcs_t)legchar;
1578       wide2uni->unichar  = (cups_ucs2_t)unichar;
1579       wide2uni ++;
1580     }
1581
1582    /*
1583     * Save Unicode to legacy mapping in indirect lookup table...
1584     */
1585
1586     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1587     if (!vrow)
1588     {
1589       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1590       if (!vrow)
1591         goto vbcs_error;
1592
1593       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1594     }
1595
1596     vrow += (int)(unichar & 0xff);
1597
1598    /*
1599     * Convert Replacement Character to visible replacement...
1600     */
1601
1602     if (unichar == 0xfffd)
1603       legchar = (unsigned long)'?';
1604
1605    /*
1606     * First (oldest) legacy character uses Unicode mapping cell...
1607     */
1608
1609     if (!*vrow)
1610       *vrow = (cups_vbcs_t)legchar;
1611   }
1612
1613   vmap->charcount = (i - vmap->widecount);
1614
1615   cupsFileClose(fp);
1616
1617  /*
1618   * Add it to the cache and return...
1619   */
1620
1621   vmap->next     = vmap_cache;
1622   vmap_cache = vmap;
1623
1624   DEBUG_printf(("    returning new vmap=%p\n", vmap));
1625
1626   return (vmap);
1627
1628  /*
1629   * If we get here, the file contains errors...
1630   */
1631
1632   vbcs_error:
1633
1634   free_vbcs_charmap(vmap);
1635
1636   cupsFileClose(fp);
1637
1638   DEBUG_puts("    Error, returning NULL!");
1639
1640   return (NULL);
1641 }
1642
1643
1644 /*
1645  * End of "$Id: transcode.c 6649 2007-07-11 21:46:42Z mike $"
1646  */