cups/transcode.c

   1 /*
   2  * "$Id$"
   3  *
   4  *   Transcoding support for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 1997-2006 by Easy Software Products.
   7  *
   8  *   These coded instructions, statements, and computer programs are
   9  *   the property of Easy Software Products and are protected by Federal
  10  *   copyright law.  Distribution and use rights are outlined in the
  11  *   file "LICENSE.txt" which should have been included with this file.
  12  *   If this file is missing or damaged please contact Easy Software
  13  *   Products at:
  14  *
  15  *       Attn: CUPS Licensing Information
  16  *       Easy Software Products
  17  *       44141 Airport View Drive, Suite 204
  18  *       Hollywood, Maryland 20636 USA
  19  *
  20  *       Voice: (301) 373-9600
  21  *       EMail: cups-info@cups.org
  22  *         WWW: http://www.cups.org
  23  *
  24  * Contents:
  25  *
  26  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  27  *   _cupsCharmapFree()  - Free a character set map.
  28  *   _cupsCharmapGet()   - Get a character set map.
  29  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  30  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  31  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  32  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  33  *   compare_wide()      - Compare key for wide (VBCS) match.
  34  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  35  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  36  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  37  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  38  *   free_sbcs_charmap() - Free memory used by a single byte character set.
  39  *   free_vbcs_charmap() - Free memory used by a variable byte character set.
  40  *   get_charmap()       - Lookup or get a character set map (private).
  41  *   get_charmap_count() - Count lines in a charmap file.
  42  *   get_sbcs_charmap()  - Get SBCS Charmap.
  43  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  44  */
  45
  46 /*
  47  * Include necessary headers...
  48  */
  49
  50 #include "globals.h"
  51 #include "debug.h"
  52 #include <limits.h>
  53 #include <stdlib.h>
  54 #include <errno.h>
  55 #include <time.h>
  56
  57
  58 /*
  59  * Local globals...
  60  */
  61
  62 #ifdef HAVE_PTHREAD_H
  63 static pthread_mutex_t  map_mutex = PTHREAD_MUTEX_INITIALIZER;
  64                                         /* Mutex to control access to maps */
  65 #endif /* HAVE_PTHREAD_H */
  66 static _cups_cmap_t     *cmap_cache = NULL;
  67                                         /* SBCS Charmap Cache */
  68 static _cups_vmap_t     *vmap_cache = NULL;
  69                                         /* VBCS Charmap Cache */
  70
  71
  72 /*
  73  * Local functions...
  74  */
  75
  76 static int              compare_wide(const void *k1, const void *k2);
  77 static int              conv_sbcs_to_utf8(cups_utf8_t *dest,
  78                                           const cups_sbcs_t *src,
  79                                           int maxout,
  80                                           const cups_encoding_t encoding);
  81 static int              conv_utf8_to_sbcs(cups_sbcs_t *dest,
  82                                           const cups_utf8_t *src,
  83                                           int maxout,
  84                                           const cups_encoding_t encoding);
  85 static int              conv_utf8_to_vbcs(cups_sbcs_t *dest,
  86                                           const cups_utf8_t *src,
  87                                           int maxout,
  88                                           const cups_encoding_t encoding);
  89 static int              conv_vbcs_to_utf8(cups_utf8_t *dest,
  90                                           const cups_sbcs_t *src,
  91                                           int maxout,
  92                                           const cups_encoding_t encoding);
  93 static void             free_sbcs_charmap(_cups_cmap_t *sbcs);
  94 static void             free_vbcs_charmap(_cups_vmap_t *vbcs);
  95 static void             *get_charmap(const cups_encoding_t encoding);
  96 static int              get_charmap_count(cups_file_t *fp);
  97 static _cups_cmap_t     *get_sbcs_charmap(const cups_encoding_t encoding,
  98                                           const char *filename);
  99 static _cups_vmap_t     *get_vbcs_charmap(const cups_encoding_t encoding,
 100                                           const char *filename);
 101
 102
 103 /*
 104  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
 105  */
 106
 107 void
 108 _cupsCharmapFlush(void)
 109 {
 110   _cups_cmap_t  *cmap,                  /* Legacy SBCS / Unicode Charset Map */
 111                 *cnext;                 /* Next Legacy SBCS Charset Map */
 112   _cups_vmap_t  *vmap,                  /* Legacy VBCS / Unicode Charset Map */
 113                 *vnext;                 /* Next Legacy VBCS Charset Map */
 114
 115
 116 #ifdef HAVE_PTHREAD_H
 117   pthread_mutex_lock(&map_mutex);
 118 #endif /* HAVE_PTHREAD_H */
 119
 120  /*
 121   * Loop through SBCS charset map cache, free all memory...
 122   */
 123
 124   for (cmap = cmap_cache; cmap; cmap = cnext)
 125   {
 126     cnext = cmap->next;
 127
 128     free_sbcs_charmap(cmap);
 129   }
 130
 131   cmap_cache = NULL;
 132
 133  /*
 134   * Loop through DBCS/VBCS charset map cache, free all memory...
 135   */
 136
 137   for (vmap = vmap_cache; vmap; vmap = vnext)
 138   {
 139     vnext = vmap->next;
 140
 141     free_vbcs_charmap(vmap);
 142
 143     free(vmap);
 144   }
 145
 146   vmap_cache = NULL;
 147
 148 #ifdef HAVE_PTHREAD_H
 149   pthread_mutex_unlock(&map_mutex);
 150 #endif /* HAVE_PTHREAD_H */
 151 }
 152
 153
 154 /*
 155  * '_cupsCharmapFree()' - Free a character set map.
 156  *
 157  * This does not actually free; use '_cupsCharmapFlush()' for that.
 158  */
 159
 160 void
 161 _cupsCharmapFree(
 162     const cups_encoding_t encoding)     /* I - Encoding */
 163 {
 164   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 165   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 166
 167
 168  /*
 169   * See if we already have this SBCS charset map loaded...
 170   */
 171
 172 #ifdef HAVE_PTHREAD_H
 173   pthread_mutex_lock(&map_mutex);
 174 #endif /* HAVE_PTHREAD_H */
 175
 176   for (cmap = cmap_cache; cmap; cmap = cmap->next)
 177   {
 178     if (cmap->encoding == encoding)
 179     {
 180       if (cmap->used > 0)
 181         cmap->used --;
 182       break;
 183     }
 184   }
 185
 186  /*
 187   * See if we already have this DBCS/VBCS charset map loaded...
 188   */
 189
 190   for (vmap = vmap_cache; vmap; vmap = vmap->next)
 191   {
 192     if (vmap->encoding == encoding)
 193     {
 194       if (vmap->used > 0)
 195         vmap->used --;
 196       break;
 197     }
 198   }
 199
 200 #ifdef HAVE_PTHREAD_H
 201   pthread_mutex_unlock(&map_mutex);
 202 #endif /* HAVE_PTHREAD_H */
 203 }
 204
 205
 206 /*
 207  * '_cupsCharmapGet()' - Get a character set map.
 208  *
 209  * This code handles single-byte (SBCS), double-byte (DBCS), and
 210  * variable-byte (VBCS) character sets _without_ charset escapes...
 211  * This code does not handle multiple-byte character sets (MBCS)
 212  * (such as ISO-2022-JP) with charset switching via escapes...
 213  */
 214
 215 void *                                  /* O - Charset map pointer */
 216 _cupsCharmapGet(
 217     const cups_encoding_t encoding)     /* I - Encoding */
 218 {
 219   void  *charmap;                       /* Charset map pointer */
 220
 221
 222   DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
 223
 224  /*
 225   * Check for valid arguments...
 226   */
 227
 228   if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 229   {
 230     DEBUG_puts("    Bad encoding, returning NULL!");
 231     return (NULL);
 232   }
 233
 234  /*
 235   * Lookup or get the charset map pointer and return...
 236   */
 237
 238 #ifdef HAVE_PTHREAD_H
 239   pthread_mutex_lock(&map_mutex);
 240 #endif /* HAVE_PTHREAD_H */
 241
 242   charmap = get_charmap(encoding);
 243
 244 #ifdef HAVE_PTHREAD_H
 245   pthread_mutex_unlock(&map_mutex);
 246 #endif /* HAVE_PTHREAD_H */
 247
 248   return (charmap);
 249 }
 250
 251
 252 /*
 253  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 254  *
 255  * This code handles single-byte (SBCS), double-byte (DBCS), and
 256  * variable-byte (VBCS) character sets _without_ charset escapes...
 257  * This code does not handle multiple-byte character sets (MBCS)
 258  * (such as ISO-2022-JP) with charset switching via escapes...
 259  */
 260
 261 int                                     /* O - Count or -1 on error */
 262 cupsCharsetToUTF8(
 263     cups_utf8_t *dest,                  /* O - Target string */
 264     const char *src,                    /* I - Source string */
 265     const int maxout,                   /* I - Max output */
 266     const cups_encoding_t encoding)     /* I - Encoding */
 267 {
 268   int   bytes;                          /* Number of bytes converted */
 269
 270
 271  /*
 272   * Check for valid arguments...
 273   */
 274
 275   DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
 276                 dest, src, maxout, encoding));
 277
 278   if (dest)
 279     *dest = '\0';
 280
 281   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 282   {
 283     DEBUG_puts("    Bad arguments, returning -1");
 284     return (-1);
 285   }
 286
 287  /*
 288   * Handle identity conversions...
 289   */
 290
 291   if (encoding == CUPS_UTF8 ||
 292       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 293   {
 294     strlcpy((char *)dest, src, maxout);
 295     return (strlen((char *)dest));
 296   }
 297
 298  /*
 299   * Convert input legacy charset to UTF-8...
 300   */
 301
 302 #ifdef HAVE_PTHREAD_H
 303   pthread_mutex_lock(&map_mutex);
 304 #endif /* HAVE_PTHREAD_H */
 305
 306   if (encoding < CUPS_ENCODING_SBCS_END)
 307     bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 308   else if (encoding < CUPS_ENCODING_VBCS_END)
 309     bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 310   else
 311   {
 312     DEBUG_puts("    Bad encoding, returning -1");
 313     bytes = -1;
 314   }
 315
 316 #ifdef HAVE_PTHREAD_H
 317   pthread_mutex_unlock(&map_mutex);
 318 #endif /* HAVE_PTHREAD_H */
 319
 320   return (bytes);
 321 }
 322
 323
 324 /*
 325  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 326  *
 327  * This code handles single-byte (SBCS), double-byte (DBCS), and
 328  * variable-byte (VBCS) character sets _without_ charset escapes...
 329  * This code does not handle multiple-byte character sets (MBCS)
 330  * (such as ISO-2022-JP) with charset switching via escapes...
 331  */
 332
 333 int                                     /* O - Count or -1 on error */
 334 cupsUTF8ToCharset(
 335     char                  *dest,        /* O - Target string */
 336     const cups_utf8_t     *src,         /* I - Source string */
 337     const int             maxout,       /* I - Max output */
 338     const cups_encoding_t encoding)     /* I - Encoding */
 339 {
 340   int   bytes;                          /* Number of bytes converted */
 341
 342
 343  /*
 344   * Check for valid arguments...
 345   */
 346
 347   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 348   {
 349     if (dest)
 350       *dest = '\0';
 351
 352     return (-1);
 353   }
 354
 355  /*
 356   * Handle identity conversions...
 357   */
 358
 359   if (encoding == CUPS_UTF8 ||
 360       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 361   {
 362     strlcpy(dest, (char *)src, maxout);
 363     return (strlen(dest));
 364   }
 365
 366  /*
 367   * Convert input UTF-8 to legacy charset...
 368   */
 369
 370 #ifdef HAVE_PTHREAD_H
 371   pthread_mutex_lock(&map_mutex);
 372 #endif /* HAVE_PTHREAD_H */
 373
 374   if (encoding < CUPS_ENCODING_SBCS_END)
 375     bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 376   else if (encoding < CUPS_ENCODING_VBCS_END)
 377     bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 378   else
 379     bytes = -1;
 380
 381 #ifdef HAVE_PTHREAD_H
 382   pthread_mutex_unlock(&map_mutex);
 383 #endif /* HAVE_PTHREAD_H */
 384
 385   return (bytes);
 386 }
 387
 388
 389 /*
 390  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 391  *
 392  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 393  *
 394  *   UTF-32 char     UTF-8 char(s)
 395  *   --------------------------------------------------
 396  *        0 to 127 = 0xxxxxxx (US-ASCII)
 397  *     128 to 2047 = 110xxxxx 10yyyyyy
 398  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 399  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 400  *
 401  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 402  * which would convert to five- or six-octet UTF-8 sequences...
 403  */
 404
 405 int                                     /* O - Count or -1 on error */
 406 cupsUTF8ToUTF32(
 407     cups_utf32_t      *dest,            /* O - Target string */
 408     const cups_utf8_t *src,             /* I - Source string */
 409     const int         maxout)           /* I - Max output */
 410 {
 411   size_t        srclen;                 /* Source string length */
 412   int           i;                      /* Looping variable */
 413   cups_utf8_t   ch;                     /* Character value */
 414   cups_utf8_t   next;                   /* Next character value */
 415   cups_utf32_t  ch32;                   /* UTF-32 character value */
 416
 417
 418  /*
 419   * Check for valid arguments and clear output...
 420   */
 421
 422   if (dest)
 423     *dest = 0;
 424
 425   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 426     return (-1);
 427
 428  /*
 429   * Convert input UTF-8 to output UTF-32 (and insert BOM)...
 430   */
 431
 432   *dest++ = 0xfeff;
 433   srclen  = strlen((char *)src);
 434
 435   for (i = maxout - 1; *src && i > 0; i --)
 436   {
 437     ch = *src++;
 438
 439    /*
 440     * Convert UTF-8 character(s) to UTF-32 character...
 441     */
 442
 443     if (!(ch & 0x80))
 444     {
 445      /*
 446       * One-octet UTF-8 <= 127 (US-ASCII)...
 447       */
 448
 449       *dest++ = ch;
 450     }
 451     else if ((ch & 0xe0) == 0xc0)
 452     {
 453      /*
 454       * Two-octet UTF-8 <= 2047 (Latin-x)...
 455       */
 456
 457       next = *src++;
 458       if (!next)
 459         return (-1);
 460
 461       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 462
 463      /*
 464       * Check for non-shortest form (invalid UTF-8)...
 465       */
 466
 467       if (ch32 < 0x80)
 468         return (-1);
 469
 470       *dest++ = ch32;
 471     }
 472     else if ((ch & 0xf0) == 0xe0)
 473     {
 474      /*
 475       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 476       */
 477
 478       next = *src++;
 479       if (!next)
 480         return (-1);
 481
 482       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 483
 484       next = *src++;
 485       if (!next)
 486         return (-1);
 487
 488       ch32 = (ch32 << 6) | (next & 0x3f);
 489
 490      /*
 491       * Check for non-shortest form (invalid UTF-8)...
 492       */
 493
 494       if (ch32 < 0x800)
 495         return (-1);
 496
 497       *dest++ = ch32;
 498     }
 499     else if ((ch & 0xf8) == 0xf0)
 500     {
 501      /*
 502       * Four-octet UTF-8...
 503       */
 504
 505       next = *src++;
 506       if (!next)
 507         return (-1);
 508
 509       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 510
 511       next = *src++;
 512       if (!next)
 513         return (-1);
 514
 515       ch32 = (ch32 << 6) | (next & 0x3f);
 516
 517       next = *src++;
 518       if (!next)
 519         return (-1);
 520
 521       ch32 = (ch32 << 6) | (next & 0x3f);
 522
 523      /*
 524       * Check for non-shortest form (invalid UTF-8)...
 525       */
 526
 527       if (ch32 < 0x10000)
 528         return (-1);
 529
 530       *dest++ = ch32;
 531     }
 532     else
 533     {
 534      /*
 535       * More than 4-octet (invalid UTF-8 sequence)...
 536       */
 537
 538       return (-1);
 539     }
 540
 541    /*
 542     * Check for UTF-16 surrogate (illegal UTF-8)...
 543     */
 544
 545     if (*dest >= 0xd800 && *dest <= 0xdfff)
 546       return (-1);
 547   }
 548
 549   *dest = 0;
 550
 551   return (i);
 552 }
 553
 554
 555 /*
 556  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 557  *
 558  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 559  *
 560  *   UTF-32 char     UTF-8 char(s)
 561  *   --------------------------------------------------
 562  *        0 to 127 = 0xxxxxxx (US-ASCII)
 563  *     128 to 2047 = 110xxxxx 10yyyyyy
 564  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 565  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 566  *
 567  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 568  * which would convert to five- or six-octet UTF-8 sequences...
 569  */
 570
 571 int                                     /* O - Count or -1 on error */
 572 cupsUTF32ToUTF8(
 573     cups_utf8_t        *dest,           /* O - Target string */
 574     const cups_utf32_t *src,            /* I - Source string */
 575     const int          maxout)          /* I - Max output */
 576 {
 577   cups_utf8_t   *start;                 /* Start of destination string */
 578   int           i;                      /* Looping variable */
 579   int           swap;                   /* Byte-swap input to output */
 580   cups_utf32_t  ch;                     /* Character value */
 581
 582
 583  /*
 584   * Check for valid arguments and clear output...
 585   */
 586
 587   if (dest)
 588     *dest = '\0';
 589
 590   if (!dest || !src || maxout < 1)
 591     return (-1);
 592
 593  /*
 594   * Check for leading BOM in UTF-32 and inverted BOM...
 595   */
 596
 597   start = dest;
 598   swap  = *src == 0xfffe0000;
 599
 600   if (*src == 0xfffe0000 || *src == 0xfeff)
 601     src ++;
 602
 603  /*
 604   * Convert input UTF-32 to output UTF-8...
 605   */
 606
 607   for (i = maxout - 1; *src && i > 0;)
 608   {
 609     ch = *src++;
 610
 611    /*
 612     * Byte swap input UTF-32, if necessary...
 613     * (only byte-swapping 24 of 32 bits)
 614     */
 615
 616     if (swap)
 617       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 618
 619    /*
 620     * Check for beyond Plane 16 (invalid UTF-32)...
 621     */
 622
 623     if (ch > 0x10ffff)
 624       return (-1);
 625
 626    /*
 627     * Convert UTF-32 character to UTF-8 character(s)...
 628     */
 629
 630     if (ch < 0x80)
 631     {
 632      /*
 633       * One-octet UTF-8 <= 127 (US-ASCII)...
 634       */
 635
 636       *dest++ = (cups_utf8_t)ch;
 637       i --;
 638     }
 639     else if (ch < 0x800)
 640     {
 641      /*
 642       * Two-octet UTF-8 <= 2047 (Latin-x)...
 643       */
 644
 645       if (i < 2)
 646         return (-1);
 647
 648       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 649       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 650       i -= 2;
 651     }
 652     else if (ch < 0x10000)
 653     {
 654      /*
 655       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 656       */
 657
 658       if (i < 3)
 659         return (-1);
 660
 661       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 662       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 663       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 664       i -= 3;
 665     }
 666     else
 667     {
 668      /*
 669       * Four-octet UTF-8...
 670       */
 671
 672       if (i < 4)
 673         return (-1);
 674
 675       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 676       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 677       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 678       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 679       i -= 4;
 680     }
 681   }
 682
 683   *dest = '\0';
 684
 685   return ((int)(dest - start));
 686 }
 687
 688
 689 /*
 690  * 'compare_wide()' - Compare key for wide (VBCS) match.
 691  */
 692
 693 static int
 694 compare_wide(const void *k1,            /* I - Key char */
 695              const void *k2)            /* I - Map char */
 696 {
 697   cups_vbcs_t   key;                    /* Legacy key character */
 698   cups_vbcs_t   map;                    /* Legacy map character */
 699
 700
 701   key = *((cups_vbcs_t *)k1);
 702   map = ((_cups_wide2uni_t *)k2)->widechar;
 703
 704   return ((int)(key - map));
 705 }
 706
 707
 708 /*
 709  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 710  */
 711
 712 static int                              /* O - Count or -1 on error */
 713 conv_sbcs_to_utf8(
 714     cups_utf8_t           *dest,        /* O - Target string */
 715     const cups_sbcs_t     *src,         /* I - Source string */
 716     int                   maxout,       /* I - Max output */
 717     const cups_encoding_t encoding)     /* I - Encoding */
 718 {
 719   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 720   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 721   cups_sbcs_t   legchar;                /* Legacy character value */
 722   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 723                 *workptr;               /* Pointer into string */
 724
 725
 726  /*
 727   * Find legacy charset map in cache...
 728   */
 729
 730   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 731     return (-1);
 732
 733  /*
 734   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 735   */
 736
 737   work[0] = 0xfeff;
 738   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 739   {
 740     legchar = *src++;
 741
 742    /*
 743     * Convert ASCII verbatim (optimization)...
 744     */
 745
 746     if (legchar < 0x80)
 747       *workptr++ = (cups_utf32_t)legchar;
 748     else
 749     {
 750      /*
 751       * Convert unknown character to Replacement Character...
 752       */
 753
 754       crow = cmap->char2uni + legchar;
 755
 756       if (!*crow)
 757         *workptr++ = 0xfffd;
 758       else
 759         *workptr++ = (cups_utf32_t)*crow;
 760     }
 761   }
 762
 763   *workptr = 0;
 764
 765  /*
 766   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 767   */
 768
 769   cmap->used --;
 770
 771   return (cupsUTF32ToUTF8(dest, work, maxout));
 772 }
 773
 774
 775 /*
 776  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 777  */
 778
 779 static int                              /* O - Count or -1 on error */
 780 conv_utf8_to_sbcs(
 781     cups_sbcs_t           *dest,        /* O - Target string */
 782     const cups_utf8_t     *src,         /* I - Source string */
 783     int                   maxout,       /* I - Max output */
 784     const cups_encoding_t encoding)     /* I - Encoding */
 785 {
 786   cups_sbcs_t   *start;                 /* Start of destination string */
 787   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 788   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 789   cups_utf32_t  unichar;                /* Character value */
 790   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 791                 *workptr;               /* Pointer into string */
 792
 793
 794  /*
 795   * Find legacy charset map in cache...
 796   */
 797
 798   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 799     return (-1);
 800
 801  /*
 802   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 803   */
 804
 805   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 806     return (-1);
 807
 808  /*
 809   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 810   */
 811
 812   for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
 813   {
 814     unichar = *workptr++;
 815     if (!unichar)
 816       break;
 817
 818    /*
 819     * Convert ASCII verbatim (optimization)...
 820     */
 821
 822     if (unichar < 0x80)
 823     {
 824       *dest++ = (cups_sbcs_t)unichar;
 825       continue;
 826     }
 827
 828    /*
 829     * Convert unknown character to visible replacement...
 830     */
 831
 832     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 833
 834     if (srow)
 835       srow += (int)(unichar & 0xff);
 836
 837     if (!srow || !*srow)
 838       *dest++ = '?';
 839     else
 840       *dest++ = *srow;
 841   }
 842
 843   *dest = '\0';
 844
 845   cmap->used --;
 846
 847   return ((int)(dest - start));
 848 }
 849
 850
 851 /*
 852  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 853  */
 854
 855 static int                              /* O - Count or -1 on error */
 856 conv_utf8_to_vbcs(
 857     cups_sbcs_t           *dest,        /* O - Target string */
 858     const cups_utf8_t     *src,         /* I - Source string */
 859     int                   maxout,       /* I - Max output */
 860     const cups_encoding_t encoding)     /* I - Encoding */
 861 {
 862   cups_sbcs_t   *start;                 /* Start of destination string */
 863   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
 864   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
 865   cups_utf32_t  unichar;                /* Character value */
 866   cups_vbcs_t   legchar;                /* Legacy character value */
 867   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 868                 *workptr;               /* Pointer into string */
 869
 870
 871  /*
 872   * Find legacy charset map in cache...
 873   */
 874
 875   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
 876     return (-1);
 877
 878  /*
 879   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 880   */
 881
 882   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 883     return (-1);
 884
 885  /*
 886   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
 887   */
 888
 889   for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
 890   {
 891     unichar = *workptr++;
 892     if (!unichar)
 893       break;
 894
 895    /*
 896     * Convert ASCII verbatim (optimization)...
 897     */
 898
 899     if (unichar < 0x80)
 900     {
 901       *dest++ = (cups_vbcs_t)unichar;
 902       continue;
 903     }
 904
 905    /*
 906     * Convert unknown character to visible replacement...
 907     */
 908
 909     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
 910
 911     if (vrow)
 912       vrow += (int)(unichar & 0xff);
 913
 914     if (!vrow || !*vrow)
 915       legchar = (cups_vbcs_t)'?';
 916     else
 917       legchar = (cups_vbcs_t)*vrow;
 918
 919    /*
 920     * Save n-byte legacy character...
 921     */
 922
 923     if (legchar > 0xffffff)
 924     {
 925       if (maxout < 5)
 926         return (-1);
 927
 928       *dest++ = (cups_sbcs_t)(legchar >> 24);
 929       *dest++ = (cups_sbcs_t)(legchar >> 16);
 930       *dest++ = (cups_sbcs_t)(legchar >> 8);
 931       *dest++ = (cups_sbcs_t)legchar;
 932
 933       maxout -= 3;
 934     }
 935     else if (legchar > 0xffff)
 936     {
 937       if (maxout < 4)
 938         return (-1);
 939
 940       *dest++ = (cups_sbcs_t)(legchar >> 16);
 941       *dest++ = (cups_sbcs_t)(legchar >> 8);
 942       *dest++ = (cups_sbcs_t)legchar;
 943
 944       maxout -= 2;
 945     }
 946     else if (legchar > 0xff)
 947     {
 948       *dest++ = (cups_sbcs_t)(legchar >> 8);
 949       *dest++ = (cups_sbcs_t)legchar;
 950
 951       maxout --;
 952     }
 953   }
 954
 955   *dest = '\0';
 956
 957   vmap->used --;
 958
 959   return ((int)(dest - start));
 960 }
 961
 962
 963 /*
 964  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
 965  */
 966
 967 static int                              /* O - Count or -1 on error */
 968 conv_vbcs_to_utf8(
 969     cups_utf8_t           *dest,        /* O - Target string */
 970     const cups_sbcs_t     *src,         /* I - Source string */
 971     int                   maxout,       /* I - Max output */
 972     const cups_encoding_t encoding)     /* I - Encoding */
 973 {
 974   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 975   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 976   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
 977   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
 978   cups_vbcs_t   legchar;                /* Legacy character value */
 979   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 980                 *workptr;               /* Pointer into string */
 981
 982
 983  /*
 984   * Find legacy charset map in cache...
 985   */
 986
 987   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
 988     return (-1);
 989
 990  /*
 991   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 992   */
 993
 994   work[0] = 0xfeff;
 995   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 996   {
 997     legchar  = *src++;
 998     leadchar = (cups_sbcs_t)legchar;
 999
1000    /*
1001     * Convert ASCII verbatim (optimization)...
1002     */
1003
1004     if (legchar < 0x80)
1005     {
1006       *workptr++ = (cups_utf32_t)legchar;
1007       continue;
1008     }
1009
1010    /*
1011     * Convert 2-byte legacy character...
1012     */
1013
1014     if (vmap->lead2char[(int)leadchar] == leadchar)
1015     {
1016       if (!*src)
1017         return (-1);
1018
1019       legchar = (legchar << 8) | *src++;
1020
1021      /*
1022       * Convert unknown character to Replacement Character...
1023       */
1024
1025       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1026       if (crow)
1027         crow += (int) (legchar & 0xff);
1028
1029       if (!crow || !*crow)
1030         *workptr++ = 0xfffd;
1031       else
1032         *workptr++ = (cups_utf32_t)*crow;
1033       continue;
1034     }
1035
1036    /*
1037     * Fetch 3-byte or 4-byte legacy character...
1038     */
1039
1040     if (vmap->lead3char[(int)leadchar] == leadchar)
1041     {
1042       if (!*src || !src[1])
1043         return (-1);
1044
1045       legchar = (legchar << 8) | *src++;
1046       legchar = (legchar << 8) | *src++;
1047     }
1048     else if (vmap->lead4char[(int)leadchar] == leadchar)
1049     {
1050       if (!*src || !src[1] || !src[2])
1051         return (-1);
1052
1053       legchar = (legchar << 8) | *src++;
1054       legchar = (legchar << 8) | *src++;
1055       legchar = (legchar << 8) | *src++;
1056     }
1057     else
1058       return (-1);
1059
1060    /*
1061     * Find 3-byte or 4-byte legacy character...
1062     */
1063
1064     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1065                                            vmap->wide2uni,
1066                                            vmap->widecount,
1067                                            sizeof(_cups_wide2uni_t),
1068                                            compare_wide);
1069
1070    /*
1071     * Convert unknown character to Replacement Character...
1072     */
1073
1074     if (!wide2uni || !wide2uni->unichar)
1075       *workptr++ = 0xfffd;
1076     else
1077       *workptr++ = wide2uni->unichar;
1078   }
1079
1080   *workptr = 0;
1081
1082   vmap->used --;
1083
1084  /*
1085   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1086   */
1087
1088   return (cupsUTF32ToUTF8(dest, work, maxout));
1089 }
1090
1091
1092 /*
1093  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1094  */
1095
1096 static void
1097 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1098 {
1099   int           i;                      /* Looping variable */
1100
1101
1102   for (i = 0; i < 256; i ++)
1103     if (cmap->uni2char[i])
1104       free(cmap->uni2char[i]);
1105
1106   free(cmap);
1107 }
1108
1109
1110 /*
1111  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1112  */
1113
1114 static void
1115 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1116 {
1117   int           i;                      /* Looping variable */
1118
1119
1120   for (i = 0; i < 256; i ++)
1121     if (vmap->char2uni[i])
1122       free(vmap->char2uni[i]);
1123
1124   for (i = 0; i < 256; i ++)
1125     if (vmap->uni2char[i])
1126       free(vmap->uni2char[i]);
1127
1128   if (vmap->wide2uni)
1129     free(vmap->wide2uni);
1130
1131   free(vmap);
1132 }
1133
1134
1135 /*
1136  * 'get_charmap()' - Lookup or get a character set map (private).
1137  *
1138  * This code handles single-byte (SBCS), double-byte (DBCS), and
1139  * variable-byte (VBCS) character sets _without_ charset escapes...
1140  * This code does not handle multiple-byte character sets (MBCS)
1141  * (such as ISO-2022-JP) with charset switching via escapes...
1142  */
1143
1144
1145 void *                                  /* O - Charset map pointer */
1146 get_charmap(
1147     const cups_encoding_t encoding)     /* I - Encoding */
1148 {
1149   char          filename[1024];         /* Filename for charset map file */
1150   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1151
1152
1153  /*
1154   * Get the data directory and charset map name...
1155   */
1156
1157   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1158            cg->cups_datadir, _cupsEncodingName(encoding));
1159
1160   DEBUG_printf(("    filename=\"%s\"\n", filename));
1161
1162  /*
1163   * Read charset map input file into cache...
1164   */
1165
1166   if (encoding < CUPS_ENCODING_SBCS_END)
1167     return (get_sbcs_charmap(encoding, filename));
1168   else if (encoding < CUPS_ENCODING_VBCS_END)
1169     return (get_vbcs_charmap(encoding, filename));
1170   else
1171     return (NULL);
1172 }
1173
1174
1175 /*
1176  * 'get_charmap_count()' - Count lines in a charmap file.
1177  */
1178
1179 static int                              /* O - Count or -1 on error */
1180 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1181 {
1182   int   count;                          /* Number of lines */
1183   char  line[256];                      /* Line from input map file */
1184
1185
1186  /*
1187   * Count lines in map input file...
1188   */
1189
1190   count = 0;
1191
1192   while (cupsFileGets(fp, line, sizeof(line)))
1193     if (line[0] == '0')
1194       count ++;
1195
1196  /*
1197   * Return the number of lines...
1198   */
1199
1200   if (count > 0)
1201     return (count);
1202   else
1203     return (-1);
1204 }
1205
1206
1207 /*
1208  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1209  */
1210
1211 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1212 get_sbcs_charmap(
1213     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1214     const char            *filename)    /* I - Charmap Filename */
1215 {
1216   unsigned long legchar;                /* Legacy character value */
1217   cups_utf32_t  unichar;                /* Unicode character value */
1218   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1219   cups_file_t   *fp;                    /* Charset map file pointer */
1220   char          *s;                     /* Line parsing pointer */
1221   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1222   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1223   char          line[256];              /* Line from charset map file */
1224
1225
1226  /*
1227   * See if we already have this SBCS charset map loaded...
1228   */
1229
1230   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1231   {
1232     if (cmap->encoding == encoding)
1233     {
1234       cmap->used ++;
1235       DEBUG_printf(("    returning existing cmap=%p\n", cmap));
1236
1237       return ((void *)cmap);
1238     }
1239   }
1240
1241  /*
1242   * Open SBCS charset map input file...
1243   */
1244
1245   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1246     return (NULL);
1247
1248  /*
1249   * Allocate memory for SBCS charset map...
1250   */
1251
1252   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1253   {
1254     cupsFileClose(fp);
1255     DEBUG_puts("    Unable to allocate memory!");
1256
1257     return (NULL);
1258   }
1259
1260   cmap->used ++;
1261   cmap->encoding = encoding;
1262
1263  /*
1264   * Save SBCS charset map into memory for transcoding...
1265   */
1266
1267   while (cupsFileGets(fp, line, sizeof(line)))
1268   {
1269     if (line[0] != '0')
1270       continue;
1271
1272     legchar = strtol(line, &s, 16);
1273     if (legchar < 0 || legchar > 0xff)
1274       goto sbcs_error;
1275
1276     unichar = strtol(s, NULL, 16);
1277     if (unichar < 0 || unichar > 0xffff)
1278       goto sbcs_error;
1279
1280    /*
1281     * Save legacy to Unicode mapping in direct lookup table...
1282     */
1283
1284     crow  = cmap->char2uni + legchar;
1285     *crow = (cups_ucs2_t)(unichar & 0xffff);
1286
1287    /*
1288     * Save Unicode to legacy mapping in indirect lookup table...
1289     */
1290
1291     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1292     if (!srow)
1293     {
1294       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1295       if (!srow)
1296         goto sbcs_error;
1297
1298       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1299     }
1300
1301     srow += unichar & 0xff;
1302
1303    /*
1304     * Convert Replacement Character to visible replacement...
1305     */
1306
1307     if (unichar == 0xfffd)
1308       legchar = (unsigned long)'?';
1309
1310    /*
1311     * First (oldest) legacy character uses Unicode mapping cell...
1312     */
1313
1314     if (!*srow)
1315       *srow = (cups_sbcs_t)legchar;
1316   }
1317
1318   cupsFileClose(fp);
1319
1320  /*
1321   * Add it to the cache and return...
1322   */
1323
1324   cmap->next = cmap_cache;
1325   cmap_cache = cmap;
1326
1327   DEBUG_printf(("    returning new cmap=%p\n", cmap));
1328
1329   return (cmap);
1330
1331  /*
1332   * If we get here, there was an error in the cmap file...
1333   */
1334
1335   sbcs_error:
1336
1337   free_sbcs_charmap(cmap);
1338
1339   cupsFileClose(fp);
1340
1341   DEBUG_puts("    Error, returning NULL!");
1342
1343   return (NULL);
1344 }
1345
1346
1347 /*
1348  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1349  */
1350
1351 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1352 get_vbcs_charmap(
1353     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1354     const char            *filename)    /* I - Charmap Filename */
1355 {
1356   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1357   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1358   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1359   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1360   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1361   unsigned long legchar;                /* Legacy character value */
1362   cups_utf32_t  unichar;                /* Unicode character value */
1363   int           mapcount;               /* Count of lines in charmap file */
1364   cups_file_t   *fp;                    /* Charset map file pointer */
1365   char          *s;                     /* Line parsing pointer */
1366   char          line[256];              /* Line from charset map file */
1367   int           i;                      /* Loop variable */
1368   int           wide;                   /* 32-bit legacy char */
1369
1370
1371   DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1372                 encoding, filename));
1373
1374  /*
1375   * See if we already have this DBCS/VBCS charset map loaded...
1376   */
1377
1378   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1379   {
1380     if (vmap->encoding == encoding)
1381     {
1382       vmap->used ++;
1383       DEBUG_printf(("    returning existing vmap=%p\n", vmap));
1384
1385       return ((void *)vmap);
1386     }
1387   }
1388
1389  /*
1390   * Open VBCS charset map input file...
1391   */
1392
1393   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1394   {
1395     DEBUG_printf(("    Unable to open file: %s\n", strerror(errno)));
1396
1397     return (NULL);
1398   }
1399
1400  /*
1401   * Count lines in charmap file...
1402   */
1403
1404   if ((mapcount = get_charmap_count(fp)) <= 0)
1405   {
1406     DEBUG_puts("    Unable to get charmap count!");
1407
1408     return (NULL);
1409   }
1410
1411   DEBUG_printf(("    mapcount=%d\n", mapcount));
1412
1413  /*
1414   * Allocate memory for DBCS/VBCS charset map...
1415   */
1416
1417   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1418   {
1419     cupsFileClose(fp);
1420     DEBUG_puts("    Unable to allocate memory!");
1421
1422     return (NULL);
1423   }
1424
1425   vmap->used ++;
1426   vmap->encoding = encoding;
1427
1428  /*
1429   * Save DBCS/VBCS charset map into memory for transcoding...
1430   */
1431
1432   leadchar = 0;
1433   wide2uni = NULL;
1434
1435   cupsFileRewind(fp);
1436
1437   i    = 0;
1438   wide = 0;
1439
1440   while (cupsFileGets(fp, line, sizeof(line)))
1441   {
1442     if (line[0] != '0')
1443       continue;
1444
1445     legchar = strtoul(line, &s, 16);
1446     if (legchar == ULONG_MAX)
1447       goto vbcs_error;
1448
1449     unichar = strtol(s, NULL, 16);
1450     if (unichar < 0 || unichar > 0xffff)
1451       goto vbcs_error;
1452
1453     i ++;
1454
1455 /*    DEBUG_printf(("    i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1456                   legchar, (unsigned)unichar)); */
1457
1458    /*
1459     * Save lead char of 2/3/4-byte legacy char...
1460     */
1461
1462     if (legchar > 0xff && legchar <= 0xffff)
1463     {
1464       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1465       vmap->lead2char[leadchar] = leadchar;
1466     }
1467
1468     if (legchar > 0xffff && legchar <= 0xffffff)
1469     {
1470       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1471       vmap->lead3char[leadchar] = leadchar;
1472     }
1473
1474     if (legchar > 0xffffff)
1475     {
1476       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1477       vmap->lead4char[leadchar] = leadchar;
1478     }
1479
1480    /*
1481     * Save Legacy to Unicode mapping...
1482     */
1483
1484     if (legchar <= 0xffff)
1485     {
1486      /*
1487       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1488       */
1489
1490       crow = vmap->char2uni[(int)leadchar];
1491       if (!crow)
1492       {
1493         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1494         if (!crow)
1495           goto vbcs_error;
1496
1497         vmap->char2uni[(int)leadchar] = crow;
1498       }
1499
1500       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1501     }
1502     else
1503     {
1504      /*
1505       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1506       */
1507
1508       if (!wide)
1509       {
1510         wide            = 1;
1511         vmap->widecount = (mapcount - i + 1);
1512         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1513                                                      sizeof(_cups_wide2uni_t));
1514         if (!wide2uni)
1515           goto vbcs_error;
1516
1517         vmap->wide2uni = wide2uni;
1518       }
1519
1520       wide2uni->widechar = (cups_vbcs_t)legchar;
1521       wide2uni->unichar  = (cups_ucs2_t)unichar;
1522       wide2uni ++;
1523     }
1524
1525    /*
1526     * Save Unicode to legacy mapping in indirect lookup table...
1527     */
1528
1529     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1530     if (!vrow)
1531     {
1532       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1533       if (!vrow)
1534         goto vbcs_error;
1535
1536       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1537     }
1538
1539     vrow += (int)(unichar & 0xff);
1540
1541    /*
1542     * Convert Replacement Character to visible replacement...
1543     */
1544
1545     if (unichar == 0xfffd)
1546       legchar = (unsigned long)'?';
1547
1548    /*
1549     * First (oldest) legacy character uses Unicode mapping cell...
1550     */
1551
1552     if (!*vrow)
1553       *vrow = (cups_vbcs_t)legchar;
1554   }
1555
1556   vmap->charcount = (i - vmap->widecount);
1557
1558   cupsFileClose(fp);
1559
1560  /*
1561   * Add it to the cache and return...
1562   */
1563
1564   vmap->next     = vmap_cache;
1565   vmap_cache = vmap;
1566
1567   DEBUG_printf(("    returning new vmap=%p\n", vmap));
1568
1569   return (vmap);
1570
1571  /*
1572   * If we get here, the file contains errors...
1573   */
1574
1575   vbcs_error:
1576
1577   free_vbcs_charmap(vmap);
1578
1579   cupsFileClose(fp);
1580
1581   DEBUG_puts("    Error, returning NULL!");
1582
1583   return (NULL);
1584 }
1585
1586
1587 /*
1588  * End of "$Id$"
1589  */