cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 5838 2006-08-17 14:41:42Z mike $"
   3  *
   4  *   Transcoding support for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 1997-2006 by Easy Software Products.
   7  *
   8  *   These coded instructions, statements, and computer programs are
   9  *   the property of Easy Software Products and are protected by Federal
  10  *   copyright law.  Distribution and use rights are outlined in the
  11  *   file "LICENSE.txt" which should have been included with this file.
  12  *   If this file is missing or damaged please contact Easy Software
  13  *   Products at:
  14  *
  15  *       Attn: CUPS Licensing Information
  16  *       Easy Software Products
  17  *       44141 Airport View Drive, Suite 204
  18  *       Hollywood, Maryland 20636 USA
  19  *
  20  *       Voice: (301) 373-9600
  21  *       EMail: cups-info@cups.org
  22  *         WWW: http://www.cups.org
  23  *
  24  * Contents:
  25  *
  26  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  27  *   _cupsCharmapFree()  - Free a character set map.
  28  *   _cupsCharmapGet()   - Get a character set map.
  29  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  30  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  31  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  32  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  33  *   compare_wide()      - Compare key for wide (VBCS) match.
  34  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  35  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  36  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  37  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  38  *   free_sbcs_charmap() - Free memory used by a single byte character set.
  39  *   free_vbcs_charmap() - Free memory used by a variable byte character set.
  40  *   get_charmap()       - Lookup or get a character set map (private).
  41  *   get_charmap_count() - Count lines in a charmap file.
  42  *   get_sbcs_charmap()  - Get SBCS Charmap.
  43  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  44  */
  45
  46 /*
  47  * Include necessary headers...
  48  */
  49
  50 #include "globals.h"
  51 #include "debug.h"
  52 #include <limits.h>
  53 #include <stdlib.h>
  54 #include <errno.h>
  55 #include <time.h>
  56
  57
  /*
   * Local globals...
   *
   * The two caches are singly-linked lists chained through each map's
   * 'next' member.  When pthreads are available, the public entry points
   * in this file lock 'map_mutex' before walking or changing either list.
   */

  #ifdef HAVE_PTHREAD_H
  static pthread_mutex_t  map_mutex = PTHREAD_MUTEX_INITIALIZER;
                                          /* Mutex to control access to maps */
  #endif /* HAVE_PTHREAD_H */
  static _cups_cmap_t     *cmap_cache = NULL;
                                          /* Head of SBCS charmap cache list */
  static _cups_vmap_t     *vmap_cache = NULL;
                                          /* Head of VBCS charmap cache list */
  70
  71
  /*
   * Local functions...
   *
   * compare_wide() is the comparison callback handed to bsearch() when
   * looking up 3- and 4-byte legacy characters.  The conv_*() helpers
   * perform one direction of conversion for one class of encoding and
   * return a count or -1 on error.  The free_*() helpers release the
   * memory of a single map, and the get_*() helpers look up or get maps.
   */

  static int              compare_wide(const void *k1, const void *k2);
  static int              conv_sbcs_to_utf8(cups_utf8_t *dest,
                                            const cups_sbcs_t *src,
                                            int maxout,
                                            const cups_encoding_t encoding);
  static int              conv_utf8_to_sbcs(cups_sbcs_t *dest,
                                            const cups_utf8_t *src,
                                            int maxout,
                                            const cups_encoding_t encoding);
  static int              conv_utf8_to_vbcs(cups_sbcs_t *dest,
                                            const cups_utf8_t *src,
                                            int maxout,
                                            const cups_encoding_t encoding);
  static int              conv_vbcs_to_utf8(cups_utf8_t *dest,
                                            const cups_sbcs_t *src,
                                            int maxout,
                                            const cups_encoding_t encoding);
  static void             free_sbcs_charmap(_cups_cmap_t *sbcs);
  static void             free_vbcs_charmap(_cups_vmap_t *vbcs);
  static void             *get_charmap(const cups_encoding_t encoding);
  static int              get_charmap_count(cups_file_t *fp);
  static _cups_cmap_t     *get_sbcs_charmap(const cups_encoding_t encoding,
                                            const char *filename);
  static _cups_vmap_t     *get_vbcs_charmap(const cups_encoding_t encoding,
                                            const char *filename);
 101
 102
 103 /*
 104  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
 105  */
 106
 107 void
 108 _cupsCharmapFlush(void)
 109 {
 110   _cups_cmap_t  *cmap,                  /* Legacy SBCS / Unicode Charset Map */
 111                 *cnext;                 /* Next Legacy SBCS Charset Map */
 112   _cups_vmap_t  *vmap,                  /* Legacy VBCS / Unicode Charset Map */
 113                 *vnext;                 /* Next Legacy VBCS Charset Map */
 114
 115
 116 #ifdef HAVE_PTHREAD_H
 117   pthread_mutex_lock(&map_mutex);
 118 #endif /* HAVE_PTHREAD_H */
 119
 120  /*
 121   * Loop through SBCS charset map cache, free all memory...
 122   */
 123
 124   for (cmap = cmap_cache; cmap; cmap = cnext)
 125   {
 126     cnext = cmap->next;
 127
 128     free_sbcs_charmap(cmap);
 129   }
 130
 131   cmap_cache = NULL;
 132
 133  /*
 134   * Loop through DBCS/VBCS charset map cache, free all memory...
 135   */
 136
 137   for (vmap = vmap_cache; vmap; vmap = vnext)
 138   {
 139     vnext = vmap->next;
 140
 141     free_vbcs_charmap(vmap);
 142
 143     free(vmap);
 144   }
 145
 146   vmap_cache = NULL;
 147
 148 #ifdef HAVE_PTHREAD_H
 149   pthread_mutex_unlock(&map_mutex);
 150 #endif /* HAVE_PTHREAD_H */
 151 }
 152
 153
 154 /*
 155  * '_cupsCharmapFree()' - Free a character set map.
 156  *
 157  * This does not actually free; use '_cupsCharmapFlush()' for that.
 158  */
 159
 160 void
 161 _cupsCharmapFree(
 162     const cups_encoding_t encoding)     /* I - Encoding */
 163 {
 164   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 165   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 166
 167
 168  /*
 169   * See if we already have this SBCS charset map loaded...
 170   */
 171
 172 #ifdef HAVE_PTHREAD_H
 173   pthread_mutex_lock(&map_mutex);
 174 #endif /* HAVE_PTHREAD_H */
 175
 176   for (cmap = cmap_cache; cmap; cmap = cmap->next)
 177   {
 178     if (cmap->encoding == encoding)
 179     {
 180       if (cmap->used > 0)
 181         cmap->used --;
 182       break;
 183     }
 184   }
 185
 186  /*
 187   * See if we already have this DBCS/VBCS charset map loaded...
 188   */
 189
 190   for (vmap = vmap_cache; vmap; vmap = vmap->next)
 191   {
 192     if (vmap->encoding == encoding)
 193     {
 194       if (vmap->used > 0)
 195         vmap->used --;
 196       break;
 197     }
 198   }
 199
 200 #ifdef HAVE_PTHREAD_H
 201   pthread_mutex_unlock(&map_mutex);
 202 #endif /* HAVE_PTHREAD_H */
 203 }
 204
 205
 206 /*
 207  * '_cupsCharmapGet()' - Get a character set map.
 208  *
 209  * This code handles single-byte (SBCS), double-byte (DBCS), and
 210  * variable-byte (VBCS) character sets _without_ charset escapes...
 211  * This code does not handle multiple-byte character sets (MBCS)
 212  * (such as ISO-2022-JP) with charset switching via escapes...
 213  */
 214
 215 void *                                  /* O - Charset map pointer */
 216 _cupsCharmapGet(
 217     const cups_encoding_t encoding)     /* I - Encoding */
 218 {
 219   void  *charmap;                       /* Charset map pointer */
 220
 221
 222   DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
 223
 224  /*
 225   * Check for valid arguments...
 226   */
 227
 228   if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 229   {
 230     DEBUG_puts("    Bad encoding, returning NULL!");
 231     return (NULL);
 232   }
 233
 234  /*
 235   * Lookup or get the charset map pointer and return...
 236   */
 237
 238 #ifdef HAVE_PTHREAD_H
 239   pthread_mutex_lock(&map_mutex);
 240 #endif /* HAVE_PTHREAD_H */
 241
 242   charmap = get_charmap(encoding);
 243
 244 #ifdef HAVE_PTHREAD_H
 245   pthread_mutex_unlock(&map_mutex);
 246 #endif /* HAVE_PTHREAD_H */
 247
 248   return (charmap);
 249 }
 250
 251
 252 /*
 253  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 254  *
 255  * This code handles single-byte (SBCS), double-byte (DBCS), and
 256  * variable-byte (VBCS) character sets _without_ charset escapes...
 257  * This code does not handle multiple-byte character sets (MBCS)
 258  * (such as ISO-2022-JP) with charset switching via escapes...
 259  */
 260
 261 int                                     /* O - Count or -1 on error */
 262 cupsCharsetToUTF8(
 263     cups_utf8_t *dest,                  /* O - Target string */
 264     const char *src,                    /* I - Source string */
 265     const int maxout,                   /* I - Max output */
 266     const cups_encoding_t encoding)     /* I - Encoding */
 267 {
 268   int   bytes;                          /* Number of bytes converted */
 269
 270
 271  /*
 272   * Check for valid arguments...
 273   */
 274
 275   DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
 276                 dest, src, maxout, encoding));
 277
 278   if (dest)
 279     *dest = '\0';
 280
 281   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 282   {
 283     DEBUG_puts("    Bad arguments, returning -1");
 284     return (-1);
 285   }
 286
 287  /*
 288   * Handle identity conversions...
 289   */
 290
 291   if (encoding == CUPS_UTF8 ||
 292       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 293   {
 294     strlcpy((char *)dest, src, maxout);
 295     return (strlen((char *)dest));
 296   }
 297
 298  /*
 299   * Convert input legacy charset to UTF-8...
 300   */
 301
 302 #ifdef HAVE_PTHREAD_H
 303   pthread_mutex_lock(&map_mutex);
 304 #endif /* HAVE_PTHREAD_H */
 305
 306   if (encoding < CUPS_ENCODING_SBCS_END)
 307     bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 308   else if (encoding < CUPS_ENCODING_VBCS_END)
 309     bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 310   else
 311   {
 312     DEBUG_puts("    Bad encoding, returning -1");
 313     bytes = -1;
 314   }
 315
 316 #ifdef HAVE_PTHREAD_H
 317   pthread_mutex_unlock(&map_mutex);
 318 #endif /* HAVE_PTHREAD_H */
 319
 320   return (bytes);
 321 }
 322
 323
 324 /*
 325  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 326  *
 327  * This code handles single-byte (SBCS), double-byte (DBCS), and
 328  * variable-byte (VBCS) character sets _without_ charset escapes...
 329  * This code does not handle multiple-byte character sets (MBCS)
 330  * (such as ISO-2022-JP) with charset switching via escapes...
 331  */
 332
 333 int                                     /* O - Count or -1 on error */
 334 cupsUTF8ToCharset(
 335     char                  *dest,        /* O - Target string */
 336     const cups_utf8_t     *src,         /* I - Source string */
 337     const int             maxout,       /* I - Max output */
 338     const cups_encoding_t encoding)     /* I - Encoding */
 339 {
 340   int   bytes;                          /* Number of bytes converted */
 341
 342
 343  /*
 344   * Check for valid arguments...
 345   */
 346
 347   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 348   {
 349     if (dest)
 350       *dest = '\0';
 351
 352     return (-1);
 353   }
 354
 355  /*
 356   * Handle identity conversions...
 357   */
 358
 359   if (encoding == CUPS_UTF8 ||
 360       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 361   {
 362     strlcpy(dest, (char *)src, maxout);
 363     return (strlen(dest));
 364   }
 365
 366  /*
 367   * Convert input UTF-8 to legacy charset...
 368   */
 369
 370 #ifdef HAVE_PTHREAD_H
 371   pthread_mutex_lock(&map_mutex);
 372 #endif /* HAVE_PTHREAD_H */
 373
 374   if (encoding < CUPS_ENCODING_SBCS_END)
 375     bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 376   else if (encoding < CUPS_ENCODING_VBCS_END)
 377     bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 378   else
 379     bytes = -1;
 380
 381 #ifdef HAVE_PTHREAD_H
 382   pthread_mutex_unlock(&map_mutex);
 383 #endif /* HAVE_PTHREAD_H */
 384
 385   return (bytes);
 386 }
 387
 388
 389 /*
 390  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 391  *
 392  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 393  *
 394  *   UTF-32 char     UTF-8 char(s)
 395  *   --------------------------------------------------
 396  *        0 to 127 = 0xxxxxxx (US-ASCII)
 397  *     128 to 2047 = 110xxxxx 10yyyyyy
 398  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 399  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 400  *
 401  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 402  * which would convert to five- or six-octet UTF-8 sequences...
 403  */
 404
 405 int                                     /* O - Count or -1 on error */
 406 cupsUTF8ToUTF32(
 407     cups_utf32_t      *dest,            /* O - Target string */
 408     const cups_utf8_t *src,             /* I - Source string */
 409     const int         maxout)           /* I - Max output */
 410 {
 411   int           i;                      /* Looping variable */
 412   cups_utf8_t   ch;                     /* Character value */
 413   cups_utf8_t   next;                   /* Next character value */
 414   cups_utf32_t  ch32;                   /* UTF-32 character value */
 415
 416
 417  /*
 418   * Check for valid arguments and clear output...
 419   */
 420
 421   if (dest)
 422     *dest = 0;
 423
 424   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 425     return (-1);
 426
 427  /*
 428   * Convert input UTF-8 to output UTF-32 (and insert BOM)...
 429   */
 430
 431   *dest++ = 0xfeff;
 432
 433   for (i = maxout - 1; *src && i > 0; i --)
 434   {
 435     ch = *src++;
 436
 437    /*
 438     * Convert UTF-8 character(s) to UTF-32 character...
 439     */
 440
 441     if (!(ch & 0x80))
 442     {
 443      /*
 444       * One-octet UTF-8 <= 127 (US-ASCII)...
 445       */
 446
 447       *dest++ = ch;
 448     }
 449     else if ((ch & 0xe0) == 0xc0)
 450     {
 451      /*
 452       * Two-octet UTF-8 <= 2047 (Latin-x)...
 453       */
 454
 455       next = *src++;
 456       if (!next)
 457         return (-1);
 458
 459       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 460
 461      /*
 462       * Check for non-shortest form (invalid UTF-8)...
 463       */
 464
 465       if (ch32 < 0x80)
 466         return (-1);
 467
 468       *dest++ = ch32;
 469     }
 470     else if ((ch & 0xf0) == 0xe0)
 471     {
 472      /*
 473       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 474       */
 475
 476       next = *src++;
 477       if (!next)
 478         return (-1);
 479
 480       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 481
 482       next = *src++;
 483       if (!next)
 484         return (-1);
 485
 486       ch32 = (ch32 << 6) | (next & 0x3f);
 487
 488      /*
 489       * Check for non-shortest form (invalid UTF-8)...
 490       */
 491
 492       if (ch32 < 0x800)
 493         return (-1);
 494
 495       *dest++ = ch32;
 496     }
 497     else if ((ch & 0xf8) == 0xf0)
 498     {
 499      /*
 500       * Four-octet UTF-8...
 501       */
 502
 503       next = *src++;
 504       if (!next)
 505         return (-1);
 506
 507       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 508
 509       next = *src++;
 510       if (!next)
 511         return (-1);
 512
 513       ch32 = (ch32 << 6) | (next & 0x3f);
 514
 515       next = *src++;
 516       if (!next)
 517         return (-1);
 518
 519       ch32 = (ch32 << 6) | (next & 0x3f);
 520
 521      /*
 522       * Check for non-shortest form (invalid UTF-8)...
 523       */
 524
 525       if (ch32 < 0x10000)
 526         return (-1);
 527
 528       *dest++ = ch32;
 529     }
 530     else
 531     {
 532      /*
 533       * More than 4-octet (invalid UTF-8 sequence)...
 534       */
 535
 536       return (-1);
 537     }
 538
 539    /*
 540     * Check for UTF-16 surrogate (illegal UTF-8)...
 541     */
 542
 543     if (*dest >= 0xd800 && *dest <= 0xdfff)
 544       return (-1);
 545   }
 546
 547   *dest = 0;
 548
 549   return (i);
 550 }
 551
 552
 553 /*
 554  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 555  *
 556  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 557  *
 558  *   UTF-32 char     UTF-8 char(s)
 559  *   --------------------------------------------------
 560  *        0 to 127 = 0xxxxxxx (US-ASCII)
 561  *     128 to 2047 = 110xxxxx 10yyyyyy
 562  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 563  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 564  *
 565  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 566  * which would convert to five- or six-octet UTF-8 sequences...
 567  */
 568
 569 int                                     /* O - Count or -1 on error */
 570 cupsUTF32ToUTF8(
 571     cups_utf8_t        *dest,           /* O - Target string */
 572     const cups_utf32_t *src,            /* I - Source string */
 573     const int          maxout)          /* I - Max output */
 574 {
 575   cups_utf8_t   *start;                 /* Start of destination string */
 576   int           i;                      /* Looping variable */
 577   int           swap;                   /* Byte-swap input to output */
 578   cups_utf32_t  ch;                     /* Character value */
 579
 580
 581  /*
 582   * Check for valid arguments and clear output...
 583   */
 584
 585   if (dest)
 586     *dest = '\0';
 587
 588   if (!dest || !src || maxout < 1)
 589     return (-1);
 590
 591  /*
 592   * Check for leading BOM in UTF-32 and inverted BOM...
 593   */
 594
 595   start = dest;
 596   swap  = *src == 0xfffe0000;
 597
 598   if (*src == 0xfffe0000 || *src == 0xfeff)
 599     src ++;
 600
 601  /*
 602   * Convert input UTF-32 to output UTF-8...
 603   */
 604
 605   for (i = maxout - 1; *src && i > 0;)
 606   {
 607     ch = *src++;
 608
 609    /*
 610     * Byte swap input UTF-32, if necessary...
 611     * (only byte-swapping 24 of 32 bits)
 612     */
 613
 614     if (swap)
 615       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 616
 617    /*
 618     * Check for beyond Plane 16 (invalid UTF-32)...
 619     */
 620
 621     if (ch > 0x10ffff)
 622       return (-1);
 623
 624    /*
 625     * Convert UTF-32 character to UTF-8 character(s)...
 626     */
 627
 628     if (ch < 0x80)
 629     {
 630      /*
 631       * One-octet UTF-8 <= 127 (US-ASCII)...
 632       */
 633
 634       *dest++ = (cups_utf8_t)ch;
 635       i --;
 636     }
 637     else if (ch < 0x800)
 638     {
 639      /*
 640       * Two-octet UTF-8 <= 2047 (Latin-x)...
 641       */
 642
 643       if (i < 2)
 644         return (-1);
 645
 646       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 647       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 648       i -= 2;
 649     }
 650     else if (ch < 0x10000)
 651     {
 652      /*
 653       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 654       */
 655
 656       if (i < 3)
 657         return (-1);
 658
 659       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 660       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 661       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 662       i -= 3;
 663     }
 664     else
 665     {
 666      /*
 667       * Four-octet UTF-8...
 668       */
 669
 670       if (i < 4)
 671         return (-1);
 672
 673       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 674       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 675       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 676       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 677       i -= 4;
 678     }
 679   }
 680
 681   *dest = '\0';
 682
 683   return ((int)(dest - start));
 684 }
 685
 686
 687 /*
 688  * 'compare_wide()' - Compare key for wide (VBCS) match.
 689  */
 690
 691 static int
 692 compare_wide(const void *k1,            /* I - Key char */
 693              const void *k2)            /* I - Map char */
 694 {
 695   cups_vbcs_t   key;                    /* Legacy key character */
 696   cups_vbcs_t   map;                    /* Legacy map character */
 697
 698
 699   key = *((cups_vbcs_t *)k1);
 700   map = ((_cups_wide2uni_t *)k2)->widechar;
 701
 702   return ((int)(key - map));
 703 }
 704
 705
 706 /*
 707  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 708  */
 709
 710 static int                              /* O - Count or -1 on error */
 711 conv_sbcs_to_utf8(
 712     cups_utf8_t           *dest,        /* O - Target string */
 713     const cups_sbcs_t     *src,         /* I - Source string */
 714     int                   maxout,       /* I - Max output */
 715     const cups_encoding_t encoding)     /* I - Encoding */
 716 {
 717   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 718   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 719   cups_sbcs_t   legchar;                /* Legacy character value */
 720   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 721                 *workptr;               /* Pointer into string */
 722
 723
 724  /*
 725   * Find legacy charset map in cache...
 726   */
 727
 728   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 729     return (-1);
 730
 731  /*
 732   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 733   */
 734
 735   work[0] = 0xfeff;
 736   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 737   {
 738     legchar = *src++;
 739
 740    /*
 741     * Convert ASCII verbatim (optimization)...
 742     */
 743
 744     if (legchar < 0x80)
 745       *workptr++ = (cups_utf32_t)legchar;
 746     else
 747     {
 748      /*
 749       * Convert unknown character to Replacement Character...
 750       */
 751
 752       crow = cmap->char2uni + legchar;
 753
 754       if (!*crow)
 755         *workptr++ = 0xfffd;
 756       else
 757         *workptr++ = (cups_utf32_t)*crow;
 758     }
 759   }
 760
 761   *workptr = 0;
 762
 763  /*
 764   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 765   */
 766
 767   cmap->used --;
 768
 769   return (cupsUTF32ToUTF8(dest, work, maxout));
 770 }
 771
 772
 773 /*
 774  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 775  */
 776
 777 static int                              /* O - Count or -1 on error */
 778 conv_utf8_to_sbcs(
 779     cups_sbcs_t           *dest,        /* O - Target string */
 780     const cups_utf8_t     *src,         /* I - Source string */
 781     int                   maxout,       /* I - Max output */
 782     const cups_encoding_t encoding)     /* I - Encoding */
 783 {
 784   cups_sbcs_t   *start;                 /* Start of destination string */
 785   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 786   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 787   cups_utf32_t  unichar;                /* Character value */
 788   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 789                 *workptr;               /* Pointer into string */
 790
 791
 792  /*
 793   * Find legacy charset map in cache...
 794   */
 795
 796   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 797     return (-1);
 798
 799  /*
 800   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 801   */
 802
 803   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 804     return (-1);
 805
 806  /*
 807   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 808   */
 809
 810   for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
 811   {
 812     unichar = *workptr++;
 813     if (!unichar)
 814       break;
 815
 816    /*
 817     * Convert ASCII verbatim (optimization)...
 818     */
 819
 820     if (unichar < 0x80)
 821     {
 822       *dest++ = (cups_sbcs_t)unichar;
 823       continue;
 824     }
 825
 826    /*
 827     * Convert unknown character to visible replacement...
 828     */
 829
 830     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 831
 832     if (srow)
 833       srow += (int)(unichar & 0xff);
 834
 835     if (!srow || !*srow)
 836       *dest++ = '?';
 837     else
 838       *dest++ = *srow;
 839   }
 840
 841   *dest = '\0';
 842
 843   cmap->used --;
 844
 845   return ((int)(dest - start));
 846 }
 847
 848
 849 /*
 850  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 851  */
 852
 853 static int                              /* O - Count or -1 on error */
 854 conv_utf8_to_vbcs(
 855     cups_sbcs_t           *dest,        /* O - Target string */
 856     const cups_utf8_t     *src,         /* I - Source string */
 857     int                   maxout,       /* I - Max output */
 858     const cups_encoding_t encoding)     /* I - Encoding */
 859 {
 860   cups_sbcs_t   *start;                 /* Start of destination string */
 861   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
 862   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
 863   cups_utf32_t  unichar;                /* Character value */
 864   cups_vbcs_t   legchar;                /* Legacy character value */
 865   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 866                 *workptr;               /* Pointer into string */
 867
 868
 869  /*
 870   * Find legacy charset map in cache...
 871   */
 872
 873   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
 874     return (-1);
 875
 876  /*
 877   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 878   */
 879
 880   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 881     return (-1);
 882
 883  /*
 884   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
 885   */
 886
 887   for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
 888   {
 889     unichar = *workptr++;
 890     if (!unichar)
 891       break;
 892
 893    /*
 894     * Convert ASCII verbatim (optimization)...
 895     */
 896
 897     if (unichar < 0x80)
 898     {
 899       *dest++ = (cups_vbcs_t)unichar;
 900       continue;
 901     }
 902
 903    /*
 904     * Convert unknown character to visible replacement...
 905     */
 906
 907     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
 908
 909     if (vrow)
 910       vrow += (int)(unichar & 0xff);
 911
 912     if (!vrow || !*vrow)
 913       legchar = (cups_vbcs_t)'?';
 914     else
 915       legchar = (cups_vbcs_t)*vrow;
 916
 917    /*
 918     * Save n-byte legacy character...
 919     */
 920
 921     if (legchar > 0xffffff)
 922     {
 923       if (maxout < 5)
 924         return (-1);
 925
 926       *dest++ = (cups_sbcs_t)(legchar >> 24);
 927       *dest++ = (cups_sbcs_t)(legchar >> 16);
 928       *dest++ = (cups_sbcs_t)(legchar >> 8);
 929       *dest++ = (cups_sbcs_t)legchar;
 930
 931       maxout -= 3;
 932     }
 933     else if (legchar > 0xffff)
 934     {
 935       if (maxout < 4)
 936         return (-1);
 937
 938       *dest++ = (cups_sbcs_t)(legchar >> 16);
 939       *dest++ = (cups_sbcs_t)(legchar >> 8);
 940       *dest++ = (cups_sbcs_t)legchar;
 941
 942       maxout -= 2;
 943     }
 944     else if (legchar > 0xff)
 945     {
 946       *dest++ = (cups_sbcs_t)(legchar >> 8);
 947       *dest++ = (cups_sbcs_t)legchar;
 948
 949       maxout --;
 950     }
 951   }
 952
 953   *dest = '\0';
 954
 955   vmap->used --;
 956
 957   return ((int)(dest - start));
 958 }
 959
 960
 961 /*
 962  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
 963  */
 964
 965 static int                              /* O - Count or -1 on error */
 966 conv_vbcs_to_utf8(
 967     cups_utf8_t           *dest,        /* O - Target string */
 968     const cups_sbcs_t     *src,         /* I - Source string */
 969     int                   maxout,       /* I - Max output */
 970     const cups_encoding_t encoding)     /* I - Encoding */
 971 {
 972   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 973   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 974   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
 975   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
 976   cups_vbcs_t   legchar;                /* Legacy character value */
 977   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 978                 *workptr;               /* Pointer into string */
 979
 980
 981  /*
 982   * Find legacy charset map in cache...
 983   */
 984
 985   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
 986     return (-1);
 987
 988  /*
 989   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 990   */
 991
 992   work[0] = 0xfeff;
 993   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 994   {
 995     legchar  = *src++;
 996     leadchar = (cups_sbcs_t)legchar;
 997
 998    /*
 999     * Convert ASCII verbatim (optimization)...
1000     */
1001
1002     if (legchar < 0x80)
1003     {
1004       *workptr++ = (cups_utf32_t)legchar;
1005       continue;
1006     }
1007
1008    /*
1009     * Convert 2-byte legacy character...
1010     */
1011
1012     if (vmap->lead2char[(int)leadchar] == leadchar)
1013     {
1014       if (!*src)
1015         return (-1);
1016
1017       legchar = (legchar << 8) | *src++;
1018
1019      /*
1020       * Convert unknown character to Replacement Character...
1021       */
1022
1023       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1024       if (crow)
1025         crow += (int) (legchar & 0xff);
1026
1027       if (!crow || !*crow)
1028         *workptr++ = 0xfffd;
1029       else
1030         *workptr++ = (cups_utf32_t)*crow;
1031       continue;
1032     }
1033
1034    /*
1035     * Fetch 3-byte or 4-byte legacy character...
1036     */
1037
1038     if (vmap->lead3char[(int)leadchar] == leadchar)
1039     {
1040       if (!*src || !src[1])
1041         return (-1);
1042
1043       legchar = (legchar << 8) | *src++;
1044       legchar = (legchar << 8) | *src++;
1045     }
1046     else if (vmap->lead4char[(int)leadchar] == leadchar)
1047     {
1048       if (!*src || !src[1] || !src[2])
1049         return (-1);
1050
1051       legchar = (legchar << 8) | *src++;
1052       legchar = (legchar << 8) | *src++;
1053       legchar = (legchar << 8) | *src++;
1054     }
1055     else
1056       return (-1);
1057
1058    /*
1059     * Find 3-byte or 4-byte legacy character...
1060     */
1061
1062     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1063                                            vmap->wide2uni,
1064                                            vmap->widecount,
1065                                            sizeof(_cups_wide2uni_t),
1066                                            compare_wide);
1067
1068    /*
1069     * Convert unknown character to Replacement Character...
1070     */
1071
1072     if (!wide2uni || !wide2uni->unichar)
1073       *workptr++ = 0xfffd;
1074     else
1075       *workptr++ = wide2uni->unichar;
1076   }
1077
1078   *workptr = 0;
1079
1080   vmap->used --;
1081
1082  /*
1083   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1084   */
1085
1086   return (cupsUTF32ToUTF8(dest, work, maxout));
1087 }
1088
1089
1090 /*
1091  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1092  */
1093
1094 static void
1095 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1096 {
1097   int           i;                      /* Looping variable */
1098
1099
1100   for (i = 0; i < 256; i ++)
1101     if (cmap->uni2char[i])
1102       free(cmap->uni2char[i]);
1103
1104   free(cmap);
1105 }
1106
1107
1108 /*
1109  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1110  */
1111
1112 static void
1113 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1114 {
1115   int           i;                      /* Looping variable */
1116
1117
1118   for (i = 0; i < 256; i ++)
1119     if (vmap->char2uni[i])
1120       free(vmap->char2uni[i]);
1121
1122   for (i = 0; i < 256; i ++)
1123     if (vmap->uni2char[i])
1124       free(vmap->uni2char[i]);
1125
1126   if (vmap->wide2uni)
1127     free(vmap->wide2uni);
1128
1129   free(vmap);
1130 }
1131
1132
1133 /*
1134  * 'get_charmap()' - Lookup or get a character set map (private).
1135  *
1136  * This code handles single-byte (SBCS), double-byte (DBCS), and
1137  * variable-byte (VBCS) character sets _without_ charset escapes...
1138  * This code does not handle multiple-byte character sets (MBCS)
1139  * (such as ISO-2022-JP) with charset switching via escapes...
1140  */
1141
1142
1143 static void *                           /* O - Charset map pointer */
1144 get_charmap(
1145     const cups_encoding_t encoding)     /* I - Encoding */
1146 {
1147   char          filename[1024];         /* Filename for charset map file */
1148   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1149
1150
1151  /*
1152   * Get the data directory and charset map name...
1153   */
1154
1155   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1156            cg->cups_datadir, _cupsEncodingName(encoding));
1157
1158   DEBUG_printf(("    filename=\"%s\"\n", filename));
1159
1160  /*
1161   * Read charset map input file into cache...
1162   */
1163
1164   if (encoding < CUPS_ENCODING_SBCS_END)
1165     return (get_sbcs_charmap(encoding, filename));
1166   else if (encoding < CUPS_ENCODING_VBCS_END)
1167     return (get_vbcs_charmap(encoding, filename));
1168   else
1169     return (NULL);
1170 }
1171
1172
1173 /*
1174  * 'get_charmap_count()' - Count lines in a charmap file.
1175  */
1176
1177 static int                              /* O - Count or -1 on error */
1178 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1179 {
1180   int   count;                          /* Number of lines */
1181   char  line[256];                      /* Line from input map file */
1182
1183
1184  /*
1185   * Count lines in map input file...
1186   */
1187
1188   count = 0;
1189
1190   while (cupsFileGets(fp, line, sizeof(line)))
1191     if (line[0] == '0')
1192       count ++;
1193
1194  /*
1195   * Return the number of lines...
1196   */
1197
1198   if (count > 0)
1199     return (count);
1200   else
1201     return (-1);
1202 }
1203
1204
1205 /*
1206  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1207  */
1208
1209 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1210 get_sbcs_charmap(
1211     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1212     const char            *filename)    /* I - Charmap Filename */
1213 {
1214   unsigned long legchar;                /* Legacy character value */
1215   cups_utf32_t  unichar;                /* Unicode character value */
1216   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1217   cups_file_t   *fp;                    /* Charset map file pointer */
1218   char          *s;                     /* Line parsing pointer */
1219   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1220   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1221   char          line[256];              /* Line from charset map file */
1222
1223
1224  /*
1225   * See if we already have this SBCS charset map loaded...
1226   */
1227
1228   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1229   {
1230     if (cmap->encoding == encoding)
1231     {
1232       cmap->used ++;
1233       DEBUG_printf(("    returning existing cmap=%p\n", cmap));
1234
1235       return ((void *)cmap);
1236     }
1237   }
1238
1239  /*
1240   * Open SBCS charset map input file...
1241   */
1242
1243   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1244     return (NULL);
1245
1246  /*
1247   * Allocate memory for SBCS charset map...
1248   */
1249
1250   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1251   {
1252     cupsFileClose(fp);
1253     DEBUG_puts("    Unable to allocate memory!");
1254
1255     return (NULL);
1256   }
1257
1258   cmap->used ++;
1259   cmap->encoding = encoding;
1260
1261  /*
1262   * Save SBCS charset map into memory for transcoding...
1263   */
1264
1265   while (cupsFileGets(fp, line, sizeof(line)))
1266   {
1267     if (line[0] != '0')
1268       continue;
1269
1270     legchar = strtol(line, &s, 16);
1271     if (legchar < 0 || legchar > 0xff)
1272       goto sbcs_error;
1273
1274     unichar = strtol(s, NULL, 16);
1275     if (unichar < 0 || unichar > 0xffff)
1276       goto sbcs_error;
1277
1278    /*
1279     * Save legacy to Unicode mapping in direct lookup table...
1280     */
1281
1282     crow  = cmap->char2uni + legchar;
1283     *crow = (cups_ucs2_t)(unichar & 0xffff);
1284
1285    /*
1286     * Save Unicode to legacy mapping in indirect lookup table...
1287     */
1288
1289     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1290     if (!srow)
1291     {
1292       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1293       if (!srow)
1294         goto sbcs_error;
1295
1296       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1297     }
1298
1299     srow += unichar & 0xff;
1300
1301    /*
1302     * Convert Replacement Character to visible replacement...
1303     */
1304
1305     if (unichar == 0xfffd)
1306       legchar = (unsigned long)'?';
1307
1308    /*
1309     * First (oldest) legacy character uses Unicode mapping cell...
1310     */
1311
1312     if (!*srow)
1313       *srow = (cups_sbcs_t)legchar;
1314   }
1315
1316   cupsFileClose(fp);
1317
1318  /*
1319   * Add it to the cache and return...
1320   */
1321
1322   cmap->next = cmap_cache;
1323   cmap_cache = cmap;
1324
1325   DEBUG_printf(("    returning new cmap=%p\n", cmap));
1326
1327   return (cmap);
1328
1329  /*
1330   * If we get here, there was an error in the cmap file...
1331   */
1332
1333   sbcs_error:
1334
1335   free_sbcs_charmap(cmap);
1336
1337   cupsFileClose(fp);
1338
1339   DEBUG_puts("    Error, returning NULL!");
1340
1341   return (NULL);
1342 }
1343
1344
1345 /*
1346  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1347  */
1348
1349 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1350 get_vbcs_charmap(
1351     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1352     const char            *filename)    /* I - Charmap Filename */
1353 {
1354   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1355   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1356   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1357   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1358   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1359   unsigned long legchar;                /* Legacy character value */
1360   cups_utf32_t  unichar;                /* Unicode character value */
1361   int           mapcount;               /* Count of lines in charmap file */
1362   cups_file_t   *fp;                    /* Charset map file pointer */
1363   char          *s;                     /* Line parsing pointer */
1364   char          line[256];              /* Line from charset map file */
1365   int           i;                      /* Loop variable */
1366   int           wide;                   /* 32-bit legacy char */
1367
1368
1369   DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1370                 encoding, filename));
1371
1372  /*
1373   * See if we already have this DBCS/VBCS charset map loaded...
1374   */
1375
1376   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1377   {
1378     if (vmap->encoding == encoding)
1379     {
1380       vmap->used ++;
1381       DEBUG_printf(("    returning existing vmap=%p\n", vmap));
1382
1383       return ((void *)vmap);
1384     }
1385   }
1386
1387  /*
1388   * Open VBCS charset map input file...
1389   */
1390
1391   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1392   {
1393     DEBUG_printf(("    Unable to open file: %s\n", strerror(errno)));
1394
1395     return (NULL);
1396   }
1397
1398  /*
1399   * Count lines in charmap file...
1400   */
1401
1402   if ((mapcount = get_charmap_count(fp)) <= 0)
1403   {
1404     DEBUG_puts("    Unable to get charmap count!");
1405
1406     return (NULL);
1407   }
1408
1409   DEBUG_printf(("    mapcount=%d\n", mapcount));
1410
1411  /*
1412   * Allocate memory for DBCS/VBCS charset map...
1413   */
1414
1415   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1416   {
1417     cupsFileClose(fp);
1418     DEBUG_puts("    Unable to allocate memory!");
1419
1420     return (NULL);
1421   }
1422
1423   vmap->used ++;
1424   vmap->encoding = encoding;
1425
1426  /*
1427   * Save DBCS/VBCS charset map into memory for transcoding...
1428   */
1429
1430   leadchar = 0;
1431   wide2uni = NULL;
1432
1433   cupsFileRewind(fp);
1434
1435   i    = 0;
1436   wide = 0;
1437
1438   while (cupsFileGets(fp, line, sizeof(line)))
1439   {
1440     if (line[0] != '0')
1441       continue;
1442
1443     legchar = strtoul(line, &s, 16);
1444     if (legchar == ULONG_MAX)
1445       goto vbcs_error;
1446
1447     unichar = strtol(s, NULL, 16);
1448     if (unichar < 0 || unichar > 0xffff)
1449       goto vbcs_error;
1450
1451     i ++;
1452
1453 /*    DEBUG_printf(("    i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1454                   legchar, (unsigned)unichar)); */
1455
1456    /*
1457     * Save lead char of 2/3/4-byte legacy char...
1458     */
1459
1460     if (legchar > 0xff && legchar <= 0xffff)
1461     {
1462       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1463       vmap->lead2char[leadchar] = leadchar;
1464     }
1465
1466     if (legchar > 0xffff && legchar <= 0xffffff)
1467     {
1468       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1469       vmap->lead3char[leadchar] = leadchar;
1470     }
1471
1472     if (legchar > 0xffffff)
1473     {
1474       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1475       vmap->lead4char[leadchar] = leadchar;
1476     }
1477
1478    /*
1479     * Save Legacy to Unicode mapping...
1480     */
1481
1482     if (legchar <= 0xffff)
1483     {
1484      /*
1485       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1486       */
1487
1488       crow = vmap->char2uni[(int)leadchar];
1489       if (!crow)
1490       {
1491         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1492         if (!crow)
1493           goto vbcs_error;
1494
1495         vmap->char2uni[(int)leadchar] = crow;
1496       }
1497
1498       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1499     }
1500     else
1501     {
1502      /*
1503       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1504       */
1505
1506       if (!wide)
1507       {
1508         wide            = 1;
1509         vmap->widecount = (mapcount - i + 1);
1510         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1511                                                      sizeof(_cups_wide2uni_t));
1512         if (!wide2uni)
1513           goto vbcs_error;
1514
1515         vmap->wide2uni = wide2uni;
1516       }
1517
1518       wide2uni->widechar = (cups_vbcs_t)legchar;
1519       wide2uni->unichar  = (cups_ucs2_t)unichar;
1520       wide2uni ++;
1521     }
1522
1523    /*
1524     * Save Unicode to legacy mapping in indirect lookup table...
1525     */
1526
1527     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1528     if (!vrow)
1529     {
1530       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1531       if (!vrow)
1532         goto vbcs_error;
1533
1534       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1535     }
1536
1537     vrow += (int)(unichar & 0xff);
1538
1539    /*
1540     * Convert Replacement Character to visible replacement...
1541     */
1542
1543     if (unichar == 0xfffd)
1544       legchar = (unsigned long)'?';
1545
1546    /*
1547     * First (oldest) legacy character uses Unicode mapping cell...
1548     */
1549
1550     if (!*vrow)
1551       *vrow = (cups_vbcs_t)legchar;
1552   }
1553
1554   vmap->charcount = (i - vmap->widecount);
1555
1556   cupsFileClose(fp);
1557
1558  /*
1559   * Add it to the cache and return...
1560   */
1561
1562   vmap->next     = vmap_cache;
1563   vmap_cache = vmap;
1564
1565   DEBUG_printf(("    returning new vmap=%p\n", vmap));
1566
1567   return (vmap);
1568
1569  /*
1570   * If we get here, the file contains errors...
1571   */
1572
1573   vbcs_error:
1574
1575   free_vbcs_charmap(vmap);
1576
1577   cupsFileClose(fp);
1578
1579   DEBUG_puts("    Error, returning NULL!");
1580
1581   return (NULL);
1582 }
1583
1584
1585 /*
1586  * End of "$Id: transcode.c 5838 2006-08-17 14:41:42Z mike $"
1587  */