cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 6188 2007-01-10 16:23:06Z mike $"
   3  *
   4  *   Transcoding support for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 1997-2007 by Easy Software Products.
   7  *
   8  *   These coded instructions, statements, and computer programs are
   9  *   the property of Easy Software Products and are protected by Federal
  10  *   copyright law.  Distribution and use rights are outlined in the
  11  *   file "LICENSE.txt" which should have been included with this file.
  12  *   If this file is missing or damaged please contact Easy Software
  13  *   Products at:
  14  *
  15  *       Attn: CUPS Licensing Information
  16  *       Easy Software Products
  17  *       44141 Airport View Drive, Suite 204
  18  *       Hollywood, Maryland 20636 USA
  19  *
  20  *       Voice: (301) 373-9600
  21  *       EMail: cups-info@cups.org
  22  *         WWW: http://www.cups.org
  23  *
  24  * Contents:
  25  *
  26  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  27  *   _cupsCharmapFree()  - Free a character set map.
  28  *   _cupsCharmapGet()   - Get a character set map.
  29  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  30  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  31  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  32  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  33  *   compare_wide()      - Compare key for wide (VBCS) match.
  34  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  35  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  36  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  37  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  38  *   free_sbcs_charmap() - Free memory used by a single byte character set.
  39  *   free_vbcs_charmap() - Free memory used by a variable byte character set.
  40  *   get_charmap()       - Lookup or get a character set map (private).
  41  *   get_charmap_count() - Count lines in a charmap file.
  42  *   get_sbcs_charmap()  - Get SBCS Charmap.
  43  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  44  */
  45
  46 /*
  47  * Include necessary headers...
  48  */
  49
  50 #include "globals.h"
  51 #include "debug.h"
  52 #include <limits.h>
  53 #include <stdlib.h>
  54 #include <errno.h>
  55 #include <time.h>
  56
  57
/*
 * Local globals...
 *
 * Each cache is the head of a singly-linked list chained through the
 * maps' 'next' members.  The public entry points in this file lock
 * 'map_mutex' (when pthreads are available) before they walk or change
 * either list.
 */

#ifdef HAVE_PTHREAD_H
static pthread_mutex_t  map_mutex = PTHREAD_MUTEX_INITIALIZER;
                                        /* Mutex to control access to maps */
#endif /* HAVE_PTHREAD_H */
static _cups_cmap_t     *cmap_cache = NULL;
                                        /* SBCS Charmap Cache (list head) */
static _cups_vmap_t     *vmap_cache = NULL;
                                        /* VBCS Charmap Cache (list head) */
  70
  71
/*
 * Local functions...
 *
 * All of these are file-private (static).  The conv_*() helpers share
 * one shape: destination buffer, source string, destination capacity
 * and the legacy encoding, returning a count or -1 on error.
 */

static int              compare_wide(const void *k1, const void *k2);
static int              conv_sbcs_to_utf8(cups_utf8_t *dest,
                                          const cups_sbcs_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static int              conv_utf8_to_sbcs(cups_sbcs_t *dest,
                                          const cups_utf8_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static int              conv_utf8_to_vbcs(cups_sbcs_t *dest,
                                          const cups_utf8_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static int              conv_vbcs_to_utf8(cups_utf8_t *dest,
                                          const cups_sbcs_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static void             free_sbcs_charmap(_cups_cmap_t *sbcs);
static void             free_vbcs_charmap(_cups_vmap_t *vbcs);
static void             *get_charmap(const cups_encoding_t encoding);
static int              get_charmap_count(cups_file_t *fp);
static _cups_cmap_t     *get_sbcs_charmap(const cups_encoding_t encoding,
                                          const char *filename);
static _cups_vmap_t     *get_vbcs_charmap(const cups_encoding_t encoding,
                                          const char *filename);
 101
 102
 103 /*
 104  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
 105  */
 106
 107 void
 108 _cupsCharmapFlush(void)
 109 {
 110   _cups_cmap_t  *cmap,                  /* Legacy SBCS / Unicode Charset Map */
 111                 *cnext;                 /* Next Legacy SBCS Charset Map */
 112   _cups_vmap_t  *vmap,                  /* Legacy VBCS / Unicode Charset Map */
 113                 *vnext;                 /* Next Legacy VBCS Charset Map */
 114
 115
 116 #ifdef HAVE_PTHREAD_H
 117   pthread_mutex_lock(&map_mutex);
 118 #endif /* HAVE_PTHREAD_H */
 119
 120  /*
 121   * Loop through SBCS charset map cache, free all memory...
 122   */
 123
 124   for (cmap = cmap_cache; cmap; cmap = cnext)
 125   {
 126     cnext = cmap->next;
 127
 128     free_sbcs_charmap(cmap);
 129   }
 130
 131   cmap_cache = NULL;
 132
 133  /*
 134   * Loop through DBCS/VBCS charset map cache, free all memory...
 135   */
 136
 137   for (vmap = vmap_cache; vmap; vmap = vnext)
 138   {
 139     vnext = vmap->next;
 140
 141     free_vbcs_charmap(vmap);
 142
 143     free(vmap);
 144   }
 145
 146   vmap_cache = NULL;
 147
 148 #ifdef HAVE_PTHREAD_H
 149   pthread_mutex_unlock(&map_mutex);
 150 #endif /* HAVE_PTHREAD_H */
 151 }
 152
 153
 154 /*
 155  * '_cupsCharmapFree()' - Free a character set map.
 156  *
 157  * This does not actually free; use '_cupsCharmapFlush()' for that.
 158  */
 159
 160 void
 161 _cupsCharmapFree(
 162     const cups_encoding_t encoding)     /* I - Encoding */
 163 {
 164   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 165   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 166
 167
 168  /*
 169   * See if we already have this SBCS charset map loaded...
 170   */
 171
 172 #ifdef HAVE_PTHREAD_H
 173   pthread_mutex_lock(&map_mutex);
 174 #endif /* HAVE_PTHREAD_H */
 175
 176   for (cmap = cmap_cache; cmap; cmap = cmap->next)
 177   {
 178     if (cmap->encoding == encoding)
 179     {
 180       if (cmap->used > 0)
 181         cmap->used --;
 182       break;
 183     }
 184   }
 185
 186  /*
 187   * See if we already have this DBCS/VBCS charset map loaded...
 188   */
 189
 190   for (vmap = vmap_cache; vmap; vmap = vmap->next)
 191   {
 192     if (vmap->encoding == encoding)
 193     {
 194       if (vmap->used > 0)
 195         vmap->used --;
 196       break;
 197     }
 198   }
 199
 200 #ifdef HAVE_PTHREAD_H
 201   pthread_mutex_unlock(&map_mutex);
 202 #endif /* HAVE_PTHREAD_H */
 203 }
 204
 205
 206 /*
 207  * '_cupsCharmapGet()' - Get a character set map.
 208  *
 209  * This code handles single-byte (SBCS), double-byte (DBCS), and
 210  * variable-byte (VBCS) character sets _without_ charset escapes...
 211  * This code does not handle multiple-byte character sets (MBCS)
 212  * (such as ISO-2022-JP) with charset switching via escapes...
 213  */
 214
 215 void *                                  /* O - Charset map pointer */
 216 _cupsCharmapGet(
 217     const cups_encoding_t encoding)     /* I - Encoding */
 218 {
 219   void  *charmap;                       /* Charset map pointer */
 220
 221
 222   DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
 223
 224  /*
 225   * Check for valid arguments...
 226   */
 227
 228   if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 229   {
 230     DEBUG_puts("    Bad encoding, returning NULL!");
 231     return (NULL);
 232   }
 233
 234  /*
 235   * Lookup or get the charset map pointer and return...
 236   */
 237
 238 #ifdef HAVE_PTHREAD_H
 239   pthread_mutex_lock(&map_mutex);
 240 #endif /* HAVE_PTHREAD_H */
 241
 242   charmap = get_charmap(encoding);
 243
 244 #ifdef HAVE_PTHREAD_H
 245   pthread_mutex_unlock(&map_mutex);
 246 #endif /* HAVE_PTHREAD_H */
 247
 248   return (charmap);
 249 }
 250
 251
 252 /*
 253  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 254  *
 255  * This code handles single-byte (SBCS), double-byte (DBCS), and
 256  * variable-byte (VBCS) character sets _without_ charset escapes...
 257  * This code does not handle multiple-byte character sets (MBCS)
 258  * (such as ISO-2022-JP) with charset switching via escapes...
 259  */
 260
 261 int                                     /* O - Count or -1 on error */
 262 cupsCharsetToUTF8(
 263     cups_utf8_t *dest,                  /* O - Target string */
 264     const char *src,                    /* I - Source string */
 265     const int maxout,                   /* I - Max output */
 266     const cups_encoding_t encoding)     /* I - Encoding */
 267 {
 268   int   bytes;                          /* Number of bytes converted */
 269
 270
 271  /*
 272   * Check for valid arguments...
 273   */
 274
 275   DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
 276                 dest, src, maxout, encoding));
 277
 278   if (dest)
 279     *dest = '\0';
 280
 281   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 282   {
 283     DEBUG_puts("    Bad arguments, returning -1");
 284     return (-1);
 285   }
 286
 287  /*
 288   * Handle identity conversions...
 289   */
 290
 291   if (encoding == CUPS_UTF8 ||
 292       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 293   {
 294     strlcpy((char *)dest, src, maxout);
 295     return ((int)strlen((char *)dest));
 296   }
 297
 298  /*
 299   * Handle ISO-8859-1 to UTF-8 directly...
 300   */
 301
 302   if (encoding == CUPS_ISO8859_1)
 303   {
 304     int         ch;                     /* Character from string */
 305     cups_utf8_t *destptr,               /* Pointer into UTF-8 buffer */
 306                 *destend;               /* End of UTF-8 buffer */
 307
 308
 309     destptr = dest;
 310     destend = dest + maxout - 2;
 311
 312     while (*src && destptr < destend)
 313     {
 314       ch = *src++ & 255;
 315
 316       if (ch & 128)
 317       {
 318         *destptr++ = 0xc0 | (ch >> 6);
 319         *destptr++ = 0x80 | (ch & 0x3f);
 320       }
 321       else
 322         *destptr++ = ch;
 323     }
 324
 325     *destptr = '\0';
 326
 327     return ((int)(destptr - dest));
 328   }
 329
 330  /*
 331   * Convert input legacy charset to UTF-8...
 332   */
 333
 334 #ifdef HAVE_PTHREAD_H
 335   pthread_mutex_lock(&map_mutex);
 336 #endif /* HAVE_PTHREAD_H */
 337
 338   if (encoding < CUPS_ENCODING_SBCS_END)
 339     bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 340   else if (encoding < CUPS_ENCODING_VBCS_END)
 341     bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 342   else
 343   {
 344     DEBUG_puts("    Bad encoding, returning -1");
 345     bytes = -1;
 346   }
 347
 348 #ifdef HAVE_PTHREAD_H
 349   pthread_mutex_unlock(&map_mutex);
 350 #endif /* HAVE_PTHREAD_H */
 351
 352   return (bytes);
 353 }
 354
 355
 356 /*
 357  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 358  *
 359  * This code handles single-byte (SBCS), double-byte (DBCS), and
 360  * variable-byte (VBCS) character sets _without_ charset escapes...
 361  * This code does not handle multiple-byte character sets (MBCS)
 362  * (such as ISO-2022-JP) with charset switching via escapes...
 363  */
 364
 365 int                                     /* O - Count or -1 on error */
 366 cupsUTF8ToCharset(
 367     char                  *dest,        /* O - Target string */
 368     const cups_utf8_t     *src,         /* I - Source string */
 369     const int             maxout,       /* I - Max output */
 370     const cups_encoding_t encoding)     /* I - Encoding */
 371 {
 372   int   bytes;                          /* Number of bytes converted */
 373
 374
 375  /*
 376   * Check for valid arguments...
 377   */
 378
 379   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 380   {
 381     if (dest)
 382       *dest = '\0';
 383
 384     return (-1);
 385   }
 386
 387  /*
 388   * Handle identity conversions...
 389   */
 390
 391   if (encoding == CUPS_UTF8 ||
 392       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 393   {
 394     strlcpy(dest, (char *)src, maxout);
 395     return ((int)strlen(dest));
 396   }
 397
 398  /*
 399   * Handle UTF-8 to ISO-8859-1 directly...
 400   */
 401
 402   if (encoding == CUPS_ISO8859_1)
 403   {
 404     int         ch;                     /* Character from string */
 405     char        *destptr,               /* Pointer into ISO-8859-1 buffer */
 406                 *destend;               /* End of ISO-8859-1 buffer */
 407
 408
 409     destptr = dest;
 410     destend = dest + maxout - 1;
 411
 412     while (*src && destptr < destend)
 413     {
 414       ch = *src++;
 415
 416       if ((ch & 0xe0) == 0xc0)
 417       {
 418         ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
 419
 420         if (ch < 256)
 421           *destptr++ = ch;
 422         else
 423           *destptr++ = '?';
 424       }
 425       else if ((ch & 0xf0) == 0xe0 ||
 426                (ch & 0xf8) == 0xf0)
 427         *destptr++ = '?';
 428       else if (!(ch & 0x80))
 429         *destptr++ = ch;
 430     }
 431
 432     *destptr = '\0';
 433
 434     return ((int)(destptr - dest));
 435   }
 436
 437  /*
 438   * Convert input UTF-8 to legacy charset...
 439   */
 440
 441 #ifdef HAVE_PTHREAD_H
 442   pthread_mutex_lock(&map_mutex);
 443 #endif /* HAVE_PTHREAD_H */
 444
 445   if (encoding < CUPS_ENCODING_SBCS_END)
 446     bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 447   else if (encoding < CUPS_ENCODING_VBCS_END)
 448     bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 449   else
 450     bytes = -1;
 451
 452 #ifdef HAVE_PTHREAD_H
 453   pthread_mutex_unlock(&map_mutex);
 454 #endif /* HAVE_PTHREAD_H */
 455
 456   return (bytes);
 457 }
 458
 459
 460 /*
 461  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 462  *
 463  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 464  *
 465  *   UTF-32 char     UTF-8 char(s)
 466  *   --------------------------------------------------
 467  *        0 to 127 = 0xxxxxxx (US-ASCII)
 468  *     128 to 2047 = 110xxxxx 10yyyyyy
 469  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 470  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 471  *
 472  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 473  * which would convert to five- or six-octet UTF-8 sequences...
 474  */
 475
 476 int                                     /* O - Count or -1 on error */
 477 cupsUTF8ToUTF32(
 478     cups_utf32_t      *dest,            /* O - Target string */
 479     const cups_utf8_t *src,             /* I - Source string */
 480     const int         maxout)           /* I - Max output */
 481 {
 482   int           i;                      /* Looping variable */
 483   cups_utf8_t   ch;                     /* Character value */
 484   cups_utf8_t   next;                   /* Next character value */
 485   cups_utf32_t  ch32;                   /* UTF-32 character value */
 486
 487
 488  /*
 489   * Check for valid arguments and clear output...
 490   */
 491
 492   if (dest)
 493     *dest = 0;
 494
 495   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 496     return (-1);
 497
 498  /*
 499   * Convert input UTF-8 to output UTF-32 (and insert BOM)...
 500   */
 501
 502   *dest++ = 0xfeff;
 503
 504   for (i = maxout - 1; *src && i > 0; i --)
 505   {
 506     ch = *src++;
 507
 508    /*
 509     * Convert UTF-8 character(s) to UTF-32 character...
 510     */
 511
 512     if (!(ch & 0x80))
 513     {
 514      /*
 515       * One-octet UTF-8 <= 127 (US-ASCII)...
 516       */
 517
 518       *dest++ = ch;
 519       continue;
 520     }
 521     else if ((ch & 0xe0) == 0xc0)
 522     {
 523      /*
 524       * Two-octet UTF-8 <= 2047 (Latin-x)...
 525       */
 526
 527       next = *src++;
 528       if (!next)
 529         return (-1);
 530
 531       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 532
 533      /*
 534       * Check for non-shortest form (invalid UTF-8)...
 535       */
 536
 537       if (ch32 < 0x80)
 538         return (-1);
 539
 540       *dest++ = ch32;
 541     }
 542     else if ((ch & 0xf0) == 0xe0)
 543     {
 544      /*
 545       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 546       */
 547
 548       next = *src++;
 549       if (!next)
 550         return (-1);
 551
 552       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 553
 554       next = *src++;
 555       if (!next)
 556         return (-1);
 557
 558       ch32 = (ch32 << 6) | (next & 0x3f);
 559
 560      /*
 561       * Check for non-shortest form (invalid UTF-8)...
 562       */
 563
 564       if (ch32 < 0x800)
 565         return (-1);
 566
 567       *dest++ = ch32;
 568     }
 569     else if ((ch & 0xf8) == 0xf0)
 570     {
 571      /*
 572       * Four-octet UTF-8...
 573       */
 574
 575       next = *src++;
 576       if (!next)
 577         return (-1);
 578
 579       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 580
 581       next = *src++;
 582       if (!next)
 583         return (-1);
 584
 585       ch32 = (ch32 << 6) | (next & 0x3f);
 586
 587       next = *src++;
 588       if (!next)
 589         return (-1);
 590
 591       ch32 = (ch32 << 6) | (next & 0x3f);
 592
 593      /*
 594       * Check for non-shortest form (invalid UTF-8)...
 595       */
 596
 597       if (ch32 < 0x10000)
 598         return (-1);
 599
 600       *dest++ = ch32;
 601     }
 602     else
 603     {
 604      /*
 605       * More than 4-octet (invalid UTF-8 sequence)...
 606       */
 607
 608       return (-1);
 609     }
 610
 611    /*
 612     * Check for UTF-16 surrogate (illegal UTF-8)...
 613     */
 614
 615     if (ch32 >= 0xd800 && ch32 <= 0xdfff)
 616       return (-1);
 617   }
 618
 619   *dest = 0;
 620
 621   return (i);
 622 }
 623
 624
 625 /*
 626  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 627  *
 628  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 629  *
 630  *   UTF-32 char     UTF-8 char(s)
 631  *   --------------------------------------------------
 632  *        0 to 127 = 0xxxxxxx (US-ASCII)
 633  *     128 to 2047 = 110xxxxx 10yyyyyy
 634  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 635  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 636  *
 637  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 638  * which would convert to five- or six-octet UTF-8 sequences...
 639  */
 640
 641 int                                     /* O - Count or -1 on error */
 642 cupsUTF32ToUTF8(
 643     cups_utf8_t        *dest,           /* O - Target string */
 644     const cups_utf32_t *src,            /* I - Source string */
 645     const int          maxout)          /* I - Max output */
 646 {
 647   cups_utf8_t   *start;                 /* Start of destination string */
 648   int           i;                      /* Looping variable */
 649   int           swap;                   /* Byte-swap input to output */
 650   cups_utf32_t  ch;                     /* Character value */
 651
 652
 653  /*
 654   * Check for valid arguments and clear output...
 655   */
 656
 657   if (dest)
 658     *dest = '\0';
 659
 660   if (!dest || !src || maxout < 1)
 661     return (-1);
 662
 663  /*
 664   * Check for leading BOM in UTF-32 and inverted BOM...
 665   */
 666
 667   start = dest;
 668   swap  = *src == 0xfffe0000;
 669
 670   if (*src == 0xfffe0000 || *src == 0xfeff)
 671     src ++;
 672
 673  /*
 674   * Convert input UTF-32 to output UTF-8...
 675   */
 676
 677   for (i = maxout - 1; *src && i > 0;)
 678   {
 679     ch = *src++;
 680
 681    /*
 682     * Byte swap input UTF-32, if necessary...
 683     * (only byte-swapping 24 of 32 bits)
 684     */
 685
 686     if (swap)
 687       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 688
 689    /*
 690     * Check for beyond Plane 16 (invalid UTF-32)...
 691     */
 692
 693     if (ch > 0x10ffff)
 694       return (-1);
 695
 696    /*
 697     * Convert UTF-32 character to UTF-8 character(s)...
 698     */
 699
 700     if (ch < 0x80)
 701     {
 702      /*
 703       * One-octet UTF-8 <= 127 (US-ASCII)...
 704       */
 705
 706       *dest++ = (cups_utf8_t)ch;
 707       i --;
 708     }
 709     else if (ch < 0x800)
 710     {
 711      /*
 712       * Two-octet UTF-8 <= 2047 (Latin-x)...
 713       */
 714
 715       if (i < 2)
 716         return (-1);
 717
 718       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 719       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 720       i -= 2;
 721     }
 722     else if (ch < 0x10000)
 723     {
 724      /*
 725       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 726       */
 727
 728       if (i < 3)
 729         return (-1);
 730
 731       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 732       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 733       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 734       i -= 3;
 735     }
 736     else
 737     {
 738      /*
 739       * Four-octet UTF-8...
 740       */
 741
 742       if (i < 4)
 743         return (-1);
 744
 745       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 746       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 747       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 748       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 749       i -= 4;
 750     }
 751   }
 752
 753   *dest = '\0';
 754
 755   return ((int)(dest - start));
 756 }
 757
 758
 759 /*
 760  * 'compare_wide()' - Compare key for wide (VBCS) match.
 761  */
 762
 763 static int
 764 compare_wide(const void *k1,            /* I - Key char */
 765              const void *k2)            /* I - Map char */
 766 {
 767   cups_vbcs_t   key;                    /* Legacy key character */
 768   cups_vbcs_t   map;                    /* Legacy map character */
 769
 770
 771   key = *((cups_vbcs_t *)k1);
 772   map = ((_cups_wide2uni_t *)k2)->widechar;
 773
 774   return ((int)(key - map));
 775 }
 776
 777
 778 /*
 779  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 780  */
 781
 782 static int                              /* O - Count or -1 on error */
 783 conv_sbcs_to_utf8(
 784     cups_utf8_t           *dest,        /* O - Target string */
 785     const cups_sbcs_t     *src,         /* I - Source string */
 786     int                   maxout,       /* I - Max output */
 787     const cups_encoding_t encoding)     /* I - Encoding */
 788 {
 789   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 790   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 791   cups_sbcs_t   legchar;                /* Legacy character value */
 792   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 793                 *workptr;               /* Pointer into string */
 794
 795
 796  /*
 797   * Find legacy charset map in cache...
 798   */
 799
 800   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 801     return (-1);
 802
 803  /*
 804   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 805   */
 806
 807   work[0] = 0xfeff;
 808   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 809   {
 810     legchar = *src++;
 811
 812    /*
 813     * Convert ASCII verbatim (optimization)...
 814     */
 815
 816     if (legchar < 0x80)
 817       *workptr++ = (cups_utf32_t)legchar;
 818     else
 819     {
 820      /*
 821       * Convert unknown character to Replacement Character...
 822       */
 823
 824       crow = cmap->char2uni + legchar;
 825
 826       if (!*crow)
 827         *workptr++ = 0xfffd;
 828       else
 829         *workptr++ = (cups_utf32_t)*crow;
 830     }
 831   }
 832
 833   *workptr = 0;
 834
 835  /*
 836   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 837   */
 838
 839   cmap->used --;
 840
 841   return (cupsUTF32ToUTF8(dest, work, maxout));
 842 }
 843
 844
 845 /*
 846  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 847  */
 848
 849 static int                              /* O - Count or -1 on error */
 850 conv_utf8_to_sbcs(
 851     cups_sbcs_t           *dest,        /* O - Target string */
 852     const cups_utf8_t     *src,         /* I - Source string */
 853     int                   maxout,       /* I - Max output */
 854     const cups_encoding_t encoding)     /* I - Encoding */
 855 {
 856   cups_sbcs_t   *start;                 /* Start of destination string */
 857   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 858   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 859   cups_utf32_t  unichar;                /* Character value */
 860   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 861                 *workptr;               /* Pointer into string */
 862
 863
 864  /*
 865   * Find legacy charset map in cache...
 866   */
 867
 868   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 869     return (-1);
 870
 871  /*
 872   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 873   */
 874
 875   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 876     return (-1);
 877
 878  /*
 879   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 880   */
 881
 882   for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
 883   {
 884     unichar = *workptr++;
 885     if (!unichar)
 886       break;
 887
 888    /*
 889     * Convert ASCII verbatim (optimization)...
 890     */
 891
 892     if (unichar < 0x80)
 893     {
 894       *dest++ = (cups_sbcs_t)unichar;
 895       continue;
 896     }
 897
 898    /*
 899     * Convert unknown character to visible replacement...
 900     */
 901
 902     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 903
 904     if (srow)
 905       srow += (int)(unichar & 0xff);
 906
 907     if (!srow || !*srow)
 908       *dest++ = '?';
 909     else
 910       *dest++ = *srow;
 911   }
 912
 913   *dest = '\0';
 914
 915   cmap->used --;
 916
 917   return ((int)(dest - start));
 918 }
 919
 920
 921 /*
 922  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 923  */
 924
 925 static int                              /* O - Count or -1 on error */
 926 conv_utf8_to_vbcs(
 927     cups_sbcs_t           *dest,        /* O - Target string */
 928     const cups_utf8_t     *src,         /* I - Source string */
 929     int                   maxout,       /* I - Max output */
 930     const cups_encoding_t encoding)     /* I - Encoding */
 931 {
 932   cups_sbcs_t   *start;                 /* Start of destination string */
 933   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
 934   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
 935   cups_utf32_t  unichar;                /* Character value */
 936   cups_vbcs_t   legchar;                /* Legacy character value */
 937   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 938                 *workptr;               /* Pointer into string */
 939
 940
 941  /*
 942   * Find legacy charset map in cache...
 943   */
 944
 945   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
 946     return (-1);
 947
 948  /*
 949   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 950   */
 951
 952   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 953     return (-1);
 954
 955  /*
 956   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
 957   */
 958
 959   for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
 960   {
 961     unichar = *workptr++;
 962     if (!unichar)
 963       break;
 964
 965    /*
 966     * Convert ASCII verbatim (optimization)...
 967     */
 968
 969     if (unichar < 0x80)
 970     {
 971       *dest++ = (cups_sbcs_t)unichar;
 972       continue;
 973     }
 974
 975    /*
 976     * Convert unknown character to visible replacement...
 977     */
 978
 979     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
 980
 981     if (vrow)
 982       vrow += (int)(unichar & 0xff);
 983
 984     if (!vrow || !*vrow)
 985       legchar = (cups_vbcs_t)'?';
 986     else
 987       legchar = (cups_vbcs_t)*vrow;
 988
 989    /*
 990     * Save n-byte legacy character...
 991     */
 992
 993     if (legchar > 0xffffff)
 994     {
 995       if (maxout < 5)
 996         return (-1);
 997
 998       *dest++ = (cups_sbcs_t)(legchar >> 24);
 999       *dest++ = (cups_sbcs_t)(legchar >> 16);
1000       *dest++ = (cups_sbcs_t)(legchar >> 8);
1001       *dest++ = (cups_sbcs_t)legchar;
1002
1003       maxout -= 3;
1004     }
1005     else if (legchar > 0xffff)
1006     {
1007       if (maxout < 4)
1008         return (-1);
1009
1010       *dest++ = (cups_sbcs_t)(legchar >> 16);
1011       *dest++ = (cups_sbcs_t)(legchar >> 8);
1012       *dest++ = (cups_sbcs_t)legchar;
1013
1014       maxout -= 2;
1015     }
1016     else if (legchar > 0xff)
1017     {
1018       *dest++ = (cups_sbcs_t)(legchar >> 8);
1019       *dest++ = (cups_sbcs_t)legchar;
1020
1021       maxout --;
1022     }
1023   }
1024
1025   *dest = '\0';
1026
1027   vmap->used --;
1028
1029   return ((int)(dest - start));
1030 }
1031
1032
1033 /*
1034  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1035  */
1036
1037 static int                              /* O - Count or -1 on error */
1038 conv_vbcs_to_utf8(
1039     cups_utf8_t           *dest,        /* O - Target string */
1040     const cups_sbcs_t     *src,         /* I - Source string */
1041     int                   maxout,       /* I - Max output */
1042     const cups_encoding_t encoding)     /* I - Encoding */
1043 {
1044   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1045   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1046   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1047   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
1048   cups_vbcs_t   legchar;                /* Legacy character value */
1049   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1050                 *workptr;               /* Pointer into string */
1051
1052
1053  /*
1054   * Find legacy charset map in cache...
1055   */
1056
1057   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1058     return (-1);
1059
1060  /*
1061   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1062   */
1063
1064   work[0] = 0xfeff;
1065   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1066   {
1067     legchar  = *src++;
1068     leadchar = (cups_sbcs_t)legchar;
1069
1070    /*
1071     * Convert ASCII verbatim (optimization)...
1072     */
1073
1074     if (legchar < 0x80)
1075     {
1076       *workptr++ = (cups_utf32_t)legchar;
1077       continue;
1078     }
1079
1080    /*
1081     * Convert 2-byte legacy character...
1082     */
1083
1084     if (vmap->lead2char[(int)leadchar] == leadchar)
1085     {
1086       if (!*src)
1087         return (-1);
1088
1089       legchar = (legchar << 8) | *src++;
1090
1091      /*
1092       * Convert unknown character to Replacement Character...
1093       */
1094
1095       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1096       if (crow)
1097         crow += (int) (legchar & 0xff);
1098
1099       if (!crow || !*crow)
1100         *workptr++ = 0xfffd;
1101       else
1102         *workptr++ = (cups_utf32_t)*crow;
1103       continue;
1104     }
1105
1106    /*
1107     * Fetch 3-byte or 4-byte legacy character...
1108     */
1109
1110     if (vmap->lead3char[(int)leadchar] == leadchar)
1111     {
1112       if (!*src || !src[1])
1113         return (-1);
1114
1115       legchar = (legchar << 8) | *src++;
1116       legchar = (legchar << 8) | *src++;
1117     }
1118     else if (vmap->lead4char[(int)leadchar] == leadchar)
1119     {
1120       if (!*src || !src[1] || !src[2])
1121         return (-1);
1122
1123       legchar = (legchar << 8) | *src++;
1124       legchar = (legchar << 8) | *src++;
1125       legchar = (legchar << 8) | *src++;
1126     }
1127     else
1128       return (-1);
1129
1130    /*
1131     * Find 3-byte or 4-byte legacy character...
1132     */
1133
1134     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1135                                            vmap->wide2uni,
1136                                            vmap->widecount,
1137                                            sizeof(_cups_wide2uni_t),
1138                                            compare_wide);
1139
1140    /*
1141     * Convert unknown character to Replacement Character...
1142     */
1143
1144     if (!wide2uni || !wide2uni->unichar)
1145       *workptr++ = 0xfffd;
1146     else
1147       *workptr++ = wide2uni->unichar;
1148   }
1149
1150   *workptr = 0;
1151
1152   vmap->used --;
1153
1154  /*
1155   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1156   */
1157
1158   return (cupsUTF32ToUTF8(dest, work, maxout));
1159 }
1160
1161
1162 /*
1163  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1164  */
1165
1166 static void
1167 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1168 {
1169   int           i;                      /* Looping variable */
1170
1171
1172   for (i = 0; i < 256; i ++)
1173     if (cmap->uni2char[i])
1174       free(cmap->uni2char[i]);
1175
1176   free(cmap);
1177 }
1178
1179
1180 /*
1181  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1182  */
1183
1184 static void
1185 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1186 {
1187   int           i;                      /* Looping variable */
1188
1189
1190   for (i = 0; i < 256; i ++)
1191     if (vmap->char2uni[i])
1192       free(vmap->char2uni[i]);
1193
1194   for (i = 0; i < 256; i ++)
1195     if (vmap->uni2char[i])
1196       free(vmap->uni2char[i]);
1197
1198   if (vmap->wide2uni)
1199     free(vmap->wide2uni);
1200
1201   free(vmap);
1202 }
1203
1204
1205 /*
1206  * 'get_charmap()' - Lookup or get a character set map (private).
1207  *
1208  * This code handles single-byte (SBCS), double-byte (DBCS), and
1209  * variable-byte (VBCS) character sets _without_ charset escapes...
1210  * This code does not handle multiple-byte character sets (MBCS)
1211  * (such as ISO-2022-JP) with charset switching via escapes...
1212  */
1213
1214
1215 static void *                           /* O - Charset map pointer */
1216 get_charmap(
1217     const cups_encoding_t encoding)     /* I - Encoding */
1218 {
1219   char          filename[1024];         /* Filename for charset map file */
1220   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1221
1222
1223  /*
1224   * Get the data directory and charset map name...
1225   */
1226
1227   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1228            cg->cups_datadir, _cupsEncodingName(encoding));
1229
1230   DEBUG_printf(("    filename=\"%s\"\n", filename));
1231
1232  /*
1233   * Read charset map input file into cache...
1234   */
1235
1236   if (encoding < CUPS_ENCODING_SBCS_END)
1237     return (get_sbcs_charmap(encoding, filename));
1238   else if (encoding < CUPS_ENCODING_VBCS_END)
1239     return (get_vbcs_charmap(encoding, filename));
1240   else
1241     return (NULL);
1242 }
1243
1244
1245 /*
1246  * 'get_charmap_count()' - Count lines in a charmap file.
1247  */
1248
1249 static int                              /* O - Count or -1 on error */
1250 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1251 {
1252   int   count;                          /* Number of lines */
1253   char  line[256];                      /* Line from input map file */
1254
1255
1256  /*
1257   * Count lines in map input file...
1258   */
1259
1260   count = 0;
1261
1262   while (cupsFileGets(fp, line, sizeof(line)))
1263     if (line[0] == '0')
1264       count ++;
1265
1266  /*
1267   * Return the number of lines...
1268   */
1269
1270   if (count > 0)
1271     return (count);
1272   else
1273     return (-1);
1274 }
1275
1276
1277 /*
1278  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1279  */
1280
1281 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1282 get_sbcs_charmap(
1283     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1284     const char            *filename)    /* I - Charmap Filename */
1285 {
1286   unsigned long legchar;                /* Legacy character value */
1287   cups_utf32_t  unichar;                /* Unicode character value */
1288   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1289   cups_file_t   *fp;                    /* Charset map file pointer */
1290   char          *s;                     /* Line parsing pointer */
1291   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1292   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1293   char          line[256];              /* Line from charset map file */
1294
1295
1296  /*
1297   * See if we already have this SBCS charset map loaded...
1298   */
1299
1300   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1301   {
1302     if (cmap->encoding == encoding)
1303     {
1304       cmap->used ++;
1305       DEBUG_printf(("    returning existing cmap=%p\n", cmap));
1306
1307       return ((void *)cmap);
1308     }
1309   }
1310
1311  /*
1312   * Open SBCS charset map input file...
1313   */
1314
1315   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1316     return (NULL);
1317
1318  /*
1319   * Allocate memory for SBCS charset map...
1320   */
1321
1322   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1323   {
1324     cupsFileClose(fp);
1325     DEBUG_puts("    Unable to allocate memory!");
1326
1327     return (NULL);
1328   }
1329
1330   cmap->used ++;
1331   cmap->encoding = encoding;
1332
1333  /*
1334   * Save SBCS charset map into memory for transcoding...
1335   */
1336
1337   while (cupsFileGets(fp, line, sizeof(line)))
1338   {
1339     if (line[0] != '0')
1340       continue;
1341
1342     legchar = strtol(line, &s, 16);
1343     if (legchar < 0 || legchar > 0xff)
1344       goto sbcs_error;
1345
1346     unichar = strtol(s, NULL, 16);
1347     if (unichar < 0 || unichar > 0xffff)
1348       goto sbcs_error;
1349
1350    /*
1351     * Save legacy to Unicode mapping in direct lookup table...
1352     */
1353
1354     crow  = cmap->char2uni + legchar;
1355     *crow = (cups_ucs2_t)(unichar & 0xffff);
1356
1357    /*
1358     * Save Unicode to legacy mapping in indirect lookup table...
1359     */
1360
1361     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1362     if (!srow)
1363     {
1364       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1365       if (!srow)
1366         goto sbcs_error;
1367
1368       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1369     }
1370
1371     srow += unichar & 0xff;
1372
1373    /*
1374     * Convert Replacement Character to visible replacement...
1375     */
1376
1377     if (unichar == 0xfffd)
1378       legchar = (unsigned long)'?';
1379
1380    /*
1381     * First (oldest) legacy character uses Unicode mapping cell...
1382     */
1383
1384     if (!*srow)
1385       *srow = (cups_sbcs_t)legchar;
1386   }
1387
1388   cupsFileClose(fp);
1389
1390  /*
1391   * Add it to the cache and return...
1392   */
1393
1394   cmap->next = cmap_cache;
1395   cmap_cache = cmap;
1396
1397   DEBUG_printf(("    returning new cmap=%p\n", cmap));
1398
1399   return (cmap);
1400
1401  /*
1402   * If we get here, there was an error in the cmap file...
1403   */
1404
1405   sbcs_error:
1406
1407   free_sbcs_charmap(cmap);
1408
1409   cupsFileClose(fp);
1410
1411   DEBUG_puts("    Error, returning NULL!");
1412
1413   return (NULL);
1414 }
1415
1416
1417 /*
1418  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1419  */
1420
1421 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1422 get_vbcs_charmap(
1423     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1424     const char            *filename)    /* I - Charmap Filename */
1425 {
1426   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1427   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1428   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1429   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1430   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1431   unsigned long legchar;                /* Legacy character value */
1432   cups_utf32_t  unichar;                /* Unicode character value */
1433   int           mapcount;               /* Count of lines in charmap file */
1434   cups_file_t   *fp;                    /* Charset map file pointer */
1435   char          *s;                     /* Line parsing pointer */
1436   char          line[256];              /* Line from charset map file */
1437   int           i;                      /* Loop variable */
1438   int           wide;                   /* 32-bit legacy char */
1439
1440
1441   DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1442                 encoding, filename));
1443
1444  /*
1445   * See if we already have this DBCS/VBCS charset map loaded...
1446   */
1447
1448   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1449   {
1450     if (vmap->encoding == encoding)
1451     {
1452       vmap->used ++;
1453       DEBUG_printf(("    returning existing vmap=%p\n", vmap));
1454
1455       return ((void *)vmap);
1456     }
1457   }
1458
1459  /*
1460   * Open VBCS charset map input file...
1461   */
1462
1463   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1464   {
1465     DEBUG_printf(("    Unable to open file: %s\n", strerror(errno)));
1466
1467     return (NULL);
1468   }
1469
1470  /*
1471   * Count lines in charmap file...
1472   */
1473
1474   if ((mapcount = get_charmap_count(fp)) <= 0)
1475   {
1476     DEBUG_puts("    Unable to get charmap count!");
1477
1478     return (NULL);
1479   }
1480
1481   DEBUG_printf(("    mapcount=%d\n", mapcount));
1482
1483  /*
1484   * Allocate memory for DBCS/VBCS charset map...
1485   */
1486
1487   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1488   {
1489     cupsFileClose(fp);
1490     DEBUG_puts("    Unable to allocate memory!");
1491
1492     return (NULL);
1493   }
1494
1495   vmap->used ++;
1496   vmap->encoding = encoding;
1497
1498  /*
1499   * Save DBCS/VBCS charset map into memory for transcoding...
1500   */
1501
1502   leadchar = 0;
1503   wide2uni = NULL;
1504
1505   cupsFileRewind(fp);
1506
1507   i    = 0;
1508   wide = 0;
1509
1510   while (cupsFileGets(fp, line, sizeof(line)))
1511   {
1512     if (line[0] != '0')
1513       continue;
1514
1515     legchar = strtoul(line, &s, 16);
1516     if (legchar == ULONG_MAX)
1517       goto vbcs_error;
1518
1519     unichar = strtol(s, NULL, 16);
1520     if (unichar < 0 || unichar > 0xffff)
1521       goto vbcs_error;
1522
1523     i ++;
1524
1525 /*    DEBUG_printf(("    i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1526                   legchar, (unsigned)unichar)); */
1527
1528    /*
1529     * Save lead char of 2/3/4-byte legacy char...
1530     */
1531
1532     if (legchar > 0xff && legchar <= 0xffff)
1533     {
1534       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1535       vmap->lead2char[leadchar] = leadchar;
1536     }
1537
1538     if (legchar > 0xffff && legchar <= 0xffffff)
1539     {
1540       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1541       vmap->lead3char[leadchar] = leadchar;
1542     }
1543
1544     if (legchar > 0xffffff)
1545     {
1546       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1547       vmap->lead4char[leadchar] = leadchar;
1548     }
1549
1550    /*
1551     * Save Legacy to Unicode mapping...
1552     */
1553
1554     if (legchar <= 0xffff)
1555     {
1556      /*
1557       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1558       */
1559
1560       crow = vmap->char2uni[(int)leadchar];
1561       if (!crow)
1562       {
1563         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1564         if (!crow)
1565           goto vbcs_error;
1566
1567         vmap->char2uni[(int)leadchar] = crow;
1568       }
1569
1570       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1571     }
1572     else
1573     {
1574      /*
1575       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1576       */
1577
1578       if (!wide)
1579       {
1580         wide            = 1;
1581         vmap->widecount = (mapcount - i + 1);
1582         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1583                                                      sizeof(_cups_wide2uni_t));
1584         if (!wide2uni)
1585           goto vbcs_error;
1586
1587         vmap->wide2uni = wide2uni;
1588       }
1589
1590       wide2uni->widechar = (cups_vbcs_t)legchar;
1591       wide2uni->unichar  = (cups_ucs2_t)unichar;
1592       wide2uni ++;
1593     }
1594
1595    /*
1596     * Save Unicode to legacy mapping in indirect lookup table...
1597     */
1598
1599     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1600     if (!vrow)
1601     {
1602       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1603       if (!vrow)
1604         goto vbcs_error;
1605
1606       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1607     }
1608
1609     vrow += (int)(unichar & 0xff);
1610
1611    /*
1612     * Convert Replacement Character to visible replacement...
1613     */
1614
1615     if (unichar == 0xfffd)
1616       legchar = (unsigned long)'?';
1617
1618    /*
1619     * First (oldest) legacy character uses Unicode mapping cell...
1620     */
1621
1622     if (!*vrow)
1623       *vrow = (cups_vbcs_t)legchar;
1624   }
1625
1626   vmap->charcount = (i - vmap->widecount);
1627
1628   cupsFileClose(fp);
1629
1630  /*
1631   * Add it to the cache and return...
1632   */
1633
1634   vmap->next     = vmap_cache;
1635   vmap_cache = vmap;
1636
1637   DEBUG_printf(("    returning new vmap=%p\n", vmap));
1638
1639   return (vmap);
1640
1641  /*
1642   * If we get here, the file contains errors...
1643   */
1644
1645   vbcs_error:
1646
1647   free_vbcs_charmap(vmap);
1648
1649   cupsFileClose(fp);
1650
1651   DEBUG_puts("    Error, returning NULL!");
1652
1653   return (NULL);
1654 }
1655
1656
1657 /*
1658  * End of "$Id: transcode.c 6188 2007-01-10 16:23:06Z mike $"
1659  */