cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 6649 2007-07-11 21:46:42Z mike $"
   3  *
   4  *   Transcoding support for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 2007-2008 by Apple Inc.
   7  *   Copyright 1997-2007 by Easy Software Products.
   8  *
   9  *   These coded instructions, statements, and computer programs are the
  10  *   property of Apple Inc. and are protected by Federal copyright
  11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  *   which should have been included with this file.  If this file is
  13  *   missing or damaged, see the license at "http://www.cups.org/".
  14  *
  15  *   This file is subject to the Apple OS-Developed Software exception.
  16  *
  17  * Contents:
  18  *
  19  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  20  *   _cupsCharmapFree()  - Free a character set map.
  21  *   _cupsCharmapGet()   - Get a character set map.
  22  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  23  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  24  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  25  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  26  *   compare_wide()      - Compare key for wide (VBCS) match.
  27  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  28  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  29  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  30  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  31  *   free_sbcs_charmap() - Free memory used by a single byte character set.
  32  *   free_vbcs_charmap() - Free memory used by a variable byte character set.
  33  *   get_charmap()       - Lookup or get a character set map (private).
  34  *   get_charmap_count() - Count lines in a charmap file.
  35  *   get_sbcs_charmap()  - Get SBCS Charmap.
  36  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  37  */
  38
  39 /*
  40  * Include necessary headers...
  41  */
  42
  43 #include "globals.h"
  44 #include "debug.h"
  45 #include <limits.h>
  46 #include <stdlib.h>
  47 #include <errno.h>
  48 #include <time.h>
  49
  50
  51 /*
  52  * Local globals...
  53  */
  54
  55 #ifdef HAVE_PTHREAD_H
  56 static pthread_mutex_t  map_mutex = PTHREAD_MUTEX_INITIALIZER;
  57                                         /* Mutex locked around charmap cache access */
  58 #endif /* HAVE_PTHREAD_H */
  59 static _cups_cmap_t     *cmap_cache = NULL;
  60                                         /* SBCS charmap cache (first map, linked via 'next') */
  61 static _cups_vmap_t     *vmap_cache = NULL;
  62                                         /* VBCS charmap cache (first map, linked via 'next') */
  63
  64
  65 /*
  66  * Local functions...
  67  */
  68
     /* Comparator: VBCS key character against a wide-to-Unicode map entry */
  69 static int              compare_wide(const void *k1, const void *k2);
     /* Converters between a legacy charset and UTF-8; each returns a count or -1 */
  70 static int              conv_sbcs_to_utf8(cups_utf8_t *dest,
  71                                           const cups_sbcs_t *src,
  72                                           int maxout,
  73                                           const cups_encoding_t encoding);
  74 static int              conv_utf8_to_sbcs(cups_sbcs_t *dest,
  75                                           const cups_utf8_t *src,
  76                                           int maxout,
  77                                           const cups_encoding_t encoding);
  78 static int              conv_utf8_to_vbcs(cups_sbcs_t *dest,
  79                                           const cups_utf8_t *src,
  80                                           int maxout,
  81                                           const cups_encoding_t encoding);
  82 static int              conv_vbcs_to_utf8(cups_utf8_t *dest,
  83                                           const cups_sbcs_t *src,
  84                                           int maxout,
  85                                           const cups_encoding_t encoding);
     /* Release the memory used by one SBCS or VBCS charmap */
  86 static void             free_sbcs_charmap(_cups_cmap_t *sbcs);
  87 static void             free_vbcs_charmap(_cups_vmap_t *vbcs);
     /* Charmap lookup and loading helpers */
  88 static void             *get_charmap(const cups_encoding_t encoding);
  89 static int              get_charmap_count(cups_file_t *fp);
  90 static _cups_cmap_t     *get_sbcs_charmap(const cups_encoding_t encoding,
  91                                           const char *filename);
  92 static _cups_vmap_t     *get_vbcs_charmap(const cups_encoding_t encoding,
  93                                           const char *filename);
  94
  95
  96 /*
  97  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
  98  */
  99
 100 void
 101 _cupsCharmapFlush(void)
 102 {
 103   _cups_cmap_t  *cmap,                  /* Legacy SBCS / Unicode Charset Map */
 104                 *cnext;                 /* Next Legacy SBCS Charset Map */
 105   _cups_vmap_t  *vmap,                  /* Legacy VBCS / Unicode Charset Map */
 106                 *vnext;                 /* Next Legacy VBCS Charset Map */
 107
 108
 109 #ifdef HAVE_PTHREAD_H
 110   pthread_mutex_lock(&map_mutex);
 111 #endif /* HAVE_PTHREAD_H */
 112
 113  /*
 114   * Loop through SBCS charset map cache, free all memory...
 115   */
 116
 117   for (cmap = cmap_cache; cmap; cmap = cnext)
 118   {
 119     cnext = cmap->next;
 120
 121     free_sbcs_charmap(cmap);
 122   }
 123
 124   cmap_cache = NULL;
 125
 126  /*
 127   * Loop through DBCS/VBCS charset map cache, free all memory...
 128   */
 129
 130   for (vmap = vmap_cache; vmap; vmap = vnext)
 131   {
 132     vnext = vmap->next;
 133
 134     free_vbcs_charmap(vmap);
 135   }
 136
 137   vmap_cache = NULL;
 138
 139 #ifdef HAVE_PTHREAD_H
 140   pthread_mutex_unlock(&map_mutex);
 141 #endif /* HAVE_PTHREAD_H */
 142 }
 143
 144
 145 /*
 146  * '_cupsCharmapFree()' - Free a character set map.
 147  *
 148  * This does not actually free; use '_cupsCharmapFlush()' for that.
 149  */
 150
 151 void
 152 _cupsCharmapFree(
 153     const cups_encoding_t encoding)     /* I - Encoding */
 154 {
 155   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 156   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 157
 158
 159  /*
 160   * See if we already have this SBCS charset map loaded...
 161   */
 162
 163 #ifdef HAVE_PTHREAD_H
 164   pthread_mutex_lock(&map_mutex);
 165 #endif /* HAVE_PTHREAD_H */
 166
 167   for (cmap = cmap_cache; cmap; cmap = cmap->next)
 168   {
 169     if (cmap->encoding == encoding)
 170     {
 171       if (cmap->used > 0)
 172         cmap->used --;
 173       break;
 174     }
 175   }
 176
 177  /*
 178   * See if we already have this DBCS/VBCS charset map loaded...
 179   */
 180
 181   for (vmap = vmap_cache; vmap; vmap = vmap->next)
 182   {
 183     if (vmap->encoding == encoding)
 184     {
 185       if (vmap->used > 0)
 186         vmap->used --;
 187       break;
 188     }
 189   }
 190
 191 #ifdef HAVE_PTHREAD_H
 192   pthread_mutex_unlock(&map_mutex);
 193 #endif /* HAVE_PTHREAD_H */
 194 }
 195
 196
 197 /*
 198  * '_cupsCharmapGet()' - Get a character set map.
 199  *
 200  * This code handles single-byte (SBCS), double-byte (DBCS), and
 201  * variable-byte (VBCS) character sets _without_ charset escapes...
 202  * This code does not handle multiple-byte character sets (MBCS)
 203  * (such as ISO-2022-JP) with charset switching via escapes...
 204  */
 205
 206 void *                                  /* O - Charset map pointer */
 207 _cupsCharmapGet(
 208     const cups_encoding_t encoding)     /* I - Encoding */
 209 {
 210   void  *charmap;                       /* Charset map pointer */
 211
 212
 213   DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
 214
 215  /*
 216   * Check for valid arguments...
 217   */
 218
 219   if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 220   {
 221     DEBUG_puts("    Bad encoding, returning NULL!");
 222     return (NULL);
 223   }
 224
 225  /*
 226   * Lookup or get the charset map pointer and return...
 227   */
 228
 229 #ifdef HAVE_PTHREAD_H
 230   pthread_mutex_lock(&map_mutex);
 231 #endif /* HAVE_PTHREAD_H */
 232
 233   charmap = get_charmap(encoding);
 234
 235 #ifdef HAVE_PTHREAD_H
 236   pthread_mutex_unlock(&map_mutex);
 237 #endif /* HAVE_PTHREAD_H */
 238
 239   return (charmap);
 240 }
 241
 242
 243 /*
 244  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 245  *
 246  * This code handles single-byte (SBCS), double-byte (DBCS), and
 247  * variable-byte (VBCS) character sets _without_ charset escapes...
 248  * This code does not handle multiple-byte character sets (MBCS)
 249  * (such as ISO-2022-JP) with charset switching via escapes...
 250  */
 251
 252 int                                     /* O - Count or -1 on error */
 253 cupsCharsetToUTF8(
 254     cups_utf8_t *dest,                  /* O - Target string */
 255     const char *src,                    /* I - Source string */
 256     const int maxout,                   /* I - Max output */
 257     const cups_encoding_t encoding)     /* I - Encoding */
 258 {
 259   int   bytes;                          /* Number of bytes converted */
 260
 261
 262  /*
 263   * Check for valid arguments...
 264   */
 265
 266   DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
 267                 dest, src, maxout, encoding));
 268
 269   if (dest)
 270     *dest = '\0';
 271
 272   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 273   {
 274     DEBUG_puts("    Bad arguments, returning -1");
 275     return (-1);
 276   }
 277
 278  /*
 279   * Handle identity conversions...
 280   */
 281
 282   if (encoding == CUPS_UTF8 ||
 283       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 284   {
 285     strlcpy((char *)dest, src, maxout);
 286     return ((int)strlen((char *)dest));
 287   }
 288
 289  /*
 290   * Handle ISO-8859-1 to UTF-8 directly...
 291   */
 292
 293   if (encoding == CUPS_ISO8859_1)
 294   {
 295     int         ch;                     /* Character from string */
 296     cups_utf8_t *destptr,               /* Pointer into UTF-8 buffer */
 297                 *destend;               /* End of UTF-8 buffer */
 298
 299
 300     destptr = dest;
 301     destend = dest + maxout - 2;
 302
 303     while (*src && destptr < destend)
 304     {
 305       ch = *src++ & 255;
 306
 307       if (ch & 128)
 308       {
 309         *destptr++ = 0xc0 | (ch >> 6);
 310         *destptr++ = 0x80 | (ch & 0x3f);
 311       }
 312       else
 313         *destptr++ = ch;
 314     }
 315
 316     *destptr = '\0';
 317
 318     return ((int)(destptr - dest));
 319   }
 320
 321  /*
 322   * Convert input legacy charset to UTF-8...
 323   */
 324
 325 #ifdef HAVE_PTHREAD_H
 326   pthread_mutex_lock(&map_mutex);
 327 #endif /* HAVE_PTHREAD_H */
 328
 329   if (encoding < CUPS_ENCODING_SBCS_END)
 330     bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 331   else
 332     bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 333
 334 #ifdef HAVE_PTHREAD_H
 335   pthread_mutex_unlock(&map_mutex);
 336 #endif /* HAVE_PTHREAD_H */
 337
 338   return (bytes);
 339 }
 340
 341
 342 /*
 343  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 344  *
 345  * This code handles single-byte (SBCS), double-byte (DBCS), and
 346  * variable-byte (VBCS) character sets _without_ charset escapes...
 347  * This code does not handle multiple-byte character sets (MBCS)
 348  * (such as ISO-2022-JP) with charset switching via escapes...
 349  */
 350
 351 int                                     /* O - Count or -1 on error */
 352 cupsUTF8ToCharset(
 353     char                  *dest,        /* O - Target string */
 354     const cups_utf8_t     *src,         /* I - Source string */
 355     const int             maxout,       /* I - Max output */
 356     const cups_encoding_t encoding)     /* I - Encoding */
 357 {
 358   int   bytes;                          /* Number of bytes converted */
 359
 360
 361  /*
 362   * Check for valid arguments...
 363   */
 364
 365   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 366   {
 367     if (dest)
 368       *dest = '\0';
 369
 370     return (-1);
 371   }
 372
 373  /*
 374   * Handle identity conversions...
 375   */
 376
 377   if (encoding == CUPS_UTF8 ||
 378       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 379   {
 380     strlcpy(dest, (char *)src, maxout);
 381     return ((int)strlen(dest));
 382   }
 383
 384  /*
 385   * Handle UTF-8 to ISO-8859-1 directly...
 386   */
 387
 388   if (encoding == CUPS_ISO8859_1)
 389   {
 390     int         ch;                     /* Character from string */
 391     char        *destptr,               /* Pointer into ISO-8859-1 buffer */
 392                 *destend;               /* End of ISO-8859-1 buffer */
 393
 394
 395     destptr = dest;
 396     destend = dest + maxout - 1;
 397
 398     while (*src && destptr < destend)
 399     {
 400       ch = *src++;
 401
 402       if ((ch & 0xe0) == 0xc0)
 403       {
 404         ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
 405
 406         if (ch < 256)
 407           *destptr++ = ch;
 408         else
 409           *destptr++ = '?';
 410       }
 411       else if ((ch & 0xf0) == 0xe0 ||
 412                (ch & 0xf8) == 0xf0)
 413         *destptr++ = '?';
 414       else if (!(ch & 0x80))
 415         *destptr++ = ch;
 416     }
 417
 418     *destptr = '\0';
 419
 420     return ((int)(destptr - dest));
 421   }
 422
 423  /*
 424   * Convert input UTF-8 to legacy charset...
 425   */
 426
 427 #ifdef HAVE_PTHREAD_H
 428   pthread_mutex_lock(&map_mutex);
 429 #endif /* HAVE_PTHREAD_H */
 430
 431   if (encoding < CUPS_ENCODING_SBCS_END)
 432     bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 433   else
 434     bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 435
 436 #ifdef HAVE_PTHREAD_H
 437   pthread_mutex_unlock(&map_mutex);
 438 #endif /* HAVE_PTHREAD_H */
 439
 440   return (bytes);
 441 }
 442
 443
 444 /*
 445  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 446  *
 447  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 448  *
 449  *   UTF-32 char     UTF-8 char(s)
 450  *   --------------------------------------------------
 451  *        0 to 127 = 0xxxxxxx (US-ASCII)
 452  *     128 to 2047 = 110xxxxx 10yyyyyy
 453  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 454  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 455  *
 456  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 457  * which would convert to five- or six-octet UTF-8 sequences...
 458  */
 459
 460 int                                     /* O - Count or -1 on error */
 461 cupsUTF8ToUTF32(
 462     cups_utf32_t      *dest,            /* O - Target string */
 463     const cups_utf8_t *src,             /* I - Source string */
 464     const int         maxout)           /* I - Max output */
 465 {
 466   int           i;                      /* Looping variable */
 467   cups_utf8_t   ch;                     /* Character value */
 468   cups_utf8_t   next;                   /* Next character value */
 469   cups_utf32_t  ch32;                   /* UTF-32 character value */
 470
 471
 472  /*
 473   * Check for valid arguments and clear output...
 474   */
 475
 476   DEBUG_printf(("cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)\n", dest,
 477                 src ? (const char *)src : "(null)", maxout));
 478
 479   if (dest)
 480     *dest = 0;
 481
 482   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 483   {
 484     DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad arguments)");
 485
 486     return (-1);
 487   }
 488
 489  /*
 490   * Convert input UTF-8 to output UTF-32 (and insert BOM)...
 491   */
 492
 493   *dest++ = 0xfeff;
 494
 495   for (i = maxout - 1; *src && i > 0; i --)
 496   {
 497     ch = *src++;
 498
 499    /*
 500     * Convert UTF-8 character(s) to UTF-32 character...
 501     */
 502
 503     if (!(ch & 0x80))
 504     {
 505      /*
 506       * One-octet UTF-8 <= 127 (US-ASCII)...
 507       */
 508
 509       *dest++ = ch;
 510
 511       DEBUG_printf(("cupsUTF8ToUTF32: %02x => %08X\n", src[-1], ch));
 512       continue;
 513     }
 514     else if ((ch & 0xe0) == 0xc0)
 515     {
 516      /*
 517       * Two-octet UTF-8 <= 2047 (Latin-x)...
 518       */
 519
 520       next = *src++;
 521       if ((next & 0xc0) != 0x80)
 522       {
 523         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 524
 525         return (-1);
 526       }
 527
 528       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 529
 530      /*
 531       * Check for non-shortest form (invalid UTF-8)...
 532       */
 533
 534       if (ch32 < 0x80)
 535       {
 536         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 537
 538         return (-1);
 539       }
 540
 541       *dest++ = ch32;
 542
 543       DEBUG_printf(("cupsUTF8ToUTF32: %02x %02x => %08X\n",
 544                     src[-2], src[-1], (unsigned)ch32));
 545     }
 546     else if ((ch & 0xf0) == 0xe0)
 547     {
 548      /*
 549       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 550       */
 551
 552       next = *src++;
 553       if ((next & 0xc0) != 0x80)
 554       {
 555         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 556
 557         return (-1);
 558       }
 559
 560       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 561
 562       next = *src++;
 563       if ((next & 0xc0) != 0x80)
 564       {
 565         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 566
 567         return (-1);
 568       }
 569
 570       ch32 = (ch32 << 6) | (next & 0x3f);
 571
 572      /*
 573       * Check for non-shortest form (invalid UTF-8)...
 574       */
 575
 576       if (ch32 < 0x800)
 577       {
 578         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 579
 580         return (-1);
 581       }
 582
 583       *dest++ = ch32;
 584
 585       DEBUG_printf(("cupsUTF8ToUTF32: %02x %02x %02x => %08X\n",
 586                     src[-3], src[-2], src[-1], (unsigned)ch32));
 587     }
 588     else if ((ch & 0xf8) == 0xf0)
 589     {
 590      /*
 591       * Four-octet UTF-8...
 592       */
 593
 594       next = *src++;
 595       if ((next & 0xc0) != 0x80)
 596       {
 597         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 598
 599         return (-1);
 600       }
 601
 602       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 603
 604       next = *src++;
 605       if ((next & 0xc0) != 0x80)
 606       {
 607         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 608
 609         return (-1);
 610       }
 611
 612       ch32 = (ch32 << 6) | (next & 0x3f);
 613
 614       next = *src++;
 615       if ((next & 0xc0) != 0x80)
 616       {
 617         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 618
 619         return (-1);
 620       }
 621
 622       ch32 = (ch32 << 6) | (next & 0x3f);
 623
 624      /*
 625       * Check for non-shortest form (invalid UTF-8)...
 626       */
 627
 628       if (ch32 < 0x10000)
 629       {
 630         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 631
 632         return (-1);
 633       }
 634
 635       *dest++ = ch32;
 636
 637       DEBUG_printf(("cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X\n",
 638                     src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
 639     }
 640     else
 641     {
 642      /*
 643       * More than 4-octet (invalid UTF-8 sequence)...
 644       */
 645
 646       DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 647
 648       return (-1);
 649     }
 650
 651    /*
 652     * Check for UTF-16 surrogate (illegal UTF-8)...
 653     */
 654
 655     if (ch32 >= 0xd800 && ch32 <= 0xdfff)
 656       return (-1);
 657   }
 658
 659   *dest = 0;
 660
 661   DEBUG_printf(("cupsUTF8ToUTF32: Returning %d characters\n", maxout - 1 - i));
 662
 663   return (maxout - 1 - i);
 664 }
 665
 666
 667 /*
 668  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 669  *
 670  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 671  *
 672  *   UTF-32 char     UTF-8 char(s)
 673  *   --------------------------------------------------
 674  *        0 to 127 = 0xxxxxxx (US-ASCII)
 675  *     128 to 2047 = 110xxxxx 10yyyyyy
 676  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 677  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 678  *
 679  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 680  * which would convert to five- or six-octet UTF-8 sequences...
 681  */
 682
 683 int                                     /* O - Count or -1 on error */
 684 cupsUTF32ToUTF8(
 685     cups_utf8_t        *dest,           /* O - Target string */
 686     const cups_utf32_t *src,            /* I - Source string */
 687     const int          maxout)          /* I - Max output */
 688 {
 689   cups_utf8_t   *start;                 /* Start of destination string */
 690   int           i;                      /* Looping variable */
 691   int           swap;                   /* Byte-swap input to output */
 692   cups_utf32_t  ch;                     /* Character value */
 693
 694
 695  /*
 696   * Check for valid arguments and clear output...
 697   */
 698
 699   DEBUG_printf(("cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)\n", dest, src,
 700                 maxout));
 701
 702   if (dest)
 703     *dest = '\0';
 704
 705   if (!dest || !src || maxout < 1)
 706   {
 707     DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (bad args)");
 708
 709     return (-1);
 710   }
 711
 712  /*
 713   * Check for leading BOM in UTF-32 and inverted BOM...
 714   */
 715
 716   start = dest;
 717   swap  = *src == 0xfffe0000;
 718
 719   DEBUG_printf(("cupsUTF32ToUTF8: swap=%d\n", swap));
 720
 721   if (*src == 0xfffe0000 || *src == 0xfeff)
 722     src ++;
 723
 724  /*
 725   * Convert input UTF-32 to output UTF-8...
 726   */
 727
 728   for (i = maxout - 1; *src && i > 0;)
 729   {
 730     ch = *src++;
 731
 732    /*
 733     * Byte swap input UTF-32, if necessary...
 734     * (only byte-swapping 24 of 32 bits)
 735     */
 736
 737     if (swap)
 738       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 739
 740    /*
 741     * Check for beyond Plane 16 (invalid UTF-32)...
 742     */
 743
 744     if (ch > 0x10ffff)
 745     {
 746       DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (character out of range)");
 747
 748       return (-1);
 749     }
 750
 751    /*
 752     * Convert UTF-32 character to UTF-8 character(s)...
 753     */
 754
 755     if (ch < 0x80)
 756     {
 757      /*
 758       * One-octet UTF-8 <= 127 (US-ASCII)...
 759       */
 760
 761       *dest++ = (cups_utf8_t)ch;
 762       i --;
 763
 764       DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x\n", (unsigned)ch, dest[-1]));
 765     }
 766     else if (ch < 0x800)
 767     {
 768      /*
 769       * Two-octet UTF-8 <= 2047 (Latin-x)...
 770       */
 771
 772       if (i < 2)
 773       {
 774         DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (too long 2)");
 775
 776         return (-1);
 777       }
 778
 779       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 780       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 781       i -= 2;
 782
 783       DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x %02x\n", (unsigned)ch,
 784                     dest[-2], dest[-1]));
 785     }
 786     else if (ch < 0x10000)
 787     {
 788      /*
 789       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 790       */
 791
 792       if (i < 3)
 793       {
 794         DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (too long 3)");
 795
 796         return (-1);
 797       }
 798
 799       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 800       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 801       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 802       i -= 3;
 803
 804       DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x %02x %02x\n", (unsigned)ch,
 805                     dest[-3], dest[-2], dest[-1]));
 806     }
 807     else
 808     {
 809      /*
 810       * Four-octet UTF-8...
 811       */
 812
 813       if (i < 4)
 814         return (-1);
 815
 816       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 817       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 818       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 819       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 820       i -= 4;
 821
 822       DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x\n",
 823                     (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
 824     }
 825   }
 826
 827   *dest = '\0';
 828
 829   DEBUG_printf(("cupsUTF32ToUTF8: Returning %d\n", (int)(dest - start)));
 830
 831   return ((int)(dest - start));
 832 }
 833
 834
 835 /*
 836  * 'compare_wide()' - Compare key for wide (VBCS) match.
 837  */
 838
 839 static int
 840 compare_wide(const void *k1,            /* I - Key char */
 841              const void *k2)            /* I - Map char */
 842 {
 843   cups_vbcs_t   key;                    /* Legacy key character */
 844   cups_vbcs_t   map;                    /* Legacy map character */
 845
 846
 847   key = *((cups_vbcs_t *)k1);
 848   map = ((_cups_wide2uni_t *)k2)->widechar;
 849
 850   return ((int)(key - map));
 851 }
 852
 853
 854 /*
 855  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 856  */
 857
 858 static int                              /* O - Count or -1 on error */
 859 conv_sbcs_to_utf8(
 860     cups_utf8_t           *dest,        /* O - Target string */
 861     const cups_sbcs_t     *src,         /* I - Source string */
 862     int                   maxout,       /* I - Max output */
 863     const cups_encoding_t encoding)     /* I - Encoding */
 864 {
 865   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 866   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 867   cups_sbcs_t   legchar;                /* Legacy character value */
 868   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 869                 *workptr;               /* Pointer into string */
 870
 871
 872  /*
 873   * Find legacy charset map in cache...
 874   */
 875
 876   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 877     return (-1);
 878
 879  /*
 880   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 881   */
 882
 883   work[0] = 0xfeff;
 884   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 885   {
 886     legchar = *src++;
 887
 888    /*
 889     * Convert ASCII verbatim (optimization)...
 890     */
 891
 892     if (legchar < 0x80)
 893       *workptr++ = (cups_utf32_t)legchar;
 894     else
 895     {
 896      /*
 897       * Convert unknown character to Replacement Character...
 898       */
 899
 900       crow = cmap->char2uni + legchar;
 901
 902       if (!*crow)
 903         *workptr++ = 0xfffd;
 904       else
 905         *workptr++ = (cups_utf32_t)*crow;
 906     }
 907   }
 908
 909   *workptr = 0;
 910
 911  /*
 912   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 913   */
 914
 915   cmap->used --;
 916
 917   return (cupsUTF32ToUTF8(dest, work, maxout));
 918 }
 919
 920
 921 /*
 922  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 923  */
 924
 925 static int                              /* O - Count or -1 on error */
 926 conv_utf8_to_sbcs(
 927     cups_sbcs_t           *dest,        /* O - Target string */
 928     const cups_utf8_t     *src,         /* I - Source string */
 929     int                   maxout,       /* I - Max output */
 930     const cups_encoding_t encoding)     /* I - Encoding */
 931 {
 932   cups_sbcs_t   *start;                 /* Start of destination string */
 933   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 934   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 935   cups_utf32_t  unichar;                /* Character value */
 936   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 937                 *workptr;               /* Pointer into string */
 938
 939
 940  /*
 941   * Find legacy charset map in cache...
 942   */
 943
 944   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 945     return (-1);
 946
 947  /*
 948   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 949   */
 950
 951   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 952     return (-1);
 953
 954  /*
 955   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 956   */
 957
 958   for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
 959   {
 960     unichar = *workptr++;
 961     if (!unichar)
 962       break;
 963
 964    /*
 965     * Convert ASCII verbatim (optimization)...
 966     */
 967
 968     if (unichar < 0x80)
 969     {
 970       *dest++ = (cups_sbcs_t)unichar;
 971       continue;
 972     }
 973
 974    /*
 975     * Convert unknown character to visible replacement...
 976     */
 977
 978     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 979
 980     if (srow)
 981       srow += (int)(unichar & 0xff);
 982
 983     if (!srow || !*srow)
 984       *dest++ = '?';
 985     else
 986       *dest++ = *srow;
 987   }
 988
 989   *dest = '\0';
 990
 991   cmap->used --;
 992
 993   return ((int)(dest - start));
 994 }
 995
 996
 997 /*
 998  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 999  */
1000
1001 static int                              /* O - Count or -1 on error */
1002 conv_utf8_to_vbcs(
1003     cups_sbcs_t           *dest,        /* O - Target string */
1004     const cups_utf8_t     *src,         /* I - Source string */
1005     int                   maxout,       /* I - Max output */
1006     const cups_encoding_t encoding)     /* I - Encoding */
1007 {
1008   cups_sbcs_t   *start;                 /* Start of destination string */
1009   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
1010   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1011   cups_utf32_t  unichar;                /* Character value */
1012   cups_vbcs_t   legchar;                /* Legacy character value */
1013   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1014                 *workptr;               /* Pointer into string */
1015
1016
1017   DEBUG_printf(("conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
1018                 "encoding=%d)\n", dest, src, maxout, encoding));
1019
1020  /*
1021   * Find legacy charset map in cache...
1022   */
1023
1024   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1025   {
1026     DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (no charmap)");
1027
1028     return (-1);
1029   }
1030
1031  /*
1032   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1033   */
1034
1035   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
1036   {
1037     DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
1038
1039     return (-1);
1040   }
1041
1042  /*
1043   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1044   */
1045
1046   for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
1047   {
1048     unichar = *workptr++;
1049
1050    /*
1051     * Convert ASCII verbatim (optimization)...
1052     */
1053
1054     if (unichar < 0x80)
1055     {
1056       *dest++ = (cups_sbcs_t)unichar;
1057
1058       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X\n", (unsigned)unichar,
1059                     dest[-1]));
1060
1061       continue;
1062     }
1063
1064    /*
1065     * Convert unknown character to visible replacement...
1066     */
1067
1068     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1069
1070     if (vrow)
1071       vrow += (int)(unichar & 0xff);
1072
1073     if (!vrow || !*vrow)
1074       legchar = (cups_vbcs_t)'?';
1075     else
1076       legchar = (cups_vbcs_t)*vrow;
1077
1078    /*
1079     * Save n-byte legacy character...
1080     */
1081
1082     if (legchar > 0xffffff)
1083     {
1084       if (maxout < 5)
1085       {
1086         DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (out of space)");
1087
1088         return (-1);
1089       }
1090
1091       *dest++ = (cups_sbcs_t)(legchar >> 24);
1092       *dest++ = (cups_sbcs_t)(legchar >> 16);
1093       *dest++ = (cups_sbcs_t)(legchar >> 8);
1094       *dest++ = (cups_sbcs_t)legchar;
1095
1096       maxout -= 3;
1097
1098       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X\n",
1099                     (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
1100     }
1101     else if (legchar > 0xffff)
1102     {
1103       if (maxout < 4)
1104       {
1105         DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (out of space)");
1106
1107         return (-1);
1108       }
1109
1110       *dest++ = (cups_sbcs_t)(legchar >> 16);
1111       *dest++ = (cups_sbcs_t)(legchar >> 8);
1112       *dest++ = (cups_sbcs_t)legchar;
1113
1114       maxout -= 2;
1115
1116       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X %02X %02X\n",
1117                     (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
1118     }
1119     else if (legchar > 0xff)
1120     {
1121       *dest++ = (cups_sbcs_t)(legchar >> 8);
1122       *dest++ = (cups_sbcs_t)legchar;
1123
1124       maxout --;
1125
1126       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X %02X\n",
1127                     (unsigned)unichar, dest[-2], dest[-1]));
1128     }
1129     else
1130     {
1131       *dest++ = legchar;
1132
1133       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X\n",
1134                     (unsigned)unichar, dest[-1]));
1135     }
1136   }
1137
1138   *dest = '\0';
1139
1140   vmap->used --;
1141
1142   DEBUG_printf(("conv_utf8_to_vbcs: Returning %d characters\n",
1143                 (int)(dest - start)));
1144
1145   return ((int)(dest - start));
1146 }
1147
1148
1149 /*
1150  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1151  */
1152
1153 static int                              /* O - Count or -1 on error */
1154 conv_vbcs_to_utf8(
1155     cups_utf8_t           *dest,        /* O - Target string */
1156     const cups_sbcs_t     *src,         /* I - Source string */
1157     int                   maxout,       /* I - Max output */
1158     const cups_encoding_t encoding)     /* I - Encoding */
1159 {
1160   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1161   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1162   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1163   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
1164   cups_vbcs_t   legchar;                /* Legacy character value */
1165   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1166                 *workptr;               /* Pointer into string */
1167
1168
1169  /*
1170   * Find legacy charset map in cache...
1171   */
1172
1173   DEBUG_printf(("conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)\n",
1174                 dest, src, maxout, encoding));
1175
1176   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1177   {
1178     DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1179
1180     return (-1);
1181   }
1182
1183  /*
1184   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1185   */
1186
1187   work[0] = 0xfeff;
1188   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1189   {
1190     legchar  = *src++;
1191     leadchar = (cups_sbcs_t)legchar;
1192
1193    /*
1194     * Convert ASCII verbatim (optimization)...
1195     */
1196
1197     if (legchar < 0x80)
1198     {
1199       *workptr++ = (cups_utf32_t)legchar;
1200
1201       DEBUG_printf(("conv_vbcs_to_utf8: %02X => %08X\n", src[-1],
1202                     (unsigned)legchar));
1203       continue;
1204     }
1205
1206    /*
1207     * Convert 2-byte legacy character...
1208     */
1209
1210     if (vmap->lead2char[(int)leadchar] == leadchar)
1211     {
1212       if (!*src)
1213       {
1214         DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (short string)");
1215
1216         return (-1);
1217       }
1218
1219       legchar = (legchar << 8) | *src++;
1220
1221      /*
1222       * Convert unknown character to Replacement Character...
1223       */
1224
1225       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1226       if (crow)
1227         crow += (int) (legchar & 0xff);
1228
1229       if (!crow || !*crow)
1230         *workptr++ = 0xfffd;
1231       else
1232         *workptr++ = (cups_utf32_t)*crow;
1233
1234       DEBUG_printf(("conv_vbcs_to_utf8: %02X %02X => %08X\n",
1235                     src[-2], src[-1], (unsigned)workptr[-1]));
1236       continue;
1237     }
1238
1239    /*
1240     * Fetch 3-byte or 4-byte legacy character...
1241     */
1242
1243     if (vmap->lead3char[(int)leadchar] == leadchar)
1244     {
1245       if (!*src || !src[1])
1246       {
1247         DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (short string 2)");
1248
1249         return (-1);
1250       }
1251
1252       legchar = (legchar << 8) | *src++;
1253       legchar = (legchar << 8) | *src++;
1254     }
1255     else if (vmap->lead4char[(int)leadchar] == leadchar)
1256     {
1257       if (!*src || !src[1] || !src[2])
1258       {
1259         DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (short string 3)");
1260
1261         return (-1);
1262       }
1263
1264       legchar = (legchar << 8) | *src++;
1265       legchar = (legchar << 8) | *src++;
1266       legchar = (legchar << 8) | *src++;
1267     }
1268     else
1269     {
1270       DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (bad character)");
1271
1272       return (-1);
1273     }
1274
1275    /*
1276     * Find 3-byte or 4-byte legacy character...
1277     */
1278
1279     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1280                                            vmap->wide2uni,
1281                                            vmap->widecount,
1282                                            sizeof(_cups_wide2uni_t),
1283                                            compare_wide);
1284
1285    /*
1286     * Convert unknown character to Replacement Character...
1287     */
1288
1289     if (!wide2uni || !wide2uni->unichar)
1290       *workptr++ = 0xfffd;
1291     else
1292       *workptr++ = wide2uni->unichar;
1293
1294     if (vmap->lead3char[(int)leadchar] == leadchar)
1295       DEBUG_printf(("conv_vbcs_to_utf8: %02X %02X %02X => %08X\n",
1296                     src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1297     else
1298       DEBUG_printf(("conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X\n",
1299                     src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1300   }
1301
1302   *workptr = 0;
1303
1304   vmap->used --;
1305
1306   DEBUG_printf(("conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8\n",
1307                 (int)(workptr - work)));
1308
1309  /*
1310   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1311   */
1312
1313   return (cupsUTF32ToUTF8(dest, work, maxout));
1314 }
1315
1316
1317 /*
1318  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1319  */
1320
1321 static void
1322 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1323 {
1324   int           i;                      /* Looping variable */
1325
1326
1327   for (i = 0; i < 256; i ++)
1328     if (cmap->uni2char[i])
1329       free(cmap->uni2char[i]);
1330
1331   free(cmap);
1332 }
1333
1334
1335 /*
1336  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1337  */
1338
1339 static void
1340 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1341 {
1342   int           i;                      /* Looping variable */
1343
1344
1345   for (i = 0; i < 256; i ++)
1346     if (vmap->char2uni[i])
1347       free(vmap->char2uni[i]);
1348
1349   for (i = 0; i < 256; i ++)
1350     if (vmap->uni2char[i])
1351       free(vmap->uni2char[i]);
1352
1353   if (vmap->wide2uni)
1354     free(vmap->wide2uni);
1355
1356   free(vmap);
1357 }
1358
1359
1360 /*
1361  * 'get_charmap()' - Lookup or get a character set map (private).
1362  *
1363  * This code handles single-byte (SBCS), double-byte (DBCS), and
1364  * variable-byte (VBCS) character sets _without_ charset escapes...
1365  * This code does not handle multiple-byte character sets (MBCS)
1366  * (such as ISO-2022-JP) with charset switching via escapes...
1367  */
1368
1369
1370 static void *                           /* O - Charset map pointer */
1371 get_charmap(
1372     const cups_encoding_t encoding)     /* I - Encoding */
1373 {
1374   char          filename[1024];         /* Filename for charset map file */
1375   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1376
1377
1378   DEBUG_printf(("get_charmap(encoding=%d)\n", encoding));
1379
1380  /*
1381   * Get the data directory and charset map name...
1382   */
1383
1384   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1385            cg->cups_datadir, _cupsEncodingName(encoding));
1386
1387   DEBUG_printf(("get_charmap: filename=\"%s\"\n", filename));
1388
1389  /*
1390   * Read charset map input file into cache...
1391   */
1392
1393   if (encoding < CUPS_ENCODING_SBCS_END)
1394     return (get_sbcs_charmap(encoding, filename));
1395   else if (encoding < CUPS_ENCODING_VBCS_END)
1396     return (get_vbcs_charmap(encoding, filename));
1397   else
1398     return (NULL);
1399 }
1400
1401
1402 /*
1403  * 'get_charmap_count()' - Count lines in a charmap file.
1404  */
1405
1406 static int                              /* O - Count or -1 on error */
1407 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1408 {
1409   int   count;                          /* Number of lines */
1410   char  line[256];                      /* Line from input map file */
1411
1412
1413  /*
1414   * Count lines in map input file...
1415   */
1416
1417   count = 0;
1418
1419   while (cupsFileGets(fp, line, sizeof(line)))
1420     if (line[0] == '0')
1421       count ++;
1422
1423  /*
1424   * Return the number of lines...
1425   */
1426
1427   if (count > 0)
1428     return (count);
1429   else
1430     return (-1);
1431 }
1432
1433
1434 /*
1435  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1436  */
1437
1438 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1439 get_sbcs_charmap(
1440     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1441     const char            *filename)    /* I - Charmap Filename */
1442 {
1443   unsigned long legchar;                /* Legacy character value */
1444   cups_utf32_t  unichar;                /* Unicode character value */
1445   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1446   cups_file_t   *fp;                    /* Charset map file pointer */
1447   char          *s;                     /* Line parsing pointer */
1448   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1449   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1450   char          line[256];              /* Line from charset map file */
1451
1452
1453  /*
1454   * See if we already have this SBCS charset map loaded...
1455   */
1456
1457   DEBUG_printf(("get_sbcs_charmap(encoding=%d, filename=\"%s\")\n", encoding,
1458                 filename));
1459
1460   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1461   {
1462     if (cmap->encoding == encoding)
1463     {
1464       cmap->used ++;
1465       DEBUG_printf(("get_sbcs_charmap: Returning existing cmap=%p\n", cmap));
1466
1467       return ((void *)cmap);
1468     }
1469   }
1470
1471  /*
1472   * Open SBCS charset map input file...
1473   */
1474
1475   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1476   {
1477     DEBUG_printf(("get_sbcs_charmap: Returning NULL (%s)\n", strerror(errno)));
1478
1479     return (NULL);
1480   }
1481
1482  /*
1483   * Allocate memory for SBCS charset map...
1484   */
1485
1486   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1487   {
1488     cupsFileClose(fp);
1489     DEBUG_puts("get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1490
1491     return (NULL);
1492   }
1493
1494   cmap->used ++;
1495   cmap->encoding = encoding;
1496
1497  /*
1498   * Save SBCS charset map into memory for transcoding...
1499   */
1500
1501   while (cupsFileGets(fp, line, sizeof(line)))
1502   {
1503     if (line[0] != '0')
1504       continue;
1505
1506     legchar = strtol(line, &s, 16);
1507     if (legchar < 0 || legchar > 0xff)
1508       goto sbcs_error;
1509
1510     unichar = strtol(s, NULL, 16);
1511     if (unichar < 0 || unichar > 0xffff)
1512       goto sbcs_error;
1513
1514    /*
1515     * Save legacy to Unicode mapping in direct lookup table...
1516     */
1517
1518     crow  = cmap->char2uni + legchar;
1519     *crow = (cups_ucs2_t)(unichar & 0xffff);
1520
1521    /*
1522     * Save Unicode to legacy mapping in indirect lookup table...
1523     */
1524
1525     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1526     if (!srow)
1527     {
1528       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1529       if (!srow)
1530         goto sbcs_error;
1531
1532       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1533     }
1534
1535     srow += unichar & 0xff;
1536
1537    /*
1538     * Convert Replacement Character to visible replacement...
1539     */
1540
1541     if (unichar == 0xfffd)
1542       legchar = (unsigned long)'?';
1543
1544    /*
1545     * First (oldest) legacy character uses Unicode mapping cell...
1546     */
1547
1548     if (!*srow)
1549       *srow = (cups_sbcs_t)legchar;
1550   }
1551
1552   cupsFileClose(fp);
1553
1554  /*
1555   * Add it to the cache and return...
1556   */
1557
1558   cmap->next = cmap_cache;
1559   cmap_cache = cmap;
1560
1561   DEBUG_printf(("get_sbcs_charmap: Returning new cmap=%p\n", cmap));
1562
1563   return (cmap);
1564
1565  /*
1566   * If we get here, there was an error in the cmap file...
1567   */
1568
1569   sbcs_error:
1570
1571   free_sbcs_charmap(cmap);
1572
1573   cupsFileClose(fp);
1574
1575   DEBUG_puts("get_sbcs_charmap: Returning NULL (Read/format error)");
1576
1577   return (NULL);
1578 }
1579
1580
1581 /*
1582  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1583  */
1584
1585 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1586 get_vbcs_charmap(
1587     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1588     const char            *filename)    /* I - Charmap Filename */
1589 {
1590   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1591   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1592   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1593   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1594   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1595   unsigned long legchar;                /* Legacy character value */
1596   cups_utf32_t  unichar;                /* Unicode character value */
1597   int           mapcount;               /* Count of lines in charmap file */
1598   cups_file_t   *fp;                    /* Charset map file pointer */
1599   char          *s;                     /* Line parsing pointer */
1600   char          line[256];              /* Line from charset map file */
1601   int           i;                      /* Loop variable */
1602   int           legacy;                 /* 32-bit legacy char */
1603
1604
1605   DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1606                 encoding, filename));
1607
1608  /*
1609   * See if we already have this DBCS/VBCS charset map loaded...
1610   */
1611
1612   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1613   {
1614     if (vmap->encoding == encoding)
1615     {
1616       vmap->used ++;
1617       DEBUG_printf(("get_vbcs_charmap: Returning existing vmap=%p\n", vmap));
1618
1619       return ((void *)vmap);
1620     }
1621   }
1622
1623  /*
1624   * Open VBCS charset map input file...
1625   */
1626
1627   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1628   {
1629     DEBUG_printf(("get_vbcs_charmap: Returning NULL (%s)\n", strerror(errno)));
1630
1631     return (NULL);
1632   }
1633
1634  /*
1635   * Count lines in charmap file...
1636   */
1637
1638   if ((mapcount = get_charmap_count(fp)) <= 0)
1639   {
1640     DEBUG_puts("get_vbcs_charmap: Unable to get charmap count!");
1641
1642     cupsFileClose(fp);
1643
1644     return (NULL);
1645   }
1646
1647   DEBUG_printf(("get_vbcs_charmap: mapcount=%d\n", mapcount));
1648
1649  /*
1650   * Allocate memory for DBCS/VBCS charset map...
1651   */
1652
1653   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1654   {
1655     DEBUG_puts("get_vbcs_charmap: Unable to allocate memory!");
1656
1657     cupsFileClose(fp);
1658
1659     return (NULL);
1660   }
1661
1662   vmap->used ++;
1663   vmap->encoding = encoding;
1664
1665  /*
1666   * Save DBCS/VBCS charset map into memory for transcoding...
1667   */
1668
1669   leadchar = 0;
1670   wide2uni = NULL;
1671
1672   cupsFileRewind(fp);
1673
1674   i      = 0;
1675   legacy = 0;
1676
1677   while (cupsFileGets(fp, line, sizeof(line)))
1678   {
1679     if (line[0] != '0')
1680       continue;
1681
1682     legchar = strtoul(line, &s, 16);
1683     if (legchar == ULONG_MAX)
1684       goto vbcs_error;
1685
1686     unichar = strtol(s, NULL, 16);
1687     if (unichar < 0 || unichar > 0xffff)
1688       goto vbcs_error;
1689
1690     i ++;
1691
1692 /*    DEBUG_printf(("    i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1693                   legchar, (unsigned)unichar)); */
1694
1695    /*
1696     * Save lead char of 2/3/4-byte legacy char...
1697     */
1698
1699     if (legchar > 0xffffff)
1700     {
1701       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1702       vmap->lead4char[leadchar] = leadchar;
1703     }
1704     else if (legchar > 0xffff)
1705     {
1706       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1707       vmap->lead3char[leadchar] = leadchar;
1708     }
1709     else if (legchar > 0xff)
1710     {
1711       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1712       vmap->lead2char[leadchar] = leadchar;
1713     }
1714
1715    /*
1716     * Save Legacy to Unicode mapping...
1717     */
1718
1719     if (legchar <= 0xffff)
1720     {
1721      /*
1722       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1723       */
1724
1725       crow = vmap->char2uni[(int)leadchar];
1726       if (!crow)
1727       {
1728         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1729         if (!crow)
1730           goto vbcs_error;
1731
1732         vmap->char2uni[(int)leadchar] = crow;
1733       }
1734
1735       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1736     }
1737     else
1738     {
1739      /*
1740       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1741       */
1742
1743       if (!legacy)
1744       {
1745         legacy          = 1;
1746         vmap->widecount = (mapcount - i + 1);
1747         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1748                                                      sizeof(_cups_wide2uni_t));
1749         if (!wide2uni)
1750           goto vbcs_error;
1751
1752         vmap->wide2uni = wide2uni;
1753       }
1754
1755       wide2uni->widechar = (cups_vbcs_t)legchar;
1756       wide2uni->unichar  = (cups_ucs2_t)unichar;
1757       wide2uni ++;
1758     }
1759
1760    /*
1761     * Save Unicode to legacy mapping in indirect lookup table...
1762     */
1763
1764     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1765     if (!vrow)
1766     {
1767       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1768       if (!vrow)
1769         goto vbcs_error;
1770
1771       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1772     }
1773
1774     vrow += (int)(unichar & 0xff);
1775
1776    /*
1777     * Convert Replacement Character to visible replacement...
1778     */
1779
1780     if (unichar == 0xfffd)
1781       legchar = (unsigned long)'?';
1782
1783    /*
1784     * First (oldest) legacy character uses Unicode mapping cell...
1785     */
1786
1787     if (!*vrow)
1788       *vrow = (cups_vbcs_t)legchar;
1789   }
1790
1791   vmap->charcount = (i - vmap->widecount);
1792
1793   cupsFileClose(fp);
1794
1795  /*
1796   * Add it to the cache and return...
1797   */
1798
1799   vmap->next = vmap_cache;
1800   vmap_cache = vmap;
1801
1802   DEBUG_printf(("get_vbcs_charmap: Returning new vmap=%p\n", vmap));
1803
1804   return (vmap);
1805
1806  /*
1807   * If we get here, the file contains errors...
1808   */
1809
1810   vbcs_error:
1811
1812   free_vbcs_charmap(vmap);
1813
1814   cupsFileClose(fp);
1815
1816   DEBUG_puts("get_vbcs_charmap: Returning NULL (Read/format error)");
1817
1818   return (NULL);
1819 }
1820
1821
1822 /*
1823  * End of "$Id: transcode.c 6649 2007-07-11 21:46:42Z mike $"
1824  */