cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
   3  *
   4  *   Transcoding support for CUPS.
   5  *
   6  *   Copyright 2007-2010 by Apple Inc.
   7  *   Copyright 1997-2007 by Easy Software Products.
   8  *
   9  *   These coded instructions, statements, and computer programs are the
  10  *   property of Apple Inc. and are protected by Federal copyright
  11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  *   which should have been included with this file.  If this file is
 *   missing or damaged, see the license at "http://www.cups.org/".
  14  *
  15  *   This file is subject to the Apple OS-Developed Software exception.
  16  *
  17  * Contents:
  18  *
  19  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  20  *   _cupsCharmapFree()  - Free a character set map.
  21  *   _cupsCharmapGet()   - Get a character set map.
  22  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  23  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  24  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  25  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  26  *   compare_wide()      - Compare key for wide (VBCS) match.
  27  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  28  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  29  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  30  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  31  *   free_sbcs_charmap() - Free memory used by a single byte character set.
  32  *   free_vbcs_charmap() - Free memory used by a variable byte character set.
  33  *   get_charmap()       - Lookup or get a character set map (private).
  34  *   get_charmap_count() - Count lines in a charmap file.
  35  *   get_sbcs_charmap()  - Get SBCS Charmap.
  36  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  37  */
  38
  39 /*
  40  * Include necessary headers...
  41  */
  42
  43 #include "cups-private.h"
  44 #include <limits.h>
  45 #include <stdlib.h>
  46 #include <time.h>
  47
  48
  49 /*
  50  * Local globals...
  51  */
  52
/*
 * The entry points in this file that touch the caches below
 * (_cupsCharmapFlush, _cupsCharmapFree, _cupsCharmapGet, cupsCharsetToUTF8
 * and cupsUTF8ToCharset) lock this mutex first when pthreads are available.
 */
#ifdef HAVE_PTHREAD_H
static pthread_mutex_t  map_mutex = PTHREAD_MUTEX_INITIALIZER;
                                        /* Mutex to control access to maps */
#endif /* HAVE_PTHREAD_H */
/*
 * Each cache is the head of a singly linked list chained through the
 * "next" member of its entries; NULL means the cache is empty.
 */
static _cups_cmap_t     *cmap_cache = NULL;
                                        /* SBCS Charmap Cache */
static _cups_vmap_t     *vmap_cache = NULL;
                                        /* VBCS Charmap Cache */
  61
  62
/*
 * Local functions...
 *
 *   compare_wide()       - comparison callback for wide (VBCS) lookups.
 *   conv_*_to_utf8()     - legacy SBCS or DBCS/VBCS text to UTF-8.
 *   conv_utf8_to_*()     - UTF-8 text to legacy SBCS or DBCS/VBCS.
 *   free_*_charmap()     - release one SBCS or VBCS character set map.
 *   get_charmap()        - look up or load the map for an encoding.
 *   get_charmap_count()  - count lines in a charmap file.
 *   get_*_charmap()      - load an SBCS or DBCS/VBCS map from a file.
 *
 * The public wrappers in this file call the conv_*() helpers and
 * get_charmap() with map_mutex held.
 */

static int              compare_wide(const void *k1, const void *k2);
static int              conv_sbcs_to_utf8(cups_utf8_t *dest,
                                          const cups_sbcs_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static int              conv_utf8_to_sbcs(cups_sbcs_t *dest,
                                          const cups_utf8_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static int              conv_utf8_to_vbcs(cups_sbcs_t *dest,
                                          const cups_utf8_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static int              conv_vbcs_to_utf8(cups_utf8_t *dest,
                                          const cups_sbcs_t *src,
                                          int maxout,
                                          const cups_encoding_t encoding);
static void             free_sbcs_charmap(_cups_cmap_t *sbcs);
static void             free_vbcs_charmap(_cups_vmap_t *vbcs);
static void             *get_charmap(const cups_encoding_t encoding);
static int              get_charmap_count(cups_file_t *fp);
static _cups_cmap_t     *get_sbcs_charmap(const cups_encoding_t encoding,
                                          const char *filename);
static _cups_vmap_t     *get_vbcs_charmap(const cups_encoding_t encoding,
                                          const char *filename);
  92
  93
  94 /*
  95  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
  96  */
  97
  98 void
  99 _cupsCharmapFlush(void)
 100 {
 101   _cups_cmap_t  *cmap,                  /* Legacy SBCS / Unicode Charset Map */
 102                 *cnext;                 /* Next Legacy SBCS Charset Map */
 103   _cups_vmap_t  *vmap,                  /* Legacy VBCS / Unicode Charset Map */
 104                 *vnext;                 /* Next Legacy VBCS Charset Map */
 105
 106
 107 #ifdef HAVE_PTHREAD_H
 108   pthread_mutex_lock(&map_mutex);
 109 #endif /* HAVE_PTHREAD_H */
 110
 111  /*
 112   * Loop through SBCS charset map cache, free all memory...
 113   */
 114
 115   for (cmap = cmap_cache; cmap; cmap = cnext)
 116   {
 117     cnext = cmap->next;
 118
 119     free_sbcs_charmap(cmap);
 120   }
 121
 122   cmap_cache = NULL;
 123
 124  /*
 125   * Loop through DBCS/VBCS charset map cache, free all memory...
 126   */
 127
 128   for (vmap = vmap_cache; vmap; vmap = vnext)
 129   {
 130     vnext = vmap->next;
 131
 132     free_vbcs_charmap(vmap);
 133   }
 134
 135   vmap_cache = NULL;
 136
 137 #ifdef HAVE_PTHREAD_H
 138   pthread_mutex_unlock(&map_mutex);
 139 #endif /* HAVE_PTHREAD_H */
 140 }
 141
 142
 143 /*
 144  * '_cupsCharmapFree()' - Free a character set map.
 145  *
 146  * This does not actually free; use '_cupsCharmapFlush()' for that.
 147  */
 148
 149 void
 150 _cupsCharmapFree(
 151     const cups_encoding_t encoding)     /* I - Encoding */
 152 {
 153   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 154   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 155
 156
 157  /*
 158   * See if we already have this SBCS charset map loaded...
 159   */
 160
 161 #ifdef HAVE_PTHREAD_H
 162   pthread_mutex_lock(&map_mutex);
 163 #endif /* HAVE_PTHREAD_H */
 164
 165   for (cmap = cmap_cache; cmap; cmap = cmap->next)
 166   {
 167     if (cmap->encoding == encoding)
 168     {
 169       if (cmap->used > 0)
 170         cmap->used --;
 171       break;
 172     }
 173   }
 174
 175  /*
 176   * See if we already have this DBCS/VBCS charset map loaded...
 177   */
 178
 179   for (vmap = vmap_cache; vmap; vmap = vmap->next)
 180   {
 181     if (vmap->encoding == encoding)
 182     {
 183       if (vmap->used > 0)
 184         vmap->used --;
 185       break;
 186     }
 187   }
 188
 189 #ifdef HAVE_PTHREAD_H
 190   pthread_mutex_unlock(&map_mutex);
 191 #endif /* HAVE_PTHREAD_H */
 192 }
 193
 194
 195 /*
 196  * '_cupsCharmapGet()' - Get a character set map.
 197  *
 198  * This code handles single-byte (SBCS), double-byte (DBCS), and
 199  * variable-byte (VBCS) character sets _without_ charset escapes...
 200  * This code does not handle multiple-byte character sets (MBCS)
 201  * (such as ISO-2022-JP) with charset switching via escapes...
 202  */
 203
 204 void *                                  /* O - Charset map pointer */
 205 _cupsCharmapGet(
 206     const cups_encoding_t encoding)     /* I - Encoding */
 207 {
 208   void  *charmap;                       /* Charset map pointer */
 209
 210
 211   DEBUG_printf(("7_cupsCharmapGet(encoding=%d)", encoding));
 212
 213  /*
 214   * Check for valid arguments...
 215   */
 216
 217   if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 218   {
 219     DEBUG_puts("8_cupsCharmapGet: Bad encoding, returning NULL!");
 220     return (NULL);
 221   }
 222
 223  /*
 224   * Lookup or get the charset map pointer and return...
 225   */
 226
 227 #ifdef HAVE_PTHREAD_H
 228   pthread_mutex_lock(&map_mutex);
 229 #endif /* HAVE_PTHREAD_H */
 230
 231   charmap = get_charmap(encoding);
 232
 233 #ifdef HAVE_PTHREAD_H
 234   pthread_mutex_unlock(&map_mutex);
 235 #endif /* HAVE_PTHREAD_H */
 236
 237   return (charmap);
 238 }
 239
 240
 241 /*
 242  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 243  *
 244  * This code handles single-byte (SBCS), double-byte (DBCS), and
 245  * variable-byte (VBCS) character sets _without_ charset escapes...
 246  * This code does not handle multiple-byte character sets (MBCS)
 247  * (such as ISO-2022-JP) with charset switching via escapes...
 248  */
 249
 250 int                                     /* O - Count or -1 on error */
 251 cupsCharsetToUTF8(
 252     cups_utf8_t *dest,                  /* O - Target string */
 253     const char *src,                    /* I - Source string */
 254     const int maxout,                   /* I - Max output */
 255     const cups_encoding_t encoding)     /* I - Encoding */
 256 {
 257   int   bytes;                          /* Number of bytes converted */
 258
 259
 260  /*
 261   * Check for valid arguments...
 262   */
 263
 264   DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
 265                 dest, src, maxout, encoding));
 266
 267   if (dest)
 268     *dest = '\0';
 269
 270   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 271   {
 272     DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
 273     return (-1);
 274   }
 275
 276  /*
 277   * Handle identity conversions...
 278   */
 279
 280   if (encoding == CUPS_UTF8 ||
 281       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 282   {
 283     strlcpy((char *)dest, src, maxout);
 284     return ((int)strlen((char *)dest));
 285   }
 286
 287  /*
 288   * Handle ISO-8859-1 to UTF-8 directly...
 289   */
 290
 291   if (encoding == CUPS_ISO8859_1)
 292   {
 293     int         ch;                     /* Character from string */
 294     cups_utf8_t *destptr,               /* Pointer into UTF-8 buffer */
 295                 *destend;               /* End of UTF-8 buffer */
 296
 297
 298     destptr = dest;
 299     destend = dest + maxout - 2;
 300
 301     while (*src && destptr < destend)
 302     {
 303       ch = *src++ & 255;
 304
 305       if (ch & 128)
 306       {
 307         *destptr++ = 0xc0 | (ch >> 6);
 308         *destptr++ = 0x80 | (ch & 0x3f);
 309       }
 310       else
 311         *destptr++ = ch;
 312     }
 313
 314     *destptr = '\0';
 315
 316     return ((int)(destptr - dest));
 317   }
 318
 319  /*
 320   * Convert input legacy charset to UTF-8...
 321   */
 322
 323 #ifdef HAVE_PTHREAD_H
 324   pthread_mutex_lock(&map_mutex);
 325 #endif /* HAVE_PTHREAD_H */
 326
 327   if (encoding < CUPS_ENCODING_SBCS_END)
 328     bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 329   else
 330     bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 331
 332 #ifdef HAVE_PTHREAD_H
 333   pthread_mutex_unlock(&map_mutex);
 334 #endif /* HAVE_PTHREAD_H */
 335
 336   return (bytes);
 337 }
 338
 339
 340 /*
 341  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 342  *
 343  * This code handles single-byte (SBCS), double-byte (DBCS), and
 344  * variable-byte (VBCS) character sets _without_ charset escapes...
 345  * This code does not handle multiple-byte character sets (MBCS)
 346  * (such as ISO-2022-JP) with charset switching via escapes...
 347  */
 348
 349 int                                     /* O - Count or -1 on error */
 350 cupsUTF8ToCharset(
 351     char                  *dest,        /* O - Target string */
 352     const cups_utf8_t     *src,         /* I - Source string */
 353     const int             maxout,       /* I - Max output */
 354     const cups_encoding_t encoding)     /* I - Encoding */
 355 {
 356   int   bytes;                          /* Number of bytes converted */
 357
 358
 359  /*
 360   * Check for valid arguments...
 361   */
 362
 363   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 364   {
 365     if (dest)
 366       *dest = '\0';
 367
 368     return (-1);
 369   }
 370
 371  /*
 372   * Handle identity conversions...
 373   */
 374
 375   if (encoding == CUPS_UTF8 ||
 376       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 377   {
 378     strlcpy(dest, (char *)src, maxout);
 379     return ((int)strlen(dest));
 380   }
 381
 382  /*
 383   * Handle UTF-8 to ISO-8859-1 directly...
 384   */
 385
 386   if (encoding == CUPS_ISO8859_1)
 387   {
 388     int         ch;                     /* Character from string */
 389     char        *destptr,               /* Pointer into ISO-8859-1 buffer */
 390                 *destend;               /* End of ISO-8859-1 buffer */
 391
 392
 393     destptr = dest;
 394     destend = dest + maxout - 1;
 395
 396     while (*src && destptr < destend)
 397     {
 398       ch = *src++;
 399
 400       if ((ch & 0xe0) == 0xc0)
 401       {
 402         ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
 403
 404         if (ch < 256)
 405           *destptr++ = ch;
 406         else
 407           *destptr++ = '?';
 408       }
 409       else if ((ch & 0xf0) == 0xe0 ||
 410                (ch & 0xf8) == 0xf0)
 411         *destptr++ = '?';
 412       else if (!(ch & 0x80))
 413         *destptr++ = ch;
 414     }
 415
 416     *destptr = '\0';
 417
 418     return ((int)(destptr - dest));
 419   }
 420
 421  /*
 422   * Convert input UTF-8 to legacy charset...
 423   */
 424
 425 #ifdef HAVE_PTHREAD_H
 426   pthread_mutex_lock(&map_mutex);
 427 #endif /* HAVE_PTHREAD_H */
 428
 429   if (encoding < CUPS_ENCODING_SBCS_END)
 430     bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 431   else
 432     bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 433
 434 #ifdef HAVE_PTHREAD_H
 435   pthread_mutex_unlock(&map_mutex);
 436 #endif /* HAVE_PTHREAD_H */
 437
 438   return (bytes);
 439 }
 440
 441
 442 /*
 443  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 444  *
 445  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 446  *
 447  *   UTF-32 char     UTF-8 char(s)
 448  *   --------------------------------------------------
 449  *        0 to 127 = 0xxxxxxx (US-ASCII)
 450  *     128 to 2047 = 110xxxxx 10yyyyyy
 451  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 452  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 453  *
 454  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 455  * which would convert to five- or six-octet UTF-8 sequences...
 456  */
 457
 458 int                                     /* O - Count or -1 on error */
 459 cupsUTF8ToUTF32(
 460     cups_utf32_t      *dest,            /* O - Target string */
 461     const cups_utf8_t *src,             /* I - Source string */
 462     const int         maxout)           /* I - Max output */
 463 {
 464   int           i;                      /* Looping variable */
 465   cups_utf8_t   ch;                     /* Character value */
 466   cups_utf8_t   next;                   /* Next character value */
 467   cups_utf32_t  ch32;                   /* UTF-32 character value */
 468
 469
 470  /*
 471   * Check for valid arguments and clear output...
 472   */
 473
 474   DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
 475                 src, maxout));
 476
 477   if (dest)
 478     *dest = 0;
 479
 480   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 481   {
 482     DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
 483
 484     return (-1);
 485   }
 486
 487  /*
 488   * Convert input UTF-8 to output UTF-32...
 489   */
 490
 491   for (i = maxout - 1; *src && i > 0; i --)
 492   {
 493     ch = *src++;
 494
 495    /*
 496     * Convert UTF-8 character(s) to UTF-32 character...
 497     */
 498
 499     if (!(ch & 0x80))
 500     {
 501      /*
 502       * One-octet UTF-8 <= 127 (US-ASCII)...
 503       */
 504
 505       *dest++ = ch;
 506
 507       DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
 508       continue;
 509     }
 510     else if ((ch & 0xe0) == 0xc0)
 511     {
 512      /*
 513       * Two-octet UTF-8 <= 2047 (Latin-x)...
 514       */
 515
 516       next = *src++;
 517       if ((next & 0xc0) != 0x80)
 518       {
 519         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 520
 521         return (-1);
 522       }
 523
 524       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 525
 526      /*
 527       * Check for non-shortest form (invalid UTF-8)...
 528       */
 529
 530       if (ch32 < 0x80)
 531       {
 532         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 533
 534         return (-1);
 535       }
 536
 537       *dest++ = ch32;
 538
 539       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
 540                     src[-2], src[-1], (unsigned)ch32));
 541     }
 542     else if ((ch & 0xf0) == 0xe0)
 543     {
 544      /*
 545       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 546       */
 547
 548       next = *src++;
 549       if ((next & 0xc0) != 0x80)
 550       {
 551         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 552
 553         return (-1);
 554       }
 555
 556       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 557
 558       next = *src++;
 559       if ((next & 0xc0) != 0x80)
 560       {
 561         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 562
 563         return (-1);
 564       }
 565
 566       ch32 = (ch32 << 6) | (next & 0x3f);
 567
 568      /*
 569       * Check for non-shortest form (invalid UTF-8)...
 570       */
 571
 572       if (ch32 < 0x800)
 573       {
 574         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 575
 576         return (-1);
 577       }
 578
 579       *dest++ = ch32;
 580
 581       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
 582                     src[-3], src[-2], src[-1], (unsigned)ch32));
 583     }
 584     else if ((ch & 0xf8) == 0xf0)
 585     {
 586      /*
 587       * Four-octet UTF-8...
 588       */
 589
 590       next = *src++;
 591       if ((next & 0xc0) != 0x80)
 592       {
 593         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 594
 595         return (-1);
 596       }
 597
 598       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 599
 600       next = *src++;
 601       if ((next & 0xc0) != 0x80)
 602       {
 603         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 604
 605         return (-1);
 606       }
 607
 608       ch32 = (ch32 << 6) | (next & 0x3f);
 609
 610       next = *src++;
 611       if ((next & 0xc0) != 0x80)
 612       {
 613         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 614
 615         return (-1);
 616       }
 617
 618       ch32 = (ch32 << 6) | (next & 0x3f);
 619
 620      /*
 621       * Check for non-shortest form (invalid UTF-8)...
 622       */
 623
 624       if (ch32 < 0x10000)
 625       {
 626         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 627
 628         return (-1);
 629       }
 630
 631       *dest++ = ch32;
 632
 633       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
 634                     src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
 635     }
 636     else
 637     {
 638      /*
 639       * More than 4-octet (invalid UTF-8 sequence)...
 640       */
 641
 642       DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 643
 644       return (-1);
 645     }
 646
 647    /*
 648     * Check for UTF-16 surrogate (illegal UTF-8)...
 649     */
 650
 651     if (ch32 >= 0xd800 && ch32 <= 0xdfff)
 652       return (-1);
 653   }
 654
 655   *dest = 0;
 656
 657   DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
 658
 659   return (maxout - 1 - i);
 660 }
 661
 662
 663 /*
 664  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 665  *
 666  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 667  *
 668  *   UTF-32 char     UTF-8 char(s)
 669  *   --------------------------------------------------
 670  *        0 to 127 = 0xxxxxxx (US-ASCII)
 671  *     128 to 2047 = 110xxxxx 10yyyyyy
 672  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 673  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 674  *
 675  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 676  * which would convert to five- or six-octet UTF-8 sequences...
 677  */
 678
 679 int                                     /* O - Count or -1 on error */
 680 cupsUTF32ToUTF8(
 681     cups_utf8_t        *dest,           /* O - Target string */
 682     const cups_utf32_t *src,            /* I - Source string */
 683     const int          maxout)          /* I - Max output */
 684 {
 685   cups_utf8_t   *start;                 /* Start of destination string */
 686   int           i;                      /* Looping variable */
 687   int           swap;                   /* Byte-swap input to output */
 688   cups_utf32_t  ch;                     /* Character value */
 689
 690
 691  /*
 692   * Check for valid arguments and clear output...
 693   */
 694
 695   DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
 696                 maxout));
 697
 698   if (dest)
 699     *dest = '\0';
 700
 701   if (!dest || !src || maxout < 1)
 702   {
 703     DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
 704
 705     return (-1);
 706   }
 707
 708  /*
 709   * Check for leading BOM in UTF-32 and inverted BOM...
 710   */
 711
 712   start = dest;
 713   swap  = *src == 0xfffe0000;
 714
 715   DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
 716
 717   if (*src == 0xfffe0000 || *src == 0xfeff)
 718     src ++;
 719
 720  /*
 721   * Convert input UTF-32 to output UTF-8...
 722   */
 723
 724   for (i = maxout - 1; *src && i > 0;)
 725   {
 726     ch = *src++;
 727
 728    /*
 729     * Byte swap input UTF-32, if necessary...
 730     * (only byte-swapping 24 of 32 bits)
 731     */
 732
 733     if (swap)
 734       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 735
 736    /*
 737     * Check for beyond Plane 16 (invalid UTF-32)...
 738     */
 739
 740     if (ch > 0x10ffff)
 741     {
 742       DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
 743
 744       return (-1);
 745     }
 746
 747    /*
 748     * Convert UTF-32 character to UTF-8 character(s)...
 749     */
 750
 751     if (ch < 0x80)
 752     {
 753      /*
 754       * One-octet UTF-8 <= 127 (US-ASCII)...
 755       */
 756
 757       *dest++ = (cups_utf8_t)ch;
 758       i --;
 759
 760       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
 761     }
 762     else if (ch < 0x800)
 763     {
 764      /*
 765       * Two-octet UTF-8 <= 2047 (Latin-x)...
 766       */
 767
 768       if (i < 2)
 769       {
 770         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
 771
 772         return (-1);
 773       }
 774
 775       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 776       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 777       i -= 2;
 778
 779       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
 780                     dest[-2], dest[-1]));
 781     }
 782     else if (ch < 0x10000)
 783     {
 784      /*
 785       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 786       */
 787
 788       if (i < 3)
 789       {
 790         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
 791
 792         return (-1);
 793       }
 794
 795       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 796       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 797       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 798       i -= 3;
 799
 800       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
 801                     dest[-3], dest[-2], dest[-1]));
 802     }
 803     else
 804     {
 805      /*
 806       * Four-octet UTF-8...
 807       */
 808
 809       if (i < 4)
 810       {
 811         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
 812
 813         return (-1);
 814       }
 815
 816       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 817       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 818       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 819       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 820       i -= 4;
 821
 822       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
 823                     (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
 824     }
 825   }
 826
 827   *dest = '\0';
 828
 829   DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
 830
 831   return ((int)(dest - start));
 832 }
 833
 834
 835 /*
 836  * 'compare_wide()' - Compare key for wide (VBCS) match.
 837  */
 838
 839 static int
 840 compare_wide(const void *k1,            /* I - Key char */
 841              const void *k2)            /* I - Map char */
 842 {
 843   cups_vbcs_t   key;                    /* Legacy key character */
 844   cups_vbcs_t   map;                    /* Legacy map character */
 845
 846
 847   key = *((cups_vbcs_t *)k1);
 848   map = ((_cups_wide2uni_t *)k2)->widechar;
 849
 850   return ((int)(key - map));
 851 }
 852
 853
 854 /*
 855  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 856  */
 857
 858 static int                              /* O - Count or -1 on error */
 859 conv_sbcs_to_utf8(
 860     cups_utf8_t           *dest,        /* O - Target string */
 861     const cups_sbcs_t     *src,         /* I - Source string */
 862     int                   maxout,       /* I - Max output */
 863     const cups_encoding_t encoding)     /* I - Encoding */
 864 {
 865   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 866   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 867   cups_sbcs_t   legchar;                /* Legacy character value */
 868   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 869                 *workptr;               /* Pointer into string */
 870
 871
 872  /*
 873   * Find legacy charset map in cache...
 874   */
 875
 876   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 877     return (-1);
 878
 879  /*
 880   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 881   */
 882
 883   work[0] = 0xfeff;
 884   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 885   {
 886     legchar = *src++;
 887
 888    /*
 889     * Convert ASCII verbatim (optimization)...
 890     */
 891
 892     if (legchar < 0x80)
 893       *workptr++ = (cups_utf32_t)legchar;
 894     else
 895     {
 896      /*
 897       * Convert unknown character to Replacement Character...
 898       */
 899
 900       crow = cmap->char2uni + legchar;
 901
 902       if (!*crow)
 903         *workptr++ = 0xfffd;
 904       else
 905         *workptr++ = (cups_utf32_t)*crow;
 906     }
 907   }
 908
 909   *workptr = 0;
 910
 911  /*
 912   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 913   */
 914
 915   cmap->used --;
 916
 917   return (cupsUTF32ToUTF8(dest, work, maxout));
 918 }
 919
 920
 921 /*
 922  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 923  */
 924
 925 static int                              /* O - Count or -1 on error */
 926 conv_utf8_to_sbcs(
 927     cups_sbcs_t           *dest,        /* O - Target string */
 928     const cups_utf8_t     *src,         /* I - Source string */
 929     int                   maxout,       /* I - Max output */
 930     const cups_encoding_t encoding)     /* I - Encoding */
 931 {
 932   cups_sbcs_t   *start;                 /* Start of destination string */
 933   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 934   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 935   cups_utf32_t  unichar;                /* Character value */
 936   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 937                 *workptr;               /* Pointer into string */
 938
 939
 940  /*
 941   * Find legacy charset map in cache...
 942   */
 943
 944   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 945     return (-1);
 946
 947  /*
 948   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 949   */
 950
 951   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 952     return (-1);
 953
 954  /*
 955   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 956   */
 957
 958   for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
 959   {
 960     unichar = *workptr++;
 961     if (!unichar)
 962       break;
 963
 964    /*
 965     * Convert ASCII verbatim (optimization)...
 966     */
 967
 968     if (unichar < 0x80)
 969     {
 970       *dest++ = (cups_sbcs_t)unichar;
 971       continue;
 972     }
 973
 974    /*
 975     * Convert unknown character to visible replacement...
 976     */
 977
 978     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 979
 980     if (srow)
 981       srow += (int)(unichar & 0xff);
 982
 983     if (!srow || !*srow)
 984       *dest++ = '?';
 985     else
 986       *dest++ = *srow;
 987   }
 988
 989   *dest = '\0';
 990
 991   cmap->used --;
 992
 993   return ((int)(dest - start));
 994 }
 995
 996
 997 /*
 998  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 999  */
1000
1001 static int                              /* O - Count or -1 on error */
1002 conv_utf8_to_vbcs(
1003     cups_sbcs_t           *dest,        /* O - Target string */
1004     const cups_utf8_t     *src,         /* I - Source string */
1005     int                   maxout,       /* I - Max output */
1006     const cups_encoding_t encoding)     /* I - Encoding */
1007 {
1008   cups_sbcs_t   *start;                 /* Start of destination string */
1009   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
1010   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1011   cups_utf32_t  unichar;                /* Character value */
1012   cups_vbcs_t   legchar;                /* Legacy character value */
1013   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1014                 *workptr;               /* Pointer into string */
1015
1016
1017   DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
1018                 "encoding=%d)", dest, src, maxout, encoding));
1019
1020  /*
1021   * Find legacy charset map in cache...
1022   */
1023
1024   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1025   {
1026     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
1027
1028     return (-1);
1029   }
1030
1031  /*
1032   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1033   */
1034
1035   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
1036   {
1037     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
1038
1039     return (-1);
1040   }
1041
1042  /*
1043   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1044   */
1045
1046   for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
1047   {
1048     unichar = *workptr++;
1049
1050    /*
1051     * Convert ASCII verbatim (optimization)...
1052     */
1053
1054     if (unichar < 0x80)
1055     {
1056       *dest++ = (cups_sbcs_t)unichar;
1057
1058       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
1059                     dest[-1]));
1060
1061       continue;
1062     }
1063
1064    /*
1065     * Convert unknown character to visible replacement...
1066     */
1067
1068     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1069
1070     if (vrow)
1071       vrow += (int)(unichar & 0xff);
1072
1073     if (!vrow || !*vrow)
1074       legchar = (cups_vbcs_t)'?';
1075     else
1076       legchar = (cups_vbcs_t)*vrow;
1077
1078    /*
1079     * Save n-byte legacy character...
1080     */
1081
1082     if (legchar > 0xffffff)
1083     {
1084       if (maxout < 5)
1085       {
1086         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
1087
1088         return (-1);
1089       }
1090
1091       *dest++ = (cups_sbcs_t)(legchar >> 24);
1092       *dest++ = (cups_sbcs_t)(legchar >> 16);
1093       *dest++ = (cups_sbcs_t)(legchar >> 8);
1094       *dest++ = (cups_sbcs_t)legchar;
1095
1096       maxout -= 3;
1097
1098       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
1099                     (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
1100     }
1101     else if (legchar > 0xffff)
1102     {
1103       if (maxout < 4)
1104       {
1105         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
1106
1107         return (-1);
1108       }
1109
1110       *dest++ = (cups_sbcs_t)(legchar >> 16);
1111       *dest++ = (cups_sbcs_t)(legchar >> 8);
1112       *dest++ = (cups_sbcs_t)legchar;
1113
1114       maxout -= 2;
1115
1116       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
1117                     (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
1118     }
1119     else if (legchar > 0xff)
1120     {
1121       *dest++ = (cups_sbcs_t)(legchar >> 8);
1122       *dest++ = (cups_sbcs_t)legchar;
1123
1124       maxout --;
1125
1126       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
1127                     (unsigned)unichar, dest[-2], dest[-1]));
1128     }
1129     else
1130     {
1131       *dest++ = (cups_sbcs_t)legchar;
1132
1133       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
1134                     (unsigned)unichar, dest[-1]));
1135     }
1136   }
1137
1138   *dest = '\0';
1139
1140   vmap->used --;
1141
1142   DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
1143                 (int)(dest - start)));
1144
1145   return ((int)(dest - start));
1146 }
1147
1148
1149 /*
1150  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1151  */
1152
1153 static int                              /* O - Count or -1 on error */
1154 conv_vbcs_to_utf8(
1155     cups_utf8_t           *dest,        /* O - Target string */
1156     const cups_sbcs_t     *src,         /* I - Source string */
1157     int                   maxout,       /* I - Max output */
1158     const cups_encoding_t encoding)     /* I - Encoding */
1159 {
1160   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1161   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1162   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1163   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
1164   cups_vbcs_t   legchar;                /* Legacy character value */
1165   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1166                 *workptr;               /* Pointer into string */
1167
1168
1169  /*
1170   * Find legacy charset map in cache...
1171   */
1172
1173   DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
1174                 dest, src, maxout, encoding));
1175
1176   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1177   {
1178     DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1179
1180     return (-1);
1181   }
1182
1183  /*
1184   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1185   */
1186
1187   work[0] = 0xfeff;
1188   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1189   {
1190     legchar  = *src++;
1191     leadchar = (cups_sbcs_t)legchar;
1192
1193    /*
1194     * Convert ASCII verbatim (optimization)...
1195     */
1196
1197     if (legchar < 0x80)
1198     {
1199       *workptr++ = (cups_utf32_t)legchar;
1200
1201       DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
1202                     (unsigned)legchar));
1203       continue;
1204     }
1205
1206    /*
1207     * Convert 2-byte legacy character...
1208     */
1209
1210     if (vmap->lead2char[(int)leadchar] == leadchar)
1211     {
1212       if (!*src)
1213       {
1214         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
1215
1216         return (-1);
1217       }
1218
1219       legchar = (legchar << 8) | *src++;
1220
1221      /*
1222       * Convert unknown character to Replacement Character...
1223       */
1224
1225       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1226       if (crow)
1227         crow += (int) (legchar & 0xff);
1228
1229       if (!crow || !*crow)
1230         *workptr++ = 0xfffd;
1231       else
1232         *workptr++ = (cups_utf32_t)*crow;
1233
1234       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
1235                     src[-2], src[-1], (unsigned)workptr[-1]));
1236       continue;
1237     }
1238
1239    /*
1240     * Fetch 3-byte or 4-byte legacy character...
1241     */
1242
1243     if (vmap->lead3char[(int)leadchar] == leadchar)
1244     {
1245       if (!*src || !src[1])
1246       {
1247         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
1248
1249         return (-1);
1250       }
1251
1252       legchar = (legchar << 8) | *src++;
1253       legchar = (legchar << 8) | *src++;
1254     }
1255     else if (vmap->lead4char[(int)leadchar] == leadchar)
1256     {
1257       if (!*src || !src[1] || !src[2])
1258       {
1259         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
1260
1261         return (-1);
1262       }
1263
1264       legchar = (legchar << 8) | *src++;
1265       legchar = (legchar << 8) | *src++;
1266       legchar = (legchar << 8) | *src++;
1267     }
1268     else
1269     {
1270       DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
1271
1272       return (-1);
1273     }
1274
1275    /*
1276     * Find 3-byte or 4-byte legacy character...
1277     */
1278
1279     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1280                                            vmap->wide2uni,
1281                                            vmap->widecount,
1282                                            sizeof(_cups_wide2uni_t),
1283                                            compare_wide);
1284
1285    /*
1286     * Convert unknown character to Replacement Character...
1287     */
1288
1289     if (!wide2uni || !wide2uni->unichar)
1290       *workptr++ = 0xfffd;
1291     else
1292       *workptr++ = wide2uni->unichar;
1293
1294     if (vmap->lead3char[(int)leadchar] == leadchar)
1295       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
1296                     src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1297     else
1298       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
1299                     src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1300   }
1301
1302   *workptr = 0;
1303
1304   vmap->used --;
1305
1306   DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
1307                 (int)(workptr - work)));
1308
1309  /*
1310   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1311   */
1312
1313   return (cupsUTF32ToUTF8(dest, work, maxout));
1314 }
1315
1316
1317 /*
1318  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1319  */
1320
1321 static void
1322 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1323 {
1324   int           i;                      /* Looping variable */
1325
1326
1327   for (i = 0; i < 256; i ++)
1328     if (cmap->uni2char[i])
1329       free(cmap->uni2char[i]);
1330
1331   free(cmap);
1332 }
1333
1334
1335 /*
1336  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1337  */
1338
1339 static void
1340 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1341 {
1342   int           i;                      /* Looping variable */
1343
1344
1345   for (i = 0; i < 256; i ++)
1346     if (vmap->char2uni[i])
1347       free(vmap->char2uni[i]);
1348
1349   for (i = 0; i < 256; i ++)
1350     if (vmap->uni2char[i])
1351       free(vmap->uni2char[i]);
1352
1353   if (vmap->wide2uni)
1354     free(vmap->wide2uni);
1355
1356   free(vmap);
1357 }
1358
1359
1360 /*
1361  * 'get_charmap()' - Lookup or get a character set map (private).
1362  *
1363  * This code handles single-byte (SBCS), double-byte (DBCS), and
1364  * variable-byte (VBCS) character sets _without_ charset escapes...
1365  * This code does not handle multiple-byte character sets (MBCS)
1366  * (such as ISO-2022-JP) with charset switching via escapes...
1367  */
1368
1369
1370 static void *                           /* O - Charset map pointer */
1371 get_charmap(
1372     const cups_encoding_t encoding)     /* I - Encoding */
1373 {
1374   char          filename[1024];         /* Filename for charset map file */
1375   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1376
1377
1378   DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
1379
1380  /*
1381   * Get the data directory and charset map name...
1382   */
1383
1384   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1385            cg->cups_datadir, _cupsEncodingName(encoding));
1386
1387   DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
1388
1389  /*
1390   * Read charset map input file into cache...
1391   */
1392
1393   if (encoding < CUPS_ENCODING_SBCS_END)
1394     return (get_sbcs_charmap(encoding, filename));
1395   else if (encoding < CUPS_ENCODING_VBCS_END)
1396     return (get_vbcs_charmap(encoding, filename));
1397   else
1398     return (NULL);
1399 }
1400
1401
1402 /*
1403  * 'get_charmap_count()' - Count lines in a charmap file.
1404  */
1405
1406 static int                              /* O - Count or -1 on error */
1407 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1408 {
1409   int   count;                          /* Number of lines */
1410   char  line[256];                      /* Line from input map file */
1411
1412
1413  /*
1414   * Count lines in map input file...
1415   */
1416
1417   count = 0;
1418
1419   while (cupsFileGets(fp, line, sizeof(line)))
1420     if (line[0] == '0')
1421       count ++;
1422
1423  /*
1424   * Return the number of lines...
1425   */
1426
1427   if (count > 0)
1428     return (count);
1429   else
1430     return (-1);
1431 }
1432
1433
1434 /*
1435  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1436  */
1437
1438 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1439 get_sbcs_charmap(
1440     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1441     const char            *filename)    /* I - Charmap Filename */
1442 {
1443   unsigned long legchar;                /* Legacy character value */
1444   cups_utf32_t  unichar;                /* Unicode character value */
1445   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1446   cups_file_t   *fp;                    /* Charset map file pointer */
1447   char          *s;                     /* Line parsing pointer */
1448   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1449   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1450   char          line[256];              /* Line from charset map file */
1451
1452
1453  /*
1454   * See if we already have this SBCS charset map loaded...
1455   */
1456
1457   DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
1458                 filename));
1459
1460   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1461   {
1462     if (cmap->encoding == encoding)
1463     {
1464       cmap->used ++;
1465       DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
1466
1467       return ((void *)cmap);
1468     }
1469   }
1470
1471  /*
1472   * Open SBCS charset map input file...
1473   */
1474
1475   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1476   {
1477     DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
1478
1479     return (NULL);
1480   }
1481
1482  /*
1483   * Allocate memory for SBCS charset map...
1484   */
1485
1486   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1487   {
1488     cupsFileClose(fp);
1489     DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1490
1491     return (NULL);
1492   }
1493
1494   cmap->used ++;
1495   cmap->encoding = encoding;
1496
1497  /*
1498   * Save SBCS charset map into memory for transcoding...
1499   */
1500
1501   while (cupsFileGets(fp, line, sizeof(line)))
1502   {
1503     if (line[0] != '0')
1504       continue;
1505
1506     legchar = strtol(line, &s, 16);
1507     if (legchar < 0 || legchar > 0xff)
1508       goto sbcs_error;
1509
1510     unichar = strtol(s, NULL, 16);
1511     if (unichar < 0 || unichar > 0x10ffff)
1512       goto sbcs_error;
1513
1514    /*
1515     * Save legacy to Unicode mapping in direct lookup table...
1516     */
1517
1518     crow  = cmap->char2uni + legchar;
1519     *crow = (cups_ucs2_t)(unichar & 0xffff);
1520
1521    /*
1522     * Save Unicode to legacy mapping in indirect lookup table...
1523     */
1524
1525     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1526     if (!srow)
1527     {
1528       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1529       if (!srow)
1530         goto sbcs_error;
1531
1532       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1533     }
1534
1535     srow += unichar & 0xff;
1536
1537    /*
1538     * Convert Replacement Character to visible replacement...
1539     */
1540
1541     if (unichar == 0xfffd)
1542       legchar = (unsigned long)'?';
1543
1544    /*
1545     * First (oldest) legacy character uses Unicode mapping cell...
1546     */
1547
1548     if (!*srow)
1549       *srow = (cups_sbcs_t)legchar;
1550   }
1551
1552   cupsFileClose(fp);
1553
1554  /*
1555   * Add it to the cache and return...
1556   */
1557
1558   cmap->next = cmap_cache;
1559   cmap_cache = cmap;
1560
1561   DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
1562
1563   return (cmap);
1564
1565  /*
1566   * If we get here, there was an error in the cmap file...
1567   */
1568
1569   sbcs_error:
1570
1571   free_sbcs_charmap(cmap);
1572
1573   cupsFileClose(fp);
1574
1575   DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
1576
1577   return (NULL);
1578 }
1579
1580
1581 /*
1582  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1583  */
1584
1585 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1586 get_vbcs_charmap(
1587     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1588     const char            *filename)    /* I - Charmap Filename */
1589 {
1590   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1591   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1592   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1593   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1594   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1595   unsigned long legchar;                /* Legacy character value */
1596   cups_utf32_t  unichar;                /* Unicode character value */
1597   int           mapcount;               /* Count of lines in charmap file */
1598   cups_file_t   *fp;                    /* Charset map file pointer */
1599   char          *s;                     /* Line parsing pointer */
1600   char          line[256];              /* Line from charset map file */
1601   int           i;                      /* Loop variable */
1602   int           legacy;                 /* 32-bit legacy char */
1603
1604
1605   DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1606                 encoding, filename));
1607
1608  /*
1609   * See if we already have this DBCS/VBCS charset map loaded...
1610   */
1611
1612   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1613   {
1614     if (vmap->encoding == encoding)
1615     {
1616       vmap->used ++;
1617       DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
1618
1619       return ((void *)vmap);
1620     }
1621   }
1622
1623  /*
1624   * Open VBCS charset map input file...
1625   */
1626
1627   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1628   {
1629     DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
1630
1631     return (NULL);
1632   }
1633
1634  /*
1635   * Count lines in charmap file...
1636   */
1637
1638   if ((mapcount = get_charmap_count(fp)) <= 0)
1639   {
1640     DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
1641
1642     cupsFileClose(fp);
1643
1644     return (NULL);
1645   }
1646
1647   DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
1648
1649  /*
1650   * Allocate memory for DBCS/VBCS charset map...
1651   */
1652
1653   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1654   {
1655     DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
1656
1657     cupsFileClose(fp);
1658
1659     return (NULL);
1660   }
1661
1662   vmap->used ++;
1663   vmap->encoding = encoding;
1664
1665  /*
1666   * Save DBCS/VBCS charset map into memory for transcoding...
1667   */
1668
1669   wide2uni = NULL;
1670
1671   cupsFileRewind(fp);
1672
1673   i      = 0;
1674   legacy = 0;
1675
1676   while (cupsFileGets(fp, line, sizeof(line)))
1677   {
1678     if (line[0] != '0')
1679       continue;
1680
1681     legchar = strtoul(line, &s, 16);
1682     if (legchar == ULONG_MAX)
1683       goto vbcs_error;
1684
1685     unichar = strtol(s, NULL, 16);
1686     if (unichar < 0 || unichar > 0x10ffff)
1687       goto vbcs_error;
1688
1689     i ++;
1690
1691     DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
1692                   legchar, (unsigned)unichar));
1693
1694    /*
1695     * Save lead char of 2/3/4-byte legacy char...
1696     */
1697
1698     if (legchar > 0xffffff)
1699     {
1700       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1701       vmap->lead4char[leadchar] = leadchar;
1702     }
1703     else if (legchar > 0xffff)
1704     {
1705       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1706       vmap->lead3char[leadchar] = leadchar;
1707     }
1708     else
1709     {
1710       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1711       vmap->lead2char[leadchar] = leadchar;
1712     }
1713
1714    /*
1715     * Save Legacy to Unicode mapping...
1716     */
1717
1718     if (legchar <= 0xffff)
1719     {
1720      /*
1721       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1722       */
1723
1724       crow = vmap->char2uni[(int)leadchar];
1725       if (!crow)
1726       {
1727         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1728         if (!crow)
1729           goto vbcs_error;
1730
1731         vmap->char2uni[(int)leadchar] = crow;
1732       }
1733
1734       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1735     }
1736     else
1737     {
1738      /*
1739       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1740       */
1741
1742       if (!legacy)
1743       {
1744         legacy          = 1;
1745         vmap->widecount = (mapcount - i + 1);
1746         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1747                                                      sizeof(_cups_wide2uni_t));
1748         if (!wide2uni)
1749           goto vbcs_error;
1750
1751         vmap->wide2uni = wide2uni;
1752       }
1753
1754       wide2uni->widechar = (cups_vbcs_t)legchar;
1755       wide2uni->unichar  = (cups_ucs2_t)unichar;
1756       wide2uni ++;
1757     }
1758
1759    /*
1760     * Save Unicode to legacy mapping in indirect lookup table...
1761     */
1762
1763     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1764     if (!vrow)
1765     {
1766       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1767       if (!vrow)
1768         goto vbcs_error;
1769
1770       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1771     }
1772
1773     vrow += (int)(unichar & 0xff);
1774
1775    /*
1776     * Convert Replacement Character to visible replacement...
1777     */
1778
1779     if (unichar == 0xfffd)
1780       legchar = (unsigned long)'?';
1781
1782    /*
1783     * First (oldest) legacy character uses Unicode mapping cell...
1784     */
1785
1786     if (!*vrow)
1787       *vrow = (cups_vbcs_t)legchar;
1788   }
1789
1790   vmap->charcount = (i - vmap->widecount);
1791
1792   cupsFileClose(fp);
1793
1794  /*
1795   * Add it to the cache and return...
1796   */
1797
1798   vmap->next = vmap_cache;
1799   vmap_cache = vmap;
1800
1801   DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
1802
1803   return (vmap);
1804
1805  /*
1806   * If we get here, the file contains errors...
1807   */
1808
1809   vbcs_error:
1810
1811   free_vbcs_charmap(vmap);
1812
1813   cupsFileClose(fp);
1814
1815   DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
1816
1817   return (NULL);
1818 }
1819
1820
1821 /*
1822  * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
1823  */