cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
   3  *
   4  *   Transcoding support for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 2007-2008 by Apple Inc.
   7  *   Copyright 1997-2007 by Easy Software Products.
   8  *
   9  *   These coded instructions, statements, and computer programs are the
  10  *   property of Apple Inc. and are protected by Federal copyright
  11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  *   which should have been included with this file.  If this
  13  *   file is missing or damaged, see the license at "http://www.cups.org/".
  14  *
  15  *   This file is subject to the Apple OS-Developed Software exception.
  16  *
  17  * Contents:
  18  *
  19  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  20  *   _cupsCharmapFree()  - Free a character set map.
  21  *   _cupsCharmapGet()   - Get a character set map.
  22  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  23  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  24  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  25  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  26  *   compare_wide()      - Compare key for wide (VBCS) match.
  27  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  28  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  29  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  30  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  31  *   free_sbcs_charmap() - Free memory used by a single byte character set.
  32  *   free_vbcs_charmap() - Free memory used by a variable byte character set.
  33  *   get_charmap()       - Lookup or get a character set map (private).
  34  *   get_charmap_count() - Count lines in a charmap file.
  35  *   get_sbcs_charmap()  - Get SBCS Charmap.
  36  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  37  */
  38
  39 /*
  40  * Include necessary headers...
  41  */
  42
  43 #include "globals.h"
  44 #include "debug.h"
  45 #include <limits.h>
  46 #include <stdlib.h>
  47 #include <errno.h>
  48 #include <time.h>
  49
  50
  51 /*
  52  * Local globals...
  53  */
  54
  55 #ifdef HAVE_PTHREAD_H
  56 static pthread_mutex_t  map_mutex = PTHREAD_MUTEX_INITIALIZER;
  57                                         /* Mutex to control access to maps */
  58 #endif /* HAVE_PTHREAD_H */
  59 static _cups_cmap_t     *cmap_cache = NULL;
  60                                         /* SBCS Charmap Cache */
  61 static _cups_vmap_t     *vmap_cache = NULL;
  62                                         /* VBCS Charmap Cache */
  63
  64
  65 /*
  66  * Local functions...
  67  */
  68
  69 static int              compare_wide(const void *k1, const void *k2);
  70 static int              conv_sbcs_to_utf8(cups_utf8_t *dest,
  71                                           const cups_sbcs_t *src,
  72                                           int maxout,
  73                                           const cups_encoding_t encoding);
  74 static int              conv_utf8_to_sbcs(cups_sbcs_t *dest,
  75                                           const cups_utf8_t *src,
  76                                           int maxout,
  77                                           const cups_encoding_t encoding);
  78 static int              conv_utf8_to_vbcs(cups_sbcs_t *dest,
  79                                           const cups_utf8_t *src,
  80                                           int maxout,
  81                                           const cups_encoding_t encoding);
  82 static int              conv_vbcs_to_utf8(cups_utf8_t *dest,
  83                                           const cups_sbcs_t *src,
  84                                           int maxout,
  85                                           const cups_encoding_t encoding);
  86 static void             free_sbcs_charmap(_cups_cmap_t *sbcs);
  87 static void             free_vbcs_charmap(_cups_vmap_t *vbcs);
  88 static void             *get_charmap(const cups_encoding_t encoding);
  89 static int              get_charmap_count(cups_file_t *fp);
  90 static _cups_cmap_t     *get_sbcs_charmap(const cups_encoding_t encoding,
  91                                           const char *filename);
  92 static _cups_vmap_t     *get_vbcs_charmap(const cups_encoding_t encoding,
  93                                           const char *filename);
  94
  95
  96 /*
  97  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
  98  */
  99
 100 void
 101 _cupsCharmapFlush(void)
 102 {
 103   _cups_cmap_t  *cmap,                  /* Legacy SBCS / Unicode Charset Map */
 104                 *cnext;                 /* Next Legacy SBCS Charset Map */
 105   _cups_vmap_t  *vmap,                  /* Legacy VBCS / Unicode Charset Map */
 106                 *vnext;                 /* Next Legacy VBCS Charset Map */
 107
 108
 109 #ifdef HAVE_PTHREAD_H
 110   pthread_mutex_lock(&map_mutex);
 111 #endif /* HAVE_PTHREAD_H */
 112
 113  /*
 114   * Loop through SBCS charset map cache, free all memory...
 115   */
 116
 117   for (cmap = cmap_cache; cmap; cmap = cnext)
 118   {
 119     cnext = cmap->next;
 120
 121     free_sbcs_charmap(cmap);
 122   }
 123
 124   cmap_cache = NULL;
 125
 126  /*
 127   * Loop through DBCS/VBCS charset map cache, free all memory...
 128   */
 129
 130   for (vmap = vmap_cache; vmap; vmap = vnext)
 131   {
 132     vnext = vmap->next;
 133
 134     free_vbcs_charmap(vmap);
 135   }
 136
 137   vmap_cache = NULL;
 138
 139 #ifdef HAVE_PTHREAD_H
 140   pthread_mutex_unlock(&map_mutex);
 141 #endif /* HAVE_PTHREAD_H */
 142 }
 143
 144
 145 /*
 146  * '_cupsCharmapFree()' - Free a character set map.
 147  *
 148  * This does not actually free; use '_cupsCharmapFlush()' for that.
 149  */
 150
 151 void
 152 _cupsCharmapFree(
 153     const cups_encoding_t encoding)     /* I - Encoding */
 154 {
 155   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 156   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 157
 158
 159  /*
 160   * See if we already have this SBCS charset map loaded...
 161   */
 162
 163 #ifdef HAVE_PTHREAD_H
 164   pthread_mutex_lock(&map_mutex);
 165 #endif /* HAVE_PTHREAD_H */
 166
 167   for (cmap = cmap_cache; cmap; cmap = cmap->next)
 168   {
 169     if (cmap->encoding == encoding)
 170     {
 171       if (cmap->used > 0)
 172         cmap->used --;
 173       break;
 174     }
 175   }
 176
 177  /*
 178   * See if we already have this DBCS/VBCS charset map loaded...
 179   */
 180
 181   for (vmap = vmap_cache; vmap; vmap = vmap->next)
 182   {
 183     if (vmap->encoding == encoding)
 184     {
 185       if (vmap->used > 0)
 186         vmap->used --;
 187       break;
 188     }
 189   }
 190
 191 #ifdef HAVE_PTHREAD_H
 192   pthread_mutex_unlock(&map_mutex);
 193 #endif /* HAVE_PTHREAD_H */
 194 }
 195
 196
 197 /*
 198  * '_cupsCharmapGet()' - Get a character set map.
 199  *
 200  * This code handles single-byte (SBCS), double-byte (DBCS), and
 201  * variable-byte (VBCS) character sets _without_ charset escapes...
 202  * This code does not handle multiple-byte character sets (MBCS)
 203  * (such as ISO-2022-JP) with charset switching via escapes...
 204  */
 205
 206 void *                                  /* O - Charset map pointer */
 207 _cupsCharmapGet(
 208     const cups_encoding_t encoding)     /* I - Encoding */
 209 {
 210   void  *charmap;                       /* Charset map pointer */
 211
 212
 213   DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
 214
 215  /*
 216   * Check for valid arguments...
 217   */
 218
 219   if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 220   {
 221     DEBUG_puts("    Bad encoding, returning NULL!");
 222     return (NULL);
 223   }
 224
 225  /*
 226   * Lookup or get the charset map pointer and return...
 227   */
 228
 229 #ifdef HAVE_PTHREAD_H
 230   pthread_mutex_lock(&map_mutex);
 231 #endif /* HAVE_PTHREAD_H */
 232
 233   charmap = get_charmap(encoding);
 234
 235 #ifdef HAVE_PTHREAD_H
 236   pthread_mutex_unlock(&map_mutex);
 237 #endif /* HAVE_PTHREAD_H */
 238
 239   return (charmap);
 240 }
 241
 242
 243 /*
 244  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 245  *
 246  * This code handles single-byte (SBCS), double-byte (DBCS), and
 247  * variable-byte (VBCS) character sets _without_ charset escapes...
 248  * This code does not handle multiple-byte character sets (MBCS)
 249  * (such as ISO-2022-JP) with charset switching via escapes...
 250  */
 251
 252 int                                     /* O - Count or -1 on error */
 253 cupsCharsetToUTF8(
 254     cups_utf8_t *dest,                  /* O - Target string */
 255     const char *src,                    /* I - Source string */
 256     const int maxout,                   /* I - Max output */
 257     const cups_encoding_t encoding)     /* I - Encoding */
 258 {
 259   int   bytes;                          /* Number of bytes converted */
 260
 261
 262  /*
 263   * Check for valid arguments...
 264   */
 265
 266   DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
 267                 dest, src, maxout, encoding));
 268
 269   if (dest)
 270     *dest = '\0';
 271
 272   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 273   {
 274     DEBUG_puts("    Bad arguments, returning -1");
 275     return (-1);
 276   }
 277
 278  /*
 279   * Handle identity conversions...
 280   */
 281
 282   if (encoding == CUPS_UTF8 ||
 283       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 284   {
 285     strlcpy((char *)dest, src, maxout);
 286     return ((int)strlen((char *)dest));
 287   }
 288
 289  /*
 290   * Handle ISO-8859-1 to UTF-8 directly...
 291   */
 292
 293   if (encoding == CUPS_ISO8859_1)
 294   {
 295     int         ch;                     /* Character from string */
 296     cups_utf8_t *destptr,               /* Pointer into UTF-8 buffer */
 297                 *destend;               /* End of UTF-8 buffer */
 298
 299
 300     destptr = dest;
 301     destend = dest + maxout - 2;
 302
 303     while (*src && destptr < destend)
 304     {
 305       ch = *src++ & 255;
 306
 307       if (ch & 128)
 308       {
 309         *destptr++ = 0xc0 | (ch >> 6);
 310         *destptr++ = 0x80 | (ch & 0x3f);
 311       }
 312       else
 313         *destptr++ = ch;
 314     }
 315
 316     *destptr = '\0';
 317
 318     return ((int)(destptr - dest));
 319   }
 320
 321  /*
 322   * Convert input legacy charset to UTF-8...
 323   */
 324
 325 #ifdef HAVE_PTHREAD_H
 326   pthread_mutex_lock(&map_mutex);
 327 #endif /* HAVE_PTHREAD_H */
 328
 329   if (encoding < CUPS_ENCODING_SBCS_END)
 330     bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 331   else
 332     bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 333
 334 #ifdef HAVE_PTHREAD_H
 335   pthread_mutex_unlock(&map_mutex);
 336 #endif /* HAVE_PTHREAD_H */
 337
 338   return (bytes);
 339 }
 340
 341
 342 /*
 343  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 344  *
 345  * This code handles single-byte (SBCS), double-byte (DBCS), and
 346  * variable-byte (VBCS) character sets _without_ charset escapes...
 347  * This code does not handle multiple-byte character sets (MBCS)
 348  * (such as ISO-2022-JP) with charset switching via escapes...
 349  */
 350
 351 int                                     /* O - Count or -1 on error */
 352 cupsUTF8ToCharset(
 353     char                  *dest,        /* O - Target string */
 354     const cups_utf8_t     *src,         /* I - Source string */
 355     const int             maxout,       /* I - Max output */
 356     const cups_encoding_t encoding)     /* I - Encoding */
 357 {
 358   int   bytes;                          /* Number of bytes converted */
 359
 360
 361  /*
 362   * Check for valid arguments...
 363   */
 364
 365   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 366   {
 367     if (dest)
 368       *dest = '\0';
 369
 370     return (-1);
 371   }
 372
 373  /*
 374   * Handle identity conversions...
 375   */
 376
 377   if (encoding == CUPS_UTF8 ||
 378       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 379   {
 380     strlcpy(dest, (char *)src, maxout);
 381     return ((int)strlen(dest));
 382   }
 383
 384  /*
 385   * Handle UTF-8 to ISO-8859-1 directly...
 386   */
 387
 388   if (encoding == CUPS_ISO8859_1)
 389   {
 390     int         ch;                     /* Character from string */
 391     char        *destptr,               /* Pointer into ISO-8859-1 buffer */
 392                 *destend;               /* End of ISO-8859-1 buffer */
 393
 394
 395     destptr = dest;
 396     destend = dest + maxout - 1;
 397
 398     while (*src && destptr < destend)
 399     {
 400       ch = *src++;
 401
 402       if ((ch & 0xe0) == 0xc0)
 403       {
 404         ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
 405
 406         if (ch < 256)
 407           *destptr++ = ch;
 408         else
 409           *destptr++ = '?';
 410       }
 411       else if ((ch & 0xf0) == 0xe0 ||
 412                (ch & 0xf8) == 0xf0)
 413         *destptr++ = '?';
 414       else if (!(ch & 0x80))
 415         *destptr++ = ch;
 416     }
 417
 418     *destptr = '\0';
 419
 420     return ((int)(destptr - dest));
 421   }
 422
 423  /*
 424   * Convert input UTF-8 to legacy charset...
 425   */
 426
 427 #ifdef HAVE_PTHREAD_H
 428   pthread_mutex_lock(&map_mutex);
 429 #endif /* HAVE_PTHREAD_H */
 430
 431   if (encoding < CUPS_ENCODING_SBCS_END)
 432     bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 433   else
 434     bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 435
 436 #ifdef HAVE_PTHREAD_H
 437   pthread_mutex_unlock(&map_mutex);
 438 #endif /* HAVE_PTHREAD_H */
 439
 440   return (bytes);
 441 }
 442
 443
 444 /*
 445  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 446  *
 447  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 448  *
 449  *   UTF-32 char     UTF-8 char(s)
 450  *   --------------------------------------------------
 451  *        0 to 127 = 0xxxxxxx (US-ASCII)
 452  *     128 to 2047 = 110xxxxx 10yyyyyy
 453  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 454  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 455  *
 456  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 457  * which would convert to five- or six-octet UTF-8 sequences...
 458  */
 459
 460 int                                     /* O - Count or -1 on error */
 461 cupsUTF8ToUTF32(
 462     cups_utf32_t      *dest,            /* O - Target string */
 463     const cups_utf8_t *src,             /* I - Source string */
 464     const int         maxout)           /* I - Max output */
 465 {
 466   int           i;                      /* Looping variable */
 467   cups_utf8_t   ch;                     /* Character value */
 468   cups_utf8_t   next;                   /* Next character value */
 469   cups_utf32_t  ch32;                   /* UTF-32 character value */
 470
 471
 472  /*
 473   * Check for valid arguments and clear output...
 474   */
 475
 476   DEBUG_printf(("cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)\n", dest,
 477                 src ? (const char *)src : "(null)", maxout));
 478
 479   if (dest)
 480     *dest = 0;
 481
 482   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 483   {
 484     DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad arguments)");
 485
 486     return (-1);
 487   }
 488
 489  /*
 490   * Convert input UTF-8 to output UTF-32...
 491   */
 492
 493   for (i = maxout - 1; *src && i > 0; i --)
 494   {
 495     ch = *src++;
 496
 497    /*
 498     * Convert UTF-8 character(s) to UTF-32 character...
 499     */
 500
 501     if (!(ch & 0x80))
 502     {
 503      /*
 504       * One-octet UTF-8 <= 127 (US-ASCII)...
 505       */
 506
 507       *dest++ = ch;
 508
 509       DEBUG_printf(("cupsUTF8ToUTF32: %02x => %08X\n", src[-1], ch));
 510       continue;
 511     }
 512     else if ((ch & 0xe0) == 0xc0)
 513     {
 514      /*
 515       * Two-octet UTF-8 <= 2047 (Latin-x)...
 516       */
 517
 518       next = *src++;
 519       if ((next & 0xc0) != 0x80)
 520       {
 521         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 522
 523         return (-1);
 524       }
 525
 526       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 527
 528      /*
 529       * Check for non-shortest form (invalid UTF-8)...
 530       */
 531
 532       if (ch32 < 0x80)
 533       {
 534         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 535
 536         return (-1);
 537       }
 538
 539       *dest++ = ch32;
 540
 541       DEBUG_printf(("cupsUTF8ToUTF32: %02x %02x => %08X\n",
 542                     src[-2], src[-1], (unsigned)ch32));
 543     }
 544     else if ((ch & 0xf0) == 0xe0)
 545     {
 546      /*
 547       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 548       */
 549
 550       next = *src++;
 551       if ((next & 0xc0) != 0x80)
 552       {
 553         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 554
 555         return (-1);
 556       }
 557
 558       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 559
 560       next = *src++;
 561       if ((next & 0xc0) != 0x80)
 562       {
 563         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 564
 565         return (-1);
 566       }
 567
 568       ch32 = (ch32 << 6) | (next & 0x3f);
 569
 570      /*
 571       * Check for non-shortest form (invalid UTF-8)...
 572       */
 573
 574       if (ch32 < 0x800)
 575       {
 576         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 577
 578         return (-1);
 579       }
 580
 581       *dest++ = ch32;
 582
 583       DEBUG_printf(("cupsUTF8ToUTF32: %02x %02x %02x => %08X\n",
 584                     src[-3], src[-2], src[-1], (unsigned)ch32));
 585     }
 586     else if ((ch & 0xf8) == 0xf0)
 587     {
 588      /*
 589       * Four-octet UTF-8...
 590       */
 591
 592       next = *src++;
 593       if ((next & 0xc0) != 0x80)
 594       {
 595         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 596
 597         return (-1);
 598       }
 599
 600       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 601
 602       next = *src++;
 603       if ((next & 0xc0) != 0x80)
 604       {
 605         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 606
 607         return (-1);
 608       }
 609
 610       ch32 = (ch32 << 6) | (next & 0x3f);
 611
 612       next = *src++;
 613       if ((next & 0xc0) != 0x80)
 614       {
 615         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 616
 617         return (-1);
 618       }
 619
 620       ch32 = (ch32 << 6) | (next & 0x3f);
 621
 622      /*
 623       * Check for non-shortest form (invalid UTF-8)...
 624       */
 625
 626       if (ch32 < 0x10000)
 627       {
 628         DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 629
 630         return (-1);
 631       }
 632
 633       *dest++ = ch32;
 634
 635       DEBUG_printf(("cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X\n",
 636                     src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
 637     }
 638     else
 639     {
 640      /*
 641       * More than 4-octet (invalid UTF-8 sequence)...
 642       */
 643
 644       DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 645
 646       return (-1);
 647     }
 648
 649    /*
 650     * Check for UTF-16 surrogate (illegal UTF-8)...
 651     */
 652
 653     if (ch32 >= 0xd800 && ch32 <= 0xdfff)
 654       return (-1);
 655   }
 656
 657   *dest = 0;
 658
 659   DEBUG_printf(("cupsUTF8ToUTF32: Returning %d characters\n", maxout - 1 - i));
 660
 661   return (maxout - 1 - i);
 662 }
 663
 664
 665 /*
 666  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 667  *
 668  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 669  *
 670  *   UTF-32 char     UTF-8 char(s)
 671  *   --------------------------------------------------
 672  *        0 to 127 = 0xxxxxxx (US-ASCII)
 673  *     128 to 2047 = 110xxxxx 10yyyyyy
 674  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 675  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 676  *
 677  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 678  * which would convert to five- or six-octet UTF-8 sequences...
 679  */
 680
 681 int                                     /* O - Count or -1 on error */
 682 cupsUTF32ToUTF8(
 683     cups_utf8_t        *dest,           /* O - Target string */
 684     const cups_utf32_t *src,            /* I - Source string */
 685     const int          maxout)          /* I - Max output */
 686 {
 687   cups_utf8_t   *start;                 /* Start of destination string */
 688   int           i;                      /* Looping variable */
 689   int           swap;                   /* Byte-swap input to output */
 690   cups_utf32_t  ch;                     /* Character value */
 691
 692
 693  /*
 694   * Check for valid arguments and clear output...
 695   */
 696
 697   DEBUG_printf(("cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)\n", dest, src,
 698                 maxout));
 699
 700   if (dest)
 701     *dest = '\0';
 702
 703   if (!dest || !src || maxout < 1)
 704   {
 705     DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (bad args)");
 706
 707     return (-1);
 708   }
 709
 710  /*
 711   * Check for leading BOM in UTF-32 and inverted BOM...
 712   */
 713
 714   start = dest;
 715   swap  = *src == 0xfffe0000;
 716
 717   DEBUG_printf(("cupsUTF32ToUTF8: swap=%d\n", swap));
 718
 719   if (*src == 0xfffe0000 || *src == 0xfeff)
 720     src ++;
 721
 722  /*
 723   * Convert input UTF-32 to output UTF-8...
 724   */
 725
 726   for (i = maxout - 1; *src && i > 0;)
 727   {
 728     ch = *src++;
 729
 730    /*
 731     * Byte swap input UTF-32, if necessary...
 732     * (only byte-swapping 24 of 32 bits)
 733     */
 734
 735     if (swap)
 736       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 737
 738    /*
 739     * Check for beyond Plane 16 (invalid UTF-32)...
 740     */
 741
 742     if (ch > 0x10ffff)
 743     {
 744       DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (character out of range)");
 745
 746       return (-1);
 747     }
 748
 749    /*
 750     * Convert UTF-32 character to UTF-8 character(s)...
 751     */
 752
 753     if (ch < 0x80)
 754     {
 755      /*
 756       * One-octet UTF-8 <= 127 (US-ASCII)...
 757       */
 758
 759       *dest++ = (cups_utf8_t)ch;
 760       i --;
 761
 762       DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x\n", (unsigned)ch, dest[-1]));
 763     }
 764     else if (ch < 0x800)
 765     {
 766      /*
 767       * Two-octet UTF-8 <= 2047 (Latin-x)...
 768       */
 769
 770       if (i < 2)
 771       {
 772         DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (too long 2)");
 773
 774         return (-1);
 775       }
 776
 777       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 778       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 779       i -= 2;
 780
 781       DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x %02x\n", (unsigned)ch,
 782                     dest[-2], dest[-1]));
 783     }
 784     else if (ch < 0x10000)
 785     {
 786      /*
 787       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 788       */
 789
 790       if (i < 3)
 791       {
 792         DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (too long 3)");
 793
 794         return (-1);
 795       }
 796
 797       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 798       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 799       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 800       i -= 3;
 801
 802       DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x %02x %02x\n", (unsigned)ch,
 803                     dest[-3], dest[-2], dest[-1]));
 804     }
 805     else
 806     {
 807      /*
 808       * Four-octet UTF-8...
 809       */
 810
 811       if (i < 4)
 812         return (-1);
 813
 814       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 815       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 816       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 817       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 818       i -= 4;
 819
 820       DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x\n",
 821                     (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
 822     }
 823   }
 824
 825   *dest = '\0';
 826
 827   DEBUG_printf(("cupsUTF32ToUTF8: Returning %d\n", (int)(dest - start)));
 828
 829   return ((int)(dest - start));
 830 }
 831
 832
 833 /*
 834  * 'compare_wide()' - Compare key for wide (VBCS) match.
 835  */
 836
 837 static int
 838 compare_wide(const void *k1,            /* I - Key char */
 839              const void *k2)            /* I - Map char */
 840 {
 841   cups_vbcs_t   key;                    /* Legacy key character */
 842   cups_vbcs_t   map;                    /* Legacy map character */
 843
 844
 845   key = *((cups_vbcs_t *)k1);
 846   map = ((_cups_wide2uni_t *)k2)->widechar;
 847
 848   return ((int)(key - map));
 849 }
 850
 851
 852 /*
 853  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 854  */
 855
 856 static int                              /* O - Count or -1 on error */
 857 conv_sbcs_to_utf8(
 858     cups_utf8_t           *dest,        /* O - Target string */
 859     const cups_sbcs_t     *src,         /* I - Source string */
 860     int                   maxout,       /* I - Max output */
 861     const cups_encoding_t encoding)     /* I - Encoding */
 862 {
 863   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 864   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 865   cups_sbcs_t   legchar;                /* Legacy character value */
 866   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 867                 *workptr;               /* Pointer into string */
 868
 869
 870  /*
 871   * Find legacy charset map in cache...
 872   */
 873
 874   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 875     return (-1);
 876
 877  /*
 878   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 879   */
 880
 881   work[0] = 0xfeff;
 882   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 883   {
 884     legchar = *src++;
 885
 886    /*
 887     * Convert ASCII verbatim (optimization)...
 888     */
 889
 890     if (legchar < 0x80)
 891       *workptr++ = (cups_utf32_t)legchar;
 892     else
 893     {
 894      /*
 895       * Convert unknown character to Replacement Character...
 896       */
 897
 898       crow = cmap->char2uni + legchar;
 899
 900       if (!*crow)
 901         *workptr++ = 0xfffd;
 902       else
 903         *workptr++ = (cups_utf32_t)*crow;
 904     }
 905   }
 906
 907   *workptr = 0;
 908
 909  /*
 910   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 911   */
 912
 913   cmap->used --;
 914
 915   return (cupsUTF32ToUTF8(dest, work, maxout));
 916 }
 917
 918
 919 /*
 920  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 921  */
 922
 923 static int                              /* O - Count or -1 on error */
 924 conv_utf8_to_sbcs(
 925     cups_sbcs_t           *dest,        /* O - Target string */
 926     const cups_utf8_t     *src,         /* I - Source string */
 927     int                   maxout,       /* I - Max output */
 928     const cups_encoding_t encoding)     /* I - Encoding */
 929 {
 930   cups_sbcs_t   *start;                 /* Start of destination string */
 931   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 932   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 933   cups_utf32_t  unichar;                /* Character value */
 934   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 935                 *workptr;               /* Pointer into string */
 936
 937
 938  /*
 939   * Find legacy charset map in cache...
 940   */
 941
 942   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 943     return (-1);
 944
 945  /*
 946   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 947   */
 948
 949   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 950     return (-1);
 951
 952  /*
 953   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 954   */
 955
 956   for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
 957   {
 958     unichar = *workptr++;
 959     if (!unichar)
 960       break;
 961
 962    /*
 963     * Convert ASCII verbatim (optimization)...
 964     */
 965
 966     if (unichar < 0x80)
 967     {
 968       *dest++ = (cups_sbcs_t)unichar;
 969       continue;
 970     }
 971
 972    /*
 973     * Convert unknown character to visible replacement...
 974     */
 975
 976     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 977
 978     if (srow)
 979       srow += (int)(unichar & 0xff);
 980
 981     if (!srow || !*srow)
 982       *dest++ = '?';
 983     else
 984       *dest++ = *srow;
 985   }
 986
 987   *dest = '\0';
 988
 989   cmap->used --;
 990
 991   return ((int)(dest - start));
 992 }
 993
 994
 995 /*
 996  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 997  */
 998
 999 static int                              /* O - Count or -1 on error */
1000 conv_utf8_to_vbcs(
1001     cups_sbcs_t           *dest,        /* O - Target string */
1002     const cups_utf8_t     *src,         /* I - Source string */
1003     int                   maxout,       /* I - Max output */
1004     const cups_encoding_t encoding)     /* I - Encoding */
1005 {
1006   cups_sbcs_t   *start;                 /* Start of destination string */
1007   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
1008   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1009   cups_utf32_t  unichar;                /* Character value */
1010   cups_vbcs_t   legchar;                /* Legacy character value */
1011   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1012                 *workptr;               /* Pointer into string */
1013
1014
1015   DEBUG_printf(("conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
1016                 "encoding=%d)\n", dest, src, maxout, encoding));
1017
1018  /*
1019   * Find legacy charset map in cache...
1020   */
1021
1022   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1023   {
1024     DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (no charmap)");
1025
1026     return (-1);
1027   }
1028
1029  /*
1030   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1031   */
1032
1033   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
1034   {
1035     DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
1036
1037     return (-1);
1038   }
1039
1040  /*
1041   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1042   */
1043
1044   for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
1045   {
1046     unichar = *workptr++;
1047
1048    /*
1049     * Convert ASCII verbatim (optimization)...
1050     */
1051
1052     if (unichar < 0x80)
1053     {
1054       *dest++ = (cups_sbcs_t)unichar;
1055
1056       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X\n", (unsigned)unichar,
1057                     dest[-1]));
1058
1059       continue;
1060     }
1061
1062    /*
1063     * Convert unknown character to visible replacement...
1064     */
1065
1066     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1067
1068     if (vrow)
1069       vrow += (int)(unichar & 0xff);
1070
1071     if (!vrow || !*vrow)
1072       legchar = (cups_vbcs_t)'?';
1073     else
1074       legchar = (cups_vbcs_t)*vrow;
1075
1076    /*
1077     * Save n-byte legacy character...
1078     */
1079
1080     if (legchar > 0xffffff)
1081     {
1082       if (maxout < 5)
1083       {
1084         DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (out of space)");
1085
1086         return (-1);
1087       }
1088
1089       *dest++ = (cups_sbcs_t)(legchar >> 24);
1090       *dest++ = (cups_sbcs_t)(legchar >> 16);
1091       *dest++ = (cups_sbcs_t)(legchar >> 8);
1092       *dest++ = (cups_sbcs_t)legchar;
1093
1094       maxout -= 3;
1095
1096       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X\n",
1097                     (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
1098     }
1099     else if (legchar > 0xffff)
1100     {
1101       if (maxout < 4)
1102       {
1103         DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (out of space)");
1104
1105         return (-1);
1106       }
1107
1108       *dest++ = (cups_sbcs_t)(legchar >> 16);
1109       *dest++ = (cups_sbcs_t)(legchar >> 8);
1110       *dest++ = (cups_sbcs_t)legchar;
1111
1112       maxout -= 2;
1113
1114       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X %02X %02X\n",
1115                     (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
1116     }
1117     else if (legchar > 0xff)
1118     {
1119       *dest++ = (cups_sbcs_t)(legchar >> 8);
1120       *dest++ = (cups_sbcs_t)legchar;
1121
1122       maxout --;
1123
1124       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X %02X\n",
1125                     (unsigned)unichar, dest[-2], dest[-1]));
1126     }
1127     else
1128     {
1129       *dest++ = legchar;
1130
1131       DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X\n",
1132                     (unsigned)unichar, dest[-1]));
1133     }
1134   }
1135
1136   *dest = '\0';
1137
1138   vmap->used --;
1139
1140   DEBUG_printf(("conv_utf8_to_vbcs: Returning %d characters\n",
1141                 (int)(dest - start)));
1142
1143   return ((int)(dest - start));
1144 }
1145
1146
1147 /*
1148  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1149  */
1150
1151 static int                              /* O - Count or -1 on error */
1152 conv_vbcs_to_utf8(
1153     cups_utf8_t           *dest,        /* O - Target string */
1154     const cups_sbcs_t     *src,         /* I - Source string */
1155     int                   maxout,       /* I - Max output */
1156     const cups_encoding_t encoding)     /* I - Encoding */
1157 {
1158   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1159   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1160   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1161   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
1162   cups_vbcs_t   legchar;                /* Legacy character value */
1163   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1164                 *workptr;               /* Pointer into string */
1165
1166
1167  /*
1168   * Find legacy charset map in cache...
1169   */
1170
1171   DEBUG_printf(("conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)\n",
1172                 dest, src, maxout, encoding));
1173
1174   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1175   {
1176     DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1177
1178     return (-1);
1179   }
1180
1181  /*
1182   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1183   */
1184
1185   work[0] = 0xfeff;
1186   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1187   {
1188     legchar  = *src++;
1189     leadchar = (cups_sbcs_t)legchar;
1190
1191    /*
1192     * Convert ASCII verbatim (optimization)...
1193     */
1194
1195     if (legchar < 0x80)
1196     {
1197       *workptr++ = (cups_utf32_t)legchar;
1198
1199       DEBUG_printf(("conv_vbcs_to_utf8: %02X => %08X\n", src[-1],
1200                     (unsigned)legchar));
1201       continue;
1202     }
1203
1204    /*
1205     * Convert 2-byte legacy character...
1206     */
1207
1208     if (vmap->lead2char[(int)leadchar] == leadchar)
1209     {
1210       if (!*src)
1211       {
1212         DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (short string)");
1213
1214         return (-1);
1215       }
1216
1217       legchar = (legchar << 8) | *src++;
1218
1219      /*
1220       * Convert unknown character to Replacement Character...
1221       */
1222
1223       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1224       if (crow)
1225         crow += (int) (legchar & 0xff);
1226
1227       if (!crow || !*crow)
1228         *workptr++ = 0xfffd;
1229       else
1230         *workptr++ = (cups_utf32_t)*crow;
1231
1232       DEBUG_printf(("conv_vbcs_to_utf8: %02X %02X => %08X\n",
1233                     src[-2], src[-1], (unsigned)workptr[-1]));
1234       continue;
1235     }
1236
1237    /*
1238     * Fetch 3-byte or 4-byte legacy character...
1239     */
1240
1241     if (vmap->lead3char[(int)leadchar] == leadchar)
1242     {
1243       if (!*src || !src[1])
1244       {
1245         DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (short string 2)");
1246
1247         return (-1);
1248       }
1249
1250       legchar = (legchar << 8) | *src++;
1251       legchar = (legchar << 8) | *src++;
1252     }
1253     else if (vmap->lead4char[(int)leadchar] == leadchar)
1254     {
1255       if (!*src || !src[1] || !src[2])
1256       {
1257         DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (short string 3)");
1258
1259         return (-1);
1260       }
1261
1262       legchar = (legchar << 8) | *src++;
1263       legchar = (legchar << 8) | *src++;
1264       legchar = (legchar << 8) | *src++;
1265     }
1266     else
1267     {
1268       DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (bad character)");
1269
1270       return (-1);
1271     }
1272
1273    /*
1274     * Find 3-byte or 4-byte legacy character...
1275     */
1276
1277     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1278                                            vmap->wide2uni,
1279                                            vmap->widecount,
1280                                            sizeof(_cups_wide2uni_t),
1281                                            compare_wide);
1282
1283    /*
1284     * Convert unknown character to Replacement Character...
1285     */
1286
1287     if (!wide2uni || !wide2uni->unichar)
1288       *workptr++ = 0xfffd;
1289     else
1290       *workptr++ = wide2uni->unichar;
1291
1292     if (vmap->lead3char[(int)leadchar] == leadchar)
1293       DEBUG_printf(("conv_vbcs_to_utf8: %02X %02X %02X => %08X\n",
1294                     src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1295     else
1296       DEBUG_printf(("conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X\n",
1297                     src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1298   }
1299
1300   *workptr = 0;
1301
1302   vmap->used --;
1303
1304   DEBUG_printf(("conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8\n",
1305                 (int)(workptr - work)));
1306
1307  /*
1308   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1309   */
1310
1311   return (cupsUTF32ToUTF8(dest, work, maxout));
1312 }
1313
1314
1315 /*
1316  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1317  */
1318
1319 static void
1320 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1321 {
1322   int           i;                      /* Looping variable */
1323
1324
1325   for (i = 0; i < 256; i ++)
1326     if (cmap->uni2char[i])
1327       free(cmap->uni2char[i]);
1328
1329   free(cmap);
1330 }
1331
1332
1333 /*
1334  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1335  */
1336
1337 static void
1338 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1339 {
1340   int           i;                      /* Looping variable */
1341
1342
1343   for (i = 0; i < 256; i ++)
1344     if (vmap->char2uni[i])
1345       free(vmap->char2uni[i]);
1346
1347   for (i = 0; i < 256; i ++)
1348     if (vmap->uni2char[i])
1349       free(vmap->uni2char[i]);
1350
1351   if (vmap->wide2uni)
1352     free(vmap->wide2uni);
1353
1354   free(vmap);
1355 }
1356
1357
1358 /*
1359  * 'get_charmap()' - Lookup or get a character set map (private).
1360  *
1361  * This code handles single-byte (SBCS), double-byte (DBCS), and
1362  * variable-byte (VBCS) character sets _without_ charset escapes...
1363  * This code does not handle multiple-byte character sets (MBCS)
1364  * (such as ISO-2022-JP) with charset switching via escapes...
1365  */
1366
1367
1368 static void *                           /* O - Charset map pointer */
1369 get_charmap(
1370     const cups_encoding_t encoding)     /* I - Encoding */
1371 {
1372   char          filename[1024];         /* Filename for charset map file */
1373   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1374
1375
1376   DEBUG_printf(("get_charmap(encoding=%d)\n", encoding));
1377
1378  /*
1379   * Get the data directory and charset map name...
1380   */
1381
1382   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1383            cg->cups_datadir, _cupsEncodingName(encoding));
1384
1385   DEBUG_printf(("get_charmap: filename=\"%s\"\n", filename));
1386
1387  /*
1388   * Read charset map input file into cache...
1389   */
1390
1391   if (encoding < CUPS_ENCODING_SBCS_END)
1392     return (get_sbcs_charmap(encoding, filename));
1393   else if (encoding < CUPS_ENCODING_VBCS_END)
1394     return (get_vbcs_charmap(encoding, filename));
1395   else
1396     return (NULL);
1397 }
1398
1399
1400 /*
1401  * 'get_charmap_count()' - Count lines in a charmap file.
1402  */
1403
1404 static int                              /* O - Count or -1 on error */
1405 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1406 {
1407   int   count;                          /* Number of lines */
1408   char  line[256];                      /* Line from input map file */
1409
1410
1411  /*
1412   * Count lines in map input file...
1413   */
1414
1415   count = 0;
1416
1417   while (cupsFileGets(fp, line, sizeof(line)))
1418     if (line[0] == '0')
1419       count ++;
1420
1421  /*
1422   * Return the number of lines...
1423   */
1424
1425   if (count > 0)
1426     return (count);
1427   else
1428     return (-1);
1429 }
1430
1431
1432 /*
1433  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1434  */
1435
1436 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1437 get_sbcs_charmap(
1438     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1439     const char            *filename)    /* I - Charmap Filename */
1440 {
1441   unsigned long legchar;                /* Legacy character value */
1442   cups_utf32_t  unichar;                /* Unicode character value */
1443   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1444   cups_file_t   *fp;                    /* Charset map file pointer */
1445   char          *s;                     /* Line parsing pointer */
1446   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1447   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1448   char          line[256];              /* Line from charset map file */
1449
1450
1451  /*
1452   * See if we already have this SBCS charset map loaded...
1453   */
1454
1455   DEBUG_printf(("get_sbcs_charmap(encoding=%d, filename=\"%s\")\n", encoding,
1456                 filename));
1457
1458   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1459   {
1460     if (cmap->encoding == encoding)
1461     {
1462       cmap->used ++;
1463       DEBUG_printf(("get_sbcs_charmap: Returning existing cmap=%p\n", cmap));
1464
1465       return ((void *)cmap);
1466     }
1467   }
1468
1469  /*
1470   * Open SBCS charset map input file...
1471   */
1472
1473   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1474   {
1475     DEBUG_printf(("get_sbcs_charmap: Returning NULL (%s)\n", strerror(errno)));
1476
1477     return (NULL);
1478   }
1479
1480  /*
1481   * Allocate memory for SBCS charset map...
1482   */
1483
1484   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1485   {
1486     cupsFileClose(fp);
1487     DEBUG_puts("get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1488
1489     return (NULL);
1490   }
1491
1492   cmap->used ++;
1493   cmap->encoding = encoding;
1494
1495  /*
1496   * Save SBCS charset map into memory for transcoding...
1497   */
1498
1499   while (cupsFileGets(fp, line, sizeof(line)))
1500   {
1501     if (line[0] != '0')
1502       continue;
1503
1504     legchar = strtol(line, &s, 16);
1505     if (legchar < 0 || legchar > 0xff)
1506       goto sbcs_error;
1507
1508     unichar = strtol(s, NULL, 16);
1509     if (unichar < 0 || unichar > 0x10ffff)
1510       goto sbcs_error;
1511
1512    /*
1513     * Save legacy to Unicode mapping in direct lookup table...
1514     */
1515
1516     crow  = cmap->char2uni + legchar;
1517     *crow = (cups_ucs2_t)(unichar & 0xffff);
1518
1519    /*
1520     * Save Unicode to legacy mapping in indirect lookup table...
1521     */
1522
1523     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1524     if (!srow)
1525     {
1526       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1527       if (!srow)
1528         goto sbcs_error;
1529
1530       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1531     }
1532
1533     srow += unichar & 0xff;
1534
1535    /*
1536     * Convert Replacement Character to visible replacement...
1537     */
1538
1539     if (unichar == 0xfffd)
1540       legchar = (unsigned long)'?';
1541
1542    /*
1543     * First (oldest) legacy character uses Unicode mapping cell...
1544     */
1545
1546     if (!*srow)
1547       *srow = (cups_sbcs_t)legchar;
1548   }
1549
1550   cupsFileClose(fp);
1551
1552  /*
1553   * Add it to the cache and return...
1554   */
1555
1556   cmap->next = cmap_cache;
1557   cmap_cache = cmap;
1558
1559   DEBUG_printf(("get_sbcs_charmap: Returning new cmap=%p\n", cmap));
1560
1561   return (cmap);
1562
1563  /*
1564   * If we get here, there was an error in the cmap file...
1565   */
1566
1567   sbcs_error:
1568
1569   free_sbcs_charmap(cmap);
1570
1571   cupsFileClose(fp);
1572
1573   DEBUG_puts("get_sbcs_charmap: Returning NULL (Read/format error)");
1574
1575   return (NULL);
1576 }
1577
1578
1579 /*
1580  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1581  */
1582
1583 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1584 get_vbcs_charmap(
1585     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1586     const char            *filename)    /* I - Charmap Filename */
1587 {
1588   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1589   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1590   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1591   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1592   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1593   unsigned long legchar;                /* Legacy character value */
1594   cups_utf32_t  unichar;                /* Unicode character value */
1595   int           mapcount;               /* Count of lines in charmap file */
1596   cups_file_t   *fp;                    /* Charset map file pointer */
1597   char          *s;                     /* Line parsing pointer */
1598   char          line[256];              /* Line from charset map file */
1599   int           i;                      /* Loop variable */
1600   int           legacy;                 /* 32-bit legacy char */
1601
1602
1603   DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1604                 encoding, filename));
1605
1606  /*
1607   * See if we already have this DBCS/VBCS charset map loaded...
1608   */
1609
1610   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1611   {
1612     if (vmap->encoding == encoding)
1613     {
1614       vmap->used ++;
1615       DEBUG_printf(("get_vbcs_charmap: Returning existing vmap=%p\n", vmap));
1616
1617       return ((void *)vmap);
1618     }
1619   }
1620
1621  /*
1622   * Open VBCS charset map input file...
1623   */
1624
1625   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1626   {
1627     DEBUG_printf(("get_vbcs_charmap: Returning NULL (%s)\n", strerror(errno)));
1628
1629     return (NULL);
1630   }
1631
1632  /*
1633   * Count lines in charmap file...
1634   */
1635
1636   if ((mapcount = get_charmap_count(fp)) <= 0)
1637   {
1638     DEBUG_puts("get_vbcs_charmap: Unable to get charmap count!");
1639
1640     cupsFileClose(fp);
1641
1642     return (NULL);
1643   }
1644
1645   DEBUG_printf(("get_vbcs_charmap: mapcount=%d\n", mapcount));
1646
1647  /*
1648   * Allocate memory for DBCS/VBCS charset map...
1649   */
1650
1651   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1652   {
1653     DEBUG_puts("get_vbcs_charmap: Unable to allocate memory!");
1654
1655     cupsFileClose(fp);
1656
1657     return (NULL);
1658   }
1659
1660   vmap->used ++;
1661   vmap->encoding = encoding;
1662
1663  /*
1664   * Save DBCS/VBCS charset map into memory for transcoding...
1665   */
1666
1667   wide2uni = NULL;
1668
1669   cupsFileRewind(fp);
1670
1671   i      = 0;
1672   legacy = 0;
1673
1674   while (cupsFileGets(fp, line, sizeof(line)))
1675   {
1676     if (line[0] != '0')
1677       continue;
1678
1679     legchar = strtoul(line, &s, 16);
1680     if (legchar == ULONG_MAX)
1681       goto vbcs_error;
1682
1683     unichar = strtol(s, NULL, 16);
1684     if (unichar < 0 || unichar > 0x10ffff)
1685       goto vbcs_error;
1686
1687     i ++;
1688
1689 /*    DEBUG_printf(("    i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1690                   legchar, (unsigned)unichar)); */
1691
1692    /*
1693     * Save lead char of 2/3/4-byte legacy char...
1694     */
1695
1696     if (legchar > 0xffffff)
1697     {
1698       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1699       vmap->lead4char[leadchar] = leadchar;
1700     }
1701     else if (legchar > 0xffff)
1702     {
1703       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1704       vmap->lead3char[leadchar] = leadchar;
1705     }
1706     else
1707     {
1708       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1709       vmap->lead2char[leadchar] = leadchar;
1710     }
1711
1712    /*
1713     * Save Legacy to Unicode mapping...
1714     */
1715
1716     if (legchar <= 0xffff)
1717     {
1718      /*
1719       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1720       */
1721
1722       crow = vmap->char2uni[(int)leadchar];
1723       if (!crow)
1724       {
1725         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1726         if (!crow)
1727           goto vbcs_error;
1728
1729         vmap->char2uni[(int)leadchar] = crow;
1730       }
1731
1732       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1733     }
1734     else
1735     {
1736      /*
1737       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1738       */
1739
1740       if (!legacy)
1741       {
1742         legacy          = 1;
1743         vmap->widecount = (mapcount - i + 1);
1744         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1745                                                      sizeof(_cups_wide2uni_t));
1746         if (!wide2uni)
1747           goto vbcs_error;
1748
1749         vmap->wide2uni = wide2uni;
1750       }
1751
1752       wide2uni->widechar = (cups_vbcs_t)legchar;
1753       wide2uni->unichar  = (cups_ucs2_t)unichar;
1754       wide2uni ++;
1755     }
1756
1757    /*
1758     * Save Unicode to legacy mapping in indirect lookup table...
1759     */
1760
1761     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1762     if (!vrow)
1763     {
1764       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1765       if (!vrow)
1766         goto vbcs_error;
1767
1768       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1769     }
1770
1771     vrow += (int)(unichar & 0xff);
1772
1773    /*
1774     * Convert Replacement Character to visible replacement...
1775     */
1776
1777     if (unichar == 0xfffd)
1778       legchar = (unsigned long)'?';
1779
1780    /*
1781     * First (oldest) legacy character uses Unicode mapping cell...
1782     */
1783
1784     if (!*vrow)
1785       *vrow = (cups_vbcs_t)legchar;
1786   }
1787
1788   vmap->charcount = (i - vmap->widecount);
1789
1790   cupsFileClose(fp);
1791
1792  /*
1793   * Add it to the cache and return...
1794   */
1795
1796   vmap->next = vmap_cache;
1797   vmap_cache = vmap;
1798
1799   DEBUG_printf(("get_vbcs_charmap: Returning new vmap=%p\n", vmap));
1800
1801   return (vmap);
1802
1803  /*
1804   * If we get here, the file contains errors...
1805   */
1806
1807   vbcs_error:
1808
1809   free_vbcs_charmap(vmap);
1810
1811   cupsFileClose(fp);
1812
1813   DEBUG_puts("get_vbcs_charmap: Returning NULL (Read/format error)");
1814
1815   return (NULL);
1816 }
1817
1818
1819 /*
1820  * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
1821  */