cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
   3  *
   4  *   Transcoding support for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 2007-2009 by Apple Inc.
   7  *   Copyright 1997-2007 by Easy Software Products.
   8  *
   9  *   These coded instructions, statements, and computer programs are the
  10  *   property of Apple Inc. and are protected by Federal copyright
  11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  *   which should have been included with this file.  If this file is
  13  *   missing or damaged, see the license at "http://www.cups.org/".
  14  *
  15  *   This file is subject to the Apple OS-Developed Software exception.
  16  *
  17  * Contents:
  18  *
  19  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  20  *   _cupsCharmapFree()  - Free a character set map.
  21  *   _cupsCharmapGet()   - Get a character set map.
  22  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  23  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  24  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  25  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  26  *   compare_wide()      - Compare key for wide (VBCS) match.
  27  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  28  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  29  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  30  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  31  *   free_sbcs_charmap() - Free memory used by a single byte character set.
  32  *   free_vbcs_charmap() - Free memory used by a variable byte character set.
  33  *   get_charmap()       - Lookup or get a character set map (private).
  34  *   get_charmap_count() - Count lines in a charmap file.
  35  *   get_sbcs_charmap()  - Get SBCS Charmap.
  36  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  37  */
  38
  39 /*
  40  * Include necessary headers...
  41  */
  42
  43 #include "globals.h"
  44 #include "debug.h"
  45 #include <limits.h>
  46 #include <stdlib.h>
  47 #include <errno.h>
  48 #include <time.h>
  49
  50
  51 /*
  52  * Local globals...
  53  */
  54
  55 #ifdef HAVE_PTHREAD_H
  56 static pthread_mutex_t  map_mutex = PTHREAD_MUTEX_INITIALIZER;
  57                                         /* Mutex guarding cmap_cache and vmap_cache */
  58 #endif /* HAVE_PTHREAD_H */
  59 static _cups_cmap_t     *cmap_cache = NULL;
  60                                         /* Head of cached SBCS charmap list */
  61 static _cups_vmap_t     *vmap_cache = NULL;
  62                                         /* Head of cached VBCS charmap list */
  63
  64
  65 /*
  66  * Local functions (conv_*() and get_charmap() run with map_mutex held)...
  67  */
  68
  69 static int              compare_wide(const void *k1, const void *k2);
  70 static int              conv_sbcs_to_utf8(cups_utf8_t *dest,
  71                                           const cups_sbcs_t *src,
  72                                           int maxout,
  73                                           const cups_encoding_t encoding);
  74 static int              conv_utf8_to_sbcs(cups_sbcs_t *dest,
  75                                           const cups_utf8_t *src,
  76                                           int maxout,
  77                                           const cups_encoding_t encoding);
  78 static int              conv_utf8_to_vbcs(cups_sbcs_t *dest,
  79                                           const cups_utf8_t *src,
  80                                           int maxout,
  81                                           const cups_encoding_t encoding);
  82 static int              conv_vbcs_to_utf8(cups_utf8_t *dest,
  83                                           const cups_sbcs_t *src,
  84                                           int maxout,
  85                                           const cups_encoding_t encoding);
  86 static void             free_sbcs_charmap(_cups_cmap_t *sbcs);
  87 static void             free_vbcs_charmap(_cups_vmap_t *vbcs);
  88 static void             *get_charmap(const cups_encoding_t encoding);
  89 static int              get_charmap_count(cups_file_t *fp);
  90 static _cups_cmap_t     *get_sbcs_charmap(const cups_encoding_t encoding,
  91                                           const char *filename);
  92 static _cups_vmap_t     *get_vbcs_charmap(const cups_encoding_t encoding,
  93                                           const char *filename);
  94
  95
  96 /*
  97  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
  98  */
  99
 100 void
 101 _cupsCharmapFlush(void)
 102 {
 103   _cups_cmap_t  *cmap,                  /* Legacy SBCS / Unicode Charset Map */
 104                 *cnext;                 /* Next Legacy SBCS Charset Map */
 105   _cups_vmap_t  *vmap,                  /* Legacy VBCS / Unicode Charset Map */
 106                 *vnext;                 /* Next Legacy VBCS Charset Map */
 107
 108
 109 #ifdef HAVE_PTHREAD_H
 110   pthread_mutex_lock(&map_mutex);
 111 #endif /* HAVE_PTHREAD_H */
 112
 113  /*
 114   * Loop through SBCS charset map cache, free all memory...
 115   */
 116
 117   for (cmap = cmap_cache; cmap; cmap = cnext)
 118   {
 119     cnext = cmap->next;
 120
 121     free_sbcs_charmap(cmap);
 122   }
 123
 124   cmap_cache = NULL;
 125
 126  /*
 127   * Loop through DBCS/VBCS charset map cache, free all memory...
 128   */
 129
 130   for (vmap = vmap_cache; vmap; vmap = vnext)
 131   {
 132     vnext = vmap->next;
 133
 134     free_vbcs_charmap(vmap);
 135   }
 136
 137   vmap_cache = NULL;
 138
 139 #ifdef HAVE_PTHREAD_H
 140   pthread_mutex_unlock(&map_mutex);
 141 #endif /* HAVE_PTHREAD_H */
 142 }
 143
 144
 145 /*
 146  * '_cupsCharmapFree()' - Free a character set map.
 147  *
 148  * This does not actually free; use '_cupsCharmapFlush()' for that.
 149  */
 150
 151 void
 152 _cupsCharmapFree(
 153     const cups_encoding_t encoding)     /* I - Encoding */
 154 {
 155   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 156   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
 157
 158
 159  /*
 160   * See if we already have this SBCS charset map loaded...
 161   */
 162
 163 #ifdef HAVE_PTHREAD_H
 164   pthread_mutex_lock(&map_mutex);
 165 #endif /* HAVE_PTHREAD_H */
 166
 167   for (cmap = cmap_cache; cmap; cmap = cmap->next)
 168   {
 169     if (cmap->encoding == encoding)
 170     {
 171       if (cmap->used > 0)
 172         cmap->used --;
 173       break;
 174     }
 175   }
 176
 177  /*
 178   * See if we already have this DBCS/VBCS charset map loaded...
 179   */
 180
 181   for (vmap = vmap_cache; vmap; vmap = vmap->next)
 182   {
 183     if (vmap->encoding == encoding)
 184     {
 185       if (vmap->used > 0)
 186         vmap->used --;
 187       break;
 188     }
 189   }
 190
 191 #ifdef HAVE_PTHREAD_H
 192   pthread_mutex_unlock(&map_mutex);
 193 #endif /* HAVE_PTHREAD_H */
 194 }
 195
 196
 197 /*
 198  * '_cupsCharmapGet()' - Get a character set map.
 199  *
 200  * This code handles single-byte (SBCS), double-byte (DBCS), and
 201  * variable-byte (VBCS) character sets _without_ charset escapes...
 202  * This code does not handle multiple-byte character sets (MBCS)
 203  * (such as ISO-2022-JP) with charset switching via escapes...
 204  */
 205
 206 void *                                  /* O - Charset map pointer */
 207 _cupsCharmapGet(
 208     const cups_encoding_t encoding)     /* I - Encoding */
 209 {
 210   void  *charmap;                       /* Charset map pointer */
 211
 212
 213   DEBUG_printf(("7_cupsCharmapGet(encoding=%d)", encoding));
 214
 215  /*
 216   * Check for valid arguments...
 217   */
 218
 219   if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 220   {
 221     DEBUG_puts("8_cupsCharmapGet: Bad encoding, returning NULL!");
 222     return (NULL);
 223   }
 224
 225  /*
 226   * Lookup or get the charset map pointer and return...
 227   */
 228
 229 #ifdef HAVE_PTHREAD_H
 230   pthread_mutex_lock(&map_mutex);
 231 #endif /* HAVE_PTHREAD_H */
 232
 233   charmap = get_charmap(encoding);
 234
 235 #ifdef HAVE_PTHREAD_H
 236   pthread_mutex_unlock(&map_mutex);
 237 #endif /* HAVE_PTHREAD_H */
 238
 239   return (charmap);
 240 }
 241
 242
 243 /*
 244  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 245  *
 246  * This code handles single-byte (SBCS), double-byte (DBCS), and
 247  * variable-byte (VBCS) character sets _without_ charset escapes...
 248  * This code does not handle multiple-byte character sets (MBCS)
 249  * (such as ISO-2022-JP) with charset switching via escapes...
 250  */
 251
 252 int                                     /* O - Count or -1 on error */
 253 cupsCharsetToUTF8(
 254     cups_utf8_t *dest,                  /* O - Target string */
 255     const char *src,                    /* I - Source string */
 256     const int maxout,                   /* I - Max output */
 257     const cups_encoding_t encoding)     /* I - Encoding */
 258 {
 259   int   bytes;                          /* Number of bytes converted */
 260
 261
 262  /*
 263   * Check for valid arguments...
 264   */
 265
 266   DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
 267                 dest, src, maxout, encoding));
 268
 269   if (dest)
 270     *dest = '\0';
 271
 272   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 273   {
 274     DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
 275     return (-1);
 276   }
 277
 278  /*
 279   * Handle identity conversions...
 280   */
 281
 282   if (encoding == CUPS_UTF8 ||
 283       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 284   {
 285     strlcpy((char *)dest, src, maxout);
 286     return ((int)strlen((char *)dest));
 287   }
 288
 289  /*
 290   * Handle ISO-8859-1 to UTF-8 directly...
 291   */
 292
 293   if (encoding == CUPS_ISO8859_1)
 294   {
 295     int         ch;                     /* Character from string */
 296     cups_utf8_t *destptr,               /* Pointer into UTF-8 buffer */
 297                 *destend;               /* End of UTF-8 buffer */
 298
 299
 300     destptr = dest;
 301     destend = dest + maxout - 2;
 302
 303     while (*src && destptr < destend)
 304     {
 305       ch = *src++ & 255;
 306
 307       if (ch & 128)
 308       {
 309         *destptr++ = 0xc0 | (ch >> 6);
 310         *destptr++ = 0x80 | (ch & 0x3f);
 311       }
 312       else
 313         *destptr++ = ch;
 314     }
 315
 316     *destptr = '\0';
 317
 318     return ((int)(destptr - dest));
 319   }
 320
 321  /*
 322   * Convert input legacy charset to UTF-8...
 323   */
 324
 325 #ifdef HAVE_PTHREAD_H
 326   pthread_mutex_lock(&map_mutex);
 327 #endif /* HAVE_PTHREAD_H */
 328
 329   if (encoding < CUPS_ENCODING_SBCS_END)
 330     bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 331   else
 332     bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
 333
 334 #ifdef HAVE_PTHREAD_H
 335   pthread_mutex_unlock(&map_mutex);
 336 #endif /* HAVE_PTHREAD_H */
 337
 338   return (bytes);
 339 }
 340
 341
 342 /*
 343  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 344  *
 345  * This code handles single-byte (SBCS), double-byte (DBCS), and
 346  * variable-byte (VBCS) character sets _without_ charset escapes...
 347  * This code does not handle multiple-byte character sets (MBCS)
 348  * (such as ISO-2022-JP) with charset switching via escapes...
 349  */
 350
 351 int                                     /* O - Count or -1 on error */
 352 cupsUTF8ToCharset(
 353     char                  *dest,        /* O - Target string */
 354     const cups_utf8_t     *src,         /* I - Source string */
 355     const int             maxout,       /* I - Max output */
 356     const cups_encoding_t encoding)     /* I - Encoding */
 357 {
 358   int   bytes;                          /* Number of bytes converted */
 359
 360
 361  /*
 362   * Check for valid arguments...
 363   */
 364
 365   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 366   {
 367     if (dest)
 368       *dest = '\0';
 369
 370     return (-1);
 371   }
 372
 373  /*
 374   * Handle identity conversions...
 375   */
 376
 377   if (encoding == CUPS_UTF8 ||
 378       encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
 379   {
 380     strlcpy(dest, (char *)src, maxout);
 381     return ((int)strlen(dest));
 382   }
 383
 384  /*
 385   * Handle UTF-8 to ISO-8859-1 directly...
 386   */
 387
 388   if (encoding == CUPS_ISO8859_1)
 389   {
 390     int         ch;                     /* Character from string */
 391     char        *destptr,               /* Pointer into ISO-8859-1 buffer */
 392                 *destend;               /* End of ISO-8859-1 buffer */
 393
 394
 395     destptr = dest;
 396     destend = dest + maxout - 1;
 397
 398     while (*src && destptr < destend)
 399     {
 400       ch = *src++;
 401
 402       if ((ch & 0xe0) == 0xc0)
 403       {
 404         ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
 405
 406         if (ch < 256)
 407           *destptr++ = ch;
 408         else
 409           *destptr++ = '?';
 410       }
 411       else if ((ch & 0xf0) == 0xe0 ||
 412                (ch & 0xf8) == 0xf0)
 413         *destptr++ = '?';
 414       else if (!(ch & 0x80))
 415         *destptr++ = ch;
 416     }
 417
 418     *destptr = '\0';
 419
 420     return ((int)(destptr - dest));
 421   }
 422
 423  /*
 424   * Convert input UTF-8 to legacy charset...
 425   */
 426
 427 #ifdef HAVE_PTHREAD_H
 428   pthread_mutex_lock(&map_mutex);
 429 #endif /* HAVE_PTHREAD_H */
 430
 431   if (encoding < CUPS_ENCODING_SBCS_END)
 432     bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 433   else
 434     bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
 435
 436 #ifdef HAVE_PTHREAD_H
 437   pthread_mutex_unlock(&map_mutex);
 438 #endif /* HAVE_PTHREAD_H */
 439
 440   return (bytes);
 441 }
 442
 443
 444 /*
 445  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 446  *
 447  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 448  *
 449  *   UTF-32 char     UTF-8 char(s)
 450  *   --------------------------------------------------
 451  *        0 to 127 = 0xxxxxxx (US-ASCII)
 452  *     128 to 2047 = 110xxxxx 10yyyyyy
 453  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 454  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 455  *
 456  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 457  * which would convert to five- or six-octet UTF-8 sequences...
 458  */
 459
 460 int                                     /* O - Count or -1 on error */
 461 cupsUTF8ToUTF32(
 462     cups_utf32_t      *dest,            /* O - Target string */
 463     const cups_utf8_t *src,             /* I - Source string */
 464     const int         maxout)           /* I - Max output */
 465 {
 466   int           i;                      /* Looping variable */
 467   cups_utf8_t   ch;                     /* Character value */
 468   cups_utf8_t   next;                   /* Next character value */
 469   cups_utf32_t  ch32;                   /* UTF-32 character value */
 470
 471
 472  /*
 473   * Check for valid arguments and clear output...
 474   */
 475
 476   DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
 477                 src, maxout));
 478
 479   if (dest)
 480     *dest = 0;
 481
 482   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 483   {
 484     DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
 485
 486     return (-1);
 487   }
 488
 489  /*
 490   * Convert input UTF-8 to output UTF-32...
 491   */
 492
 493   for (i = maxout - 1; *src && i > 0; i --)
 494   {
 495     ch = *src++;
 496
 497    /*
 498     * Convert UTF-8 character(s) to UTF-32 character...
 499     */
 500
 501     if (!(ch & 0x80))
 502     {
 503      /*
 504       * One-octet UTF-8 <= 127 (US-ASCII)...
 505       */
 506
 507       *dest++ = ch;
 508
 509       DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
 510       continue;
 511     }
 512     else if ((ch & 0xe0) == 0xc0)
 513     {
 514      /*
 515       * Two-octet UTF-8 <= 2047 (Latin-x)...
 516       */
 517
 518       next = *src++;
 519       if ((next & 0xc0) != 0x80)
 520       {
 521         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 522
 523         return (-1);
 524       }
 525
 526       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 527
 528      /*
 529       * Check for non-shortest form (invalid UTF-8)...
 530       */
 531
 532       if (ch32 < 0x80)
 533       {
 534         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 535
 536         return (-1);
 537       }
 538
 539       *dest++ = ch32;
 540
 541       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
 542                     src[-2], src[-1], (unsigned)ch32));
 543     }
 544     else if ((ch & 0xf0) == 0xe0)
 545     {
 546      /*
 547       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 548       */
 549
 550       next = *src++;
 551       if ((next & 0xc0) != 0x80)
 552       {
 553         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 554
 555         return (-1);
 556       }
 557
 558       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 559
 560       next = *src++;
 561       if ((next & 0xc0) != 0x80)
 562       {
 563         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 564
 565         return (-1);
 566       }
 567
 568       ch32 = (ch32 << 6) | (next & 0x3f);
 569
 570      /*
 571       * Check for non-shortest form (invalid UTF-8)...
 572       */
 573
 574       if (ch32 < 0x800)
 575       {
 576         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 577
 578         return (-1);
 579       }
 580
 581       *dest++ = ch32;
 582
 583       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
 584                     src[-3], src[-2], src[-1], (unsigned)ch32));
 585     }
 586     else if ((ch & 0xf8) == 0xf0)
 587     {
 588      /*
 589       * Four-octet UTF-8...
 590       */
 591
 592       next = *src++;
 593       if ((next & 0xc0) != 0x80)
 594       {
 595         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 596
 597         return (-1);
 598       }
 599
 600       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 601
 602       next = *src++;
 603       if ((next & 0xc0) != 0x80)
 604       {
 605         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 606
 607         return (-1);
 608       }
 609
 610       ch32 = (ch32 << 6) | (next & 0x3f);
 611
 612       next = *src++;
 613       if ((next & 0xc0) != 0x80)
 614       {
 615         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 616
 617         return (-1);
 618       }
 619
 620       ch32 = (ch32 << 6) | (next & 0x3f);
 621
 622      /*
 623       * Check for non-shortest form (invalid UTF-8)...
 624       */
 625
 626       if (ch32 < 0x10000)
 627       {
 628         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 629
 630         return (-1);
 631       }
 632
 633       *dest++ = ch32;
 634
 635       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
 636                     src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
 637     }
 638     else
 639     {
 640      /*
 641       * More than 4-octet (invalid UTF-8 sequence)...
 642       */
 643
 644       DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 645
 646       return (-1);
 647     }
 648
 649    /*
 650     * Check for UTF-16 surrogate (illegal UTF-8)...
 651     */
 652
 653     if (ch32 >= 0xd800 && ch32 <= 0xdfff)
 654       return (-1);
 655   }
 656
 657   *dest = 0;
 658
 659   DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
 660
 661   return (maxout - 1 - i);
 662 }
 663
 664
 665 /*
 666  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 667  *
 668  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 669  *
 670  *   UTF-32 char     UTF-8 char(s)
 671  *   --------------------------------------------------
 672  *        0 to 127 = 0xxxxxxx (US-ASCII)
 673  *     128 to 2047 = 110xxxxx 10yyyyyy
 674  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 675  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 676  *
 677  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 678  * which would convert to five- or six-octet UTF-8 sequences...
 679  */
 680
 681 int                                     /* O - Count or -1 on error */
 682 cupsUTF32ToUTF8(
 683     cups_utf8_t        *dest,           /* O - Target string */
 684     const cups_utf32_t *src,            /* I - Source string */
 685     const int          maxout)          /* I - Max output */
 686 {
 687   cups_utf8_t   *start;                 /* Start of destination string */
 688   int           i;                      /* Looping variable */
 689   int           swap;                   /* Byte-swap input to output */
 690   cups_utf32_t  ch;                     /* Character value */
 691
 692
 693  /*
 694   * Check for valid arguments and clear output...
 695   */
 696
 697   DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
 698                 maxout));
 699
 700   if (dest)
 701     *dest = '\0';
 702
 703   if (!dest || !src || maxout < 1)
 704   {
 705     DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
 706
 707     return (-1);
 708   }
 709
 710  /*
 711   * Check for leading BOM in UTF-32 and inverted BOM...
 712   */
 713
 714   start = dest;
 715   swap  = *src == 0xfffe0000;
 716
 717   DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
 718
 719   if (*src == 0xfffe0000 || *src == 0xfeff)
 720     src ++;
 721
 722  /*
 723   * Convert input UTF-32 to output UTF-8...
 724   */
 725
 726   for (i = maxout - 1; *src && i > 0;)
 727   {
 728     ch = *src++;
 729
 730    /*
 731     * Byte swap input UTF-32, if necessary...
 732     * (only byte-swapping 24 of 32 bits)
 733     */
 734
 735     if (swap)
 736       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 737
 738    /*
 739     * Check for beyond Plane 16 (invalid UTF-32)...
 740     */
 741
 742     if (ch > 0x10ffff)
 743     {
 744       DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
 745
 746       return (-1);
 747     }
 748
 749    /*
 750     * Convert UTF-32 character to UTF-8 character(s)...
 751     */
 752
 753     if (ch < 0x80)
 754     {
 755      /*
 756       * One-octet UTF-8 <= 127 (US-ASCII)...
 757       */
 758
 759       *dest++ = (cups_utf8_t)ch;
 760       i --;
 761
 762       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
 763     }
 764     else if (ch < 0x800)
 765     {
 766      /*
 767       * Two-octet UTF-8 <= 2047 (Latin-x)...
 768       */
 769
 770       if (i < 2)
 771       {
 772         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
 773
 774         return (-1);
 775       }
 776
 777       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 778       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 779       i -= 2;
 780
 781       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
 782                     dest[-2], dest[-1]));
 783     }
 784     else if (ch < 0x10000)
 785     {
 786      /*
 787       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 788       */
 789
 790       if (i < 3)
 791       {
 792         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
 793
 794         return (-1);
 795       }
 796
 797       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 798       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 799       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 800       i -= 3;
 801
 802       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
 803                     dest[-3], dest[-2], dest[-1]));
 804     }
 805     else
 806     {
 807      /*
 808       * Four-octet UTF-8...
 809       */
 810
 811       if (i < 4)
 812       {
 813         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
 814
 815         return (-1);
 816       }
 817
 818       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 819       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 820       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 821       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 822       i -= 4;
 823
 824       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
 825                     (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
 826     }
 827   }
 828
 829   *dest = '\0';
 830
 831   DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
 832
 833   return ((int)(dest - start));
 834 }
 835
 836
 837 /*
 838  * 'compare_wide()' - Compare key for wide (VBCS) match.
 839  */
 840
 841 static int
 842 compare_wide(const void *k1,            /* I - Key char */
 843              const void *k2)            /* I - Map char */
 844 {
 845   cups_vbcs_t   key;                    /* Legacy key character */
 846   cups_vbcs_t   map;                    /* Legacy map character */
 847
 848
 849   key = *((cups_vbcs_t *)k1);
 850   map = ((_cups_wide2uni_t *)k2)->widechar;
 851
 852   return ((int)(key - map));
 853 }
 854
 855
 856 /*
 857  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 858  */
 859
 860 static int                              /* O - Count or -1 on error */
 861 conv_sbcs_to_utf8(
 862     cups_utf8_t           *dest,        /* O - Target string */
 863     const cups_sbcs_t     *src,         /* I - Source string */
 864     int                   maxout,       /* I - Max output */
 865     const cups_encoding_t encoding)     /* I - Encoding */
 866 {
 867   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 868   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 869   cups_sbcs_t   legchar;                /* Legacy character value */
 870   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 871                 *workptr;               /* Pointer into string */
 872
 873
 874  /*
 875   * Find legacy charset map in cache...
 876   */
 877
 878   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 879     return (-1);
 880
 881  /*
 882   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 883   */
 884
 885   work[0] = 0xfeff;
 886   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 887   {
 888     legchar = *src++;
 889
 890    /*
 891     * Convert ASCII verbatim (optimization)...
 892     */
 893
 894     if (legchar < 0x80)
 895       *workptr++ = (cups_utf32_t)legchar;
 896     else
 897     {
 898      /*
 899       * Convert unknown character to Replacement Character...
 900       */
 901
 902       crow = cmap->char2uni + legchar;
 903
 904       if (!*crow)
 905         *workptr++ = 0xfffd;
 906       else
 907         *workptr++ = (cups_utf32_t)*crow;
 908     }
 909   }
 910
 911   *workptr = 0;
 912
 913  /*
 914   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 915   */
 916
 917   cmap->used --;
 918
 919   return (cupsUTF32ToUTF8(dest, work, maxout));
 920 }
 921
 922
 923 /*
 924  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 925  */
 926
 927 static int                              /* O - Count or -1 on error */
 928 conv_utf8_to_sbcs(
 929     cups_sbcs_t           *dest,        /* O - Target string */
 930     const cups_utf8_t     *src,         /* I - Source string */
 931     int                   maxout,       /* I - Max output */
 932     const cups_encoding_t encoding)     /* I - Encoding */
 933 {
 934   cups_sbcs_t   *start;                 /* Start of destination string */
 935   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 936   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 937   cups_utf32_t  unichar;                /* Character value */
 938   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 939                 *workptr;               /* Pointer into string */
 940
 941
 942  /*
 943   * Find legacy charset map in cache...
 944   */
 945
 946   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 947     return (-1);
 948
 949  /*
 950   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 951   */
 952
 953   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 954     return (-1);
 955
 956  /*
 957   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 958   */
 959
 960   for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
 961   {
 962     unichar = *workptr++;
 963     if (!unichar)
 964       break;
 965
 966    /*
 967     * Convert ASCII verbatim (optimization)...
 968     */
 969
 970     if (unichar < 0x80)
 971     {
 972       *dest++ = (cups_sbcs_t)unichar;
 973       continue;
 974     }
 975
 976    /*
 977     * Convert unknown character to visible replacement...
 978     */
 979
 980     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 981
 982     if (srow)
 983       srow += (int)(unichar & 0xff);
 984
 985     if (!srow || !*srow)
 986       *dest++ = '?';
 987     else
 988       *dest++ = *srow;
 989   }
 990
 991   *dest = '\0';
 992
 993   cmap->used --;
 994
 995   return ((int)(dest - start));
 996 }
 997
 998
 999 /*
1000  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1001  */
1002
1003 static int                              /* O - Count or -1 on error */
1004 conv_utf8_to_vbcs(
1005     cups_sbcs_t           *dest,        /* O - Target string */
1006     const cups_utf8_t     *src,         /* I - Source string */
1007     int                   maxout,       /* I - Max output */
1008     const cups_encoding_t encoding)     /* I - Encoding */
1009 {
1010   cups_sbcs_t   *start;                 /* Start of destination string */
1011   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
1012   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1013   cups_utf32_t  unichar;                /* Character value */
1014   cups_vbcs_t   legchar;                /* Legacy character value */
1015   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1016                 *workptr;               /* Pointer into string */
1017
1018
1019   DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
1020                 "encoding=%d)", dest, src, maxout, encoding));
1021
1022  /*
1023   * Find legacy charset map in cache...
1024   */
1025
1026   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1027   {
1028     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
1029
1030     return (-1);
1031   }
1032
1033  /*
1034   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1035   */
1036
1037   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
1038   {
1039     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
1040
1041     return (-1);
1042   }
1043
1044  /*
1045   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1046   */
1047
1048   for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
1049   {
1050     unichar = *workptr++;
1051
1052    /*
1053     * Convert ASCII verbatim (optimization)...
1054     */
1055
1056     if (unichar < 0x80)
1057     {
1058       *dest++ = (cups_sbcs_t)unichar;
1059
1060       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
1061                     dest[-1]));
1062
1063       continue;
1064     }
1065
1066    /*
1067     * Convert unknown character to visible replacement...
1068     */
1069
1070     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1071
1072     if (vrow)
1073       vrow += (int)(unichar & 0xff);
1074
1075     if (!vrow || !*vrow)
1076       legchar = (cups_vbcs_t)'?';
1077     else
1078       legchar = (cups_vbcs_t)*vrow;
1079
1080    /*
1081     * Save n-byte legacy character...
1082     */
1083
1084     if (legchar > 0xffffff)
1085     {
1086       if (maxout < 5)
1087       {
1088         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
1089
1090         return (-1);
1091       }
1092
1093       *dest++ = (cups_sbcs_t)(legchar >> 24);
1094       *dest++ = (cups_sbcs_t)(legchar >> 16);
1095       *dest++ = (cups_sbcs_t)(legchar >> 8);
1096       *dest++ = (cups_sbcs_t)legchar;
1097
1098       maxout -= 3;
1099
1100       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
1101                     (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
1102     }
1103     else if (legchar > 0xffff)
1104     {
1105       if (maxout < 4)
1106       {
1107         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
1108
1109         return (-1);
1110       }
1111
1112       *dest++ = (cups_sbcs_t)(legchar >> 16);
1113       *dest++ = (cups_sbcs_t)(legchar >> 8);
1114       *dest++ = (cups_sbcs_t)legchar;
1115
1116       maxout -= 2;
1117
1118       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
1119                     (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
1120     }
1121     else if (legchar > 0xff)
1122     {
1123       *dest++ = (cups_sbcs_t)(legchar >> 8);
1124       *dest++ = (cups_sbcs_t)legchar;
1125
1126       maxout --;
1127
1128       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
1129                     (unsigned)unichar, dest[-2], dest[-1]));
1130     }
1131     else
1132     {
1133       *dest++ = legchar;
1134
1135       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
1136                     (unsigned)unichar, dest[-1]));
1137     }
1138   }
1139
1140   *dest = '\0';
1141
1142   vmap->used --;
1143
1144   DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
1145                 (int)(dest - start)));
1146
1147   return ((int)(dest - start));
1148 }
1149
1150
1151 /*
1152  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1153  */
1154
1155 static int                              /* O - Count or -1 on error */
1156 conv_vbcs_to_utf8(
1157     cups_utf8_t           *dest,        /* O - Target string */
1158     const cups_sbcs_t     *src,         /* I - Source string */
1159     int                   maxout,       /* I - Max output */
1160     const cups_encoding_t encoding)     /* I - Encoding */
1161 {
1162   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1163   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1164   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1165   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
1166   cups_vbcs_t   legchar;                /* Legacy character value */
1167   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1168                 *workptr;               /* Pointer into string */
1169
1170
1171  /*
1172   * Find legacy charset map in cache...
1173   */
1174
1175   DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
1176                 dest, src, maxout, encoding));
1177
1178   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1179   {
1180     DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1181
1182     return (-1);
1183   }
1184
1185  /*
1186   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1187   */
1188
1189   work[0] = 0xfeff;
1190   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1191   {
1192     legchar  = *src++;
1193     leadchar = (cups_sbcs_t)legchar;
1194
1195    /*
1196     * Convert ASCII verbatim (optimization)...
1197     */
1198
1199     if (legchar < 0x80)
1200     {
1201       *workptr++ = (cups_utf32_t)legchar;
1202
1203       DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
1204                     (unsigned)legchar));
1205       continue;
1206     }
1207
1208    /*
1209     * Convert 2-byte legacy character...
1210     */
1211
1212     if (vmap->lead2char[(int)leadchar] == leadchar)
1213     {
1214       if (!*src)
1215       {
1216         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
1217
1218         return (-1);
1219       }
1220
1221       legchar = (legchar << 8) | *src++;
1222
1223      /*
1224       * Convert unknown character to Replacement Character...
1225       */
1226
1227       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1228       if (crow)
1229         crow += (int) (legchar & 0xff);
1230
1231       if (!crow || !*crow)
1232         *workptr++ = 0xfffd;
1233       else
1234         *workptr++ = (cups_utf32_t)*crow;
1235
1236       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
1237                     src[-2], src[-1], (unsigned)workptr[-1]));
1238       continue;
1239     }
1240
1241    /*
1242     * Fetch 3-byte or 4-byte legacy character...
1243     */
1244
1245     if (vmap->lead3char[(int)leadchar] == leadchar)
1246     {
1247       if (!*src || !src[1])
1248       {
1249         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
1250
1251         return (-1);
1252       }
1253
1254       legchar = (legchar << 8) | *src++;
1255       legchar = (legchar << 8) | *src++;
1256     }
1257     else if (vmap->lead4char[(int)leadchar] == leadchar)
1258     {
1259       if (!*src || !src[1] || !src[2])
1260       {
1261         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
1262
1263         return (-1);
1264       }
1265
1266       legchar = (legchar << 8) | *src++;
1267       legchar = (legchar << 8) | *src++;
1268       legchar = (legchar << 8) | *src++;
1269     }
1270     else
1271     {
1272       DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
1273
1274       return (-1);
1275     }
1276
1277    /*
1278     * Find 3-byte or 4-byte legacy character...
1279     */
1280
1281     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1282                                            vmap->wide2uni,
1283                                            vmap->widecount,
1284                                            sizeof(_cups_wide2uni_t),
1285                                            compare_wide);
1286
1287    /*
1288     * Convert unknown character to Replacement Character...
1289     */
1290
1291     if (!wide2uni || !wide2uni->unichar)
1292       *workptr++ = 0xfffd;
1293     else
1294       *workptr++ = wide2uni->unichar;
1295
1296     if (vmap->lead3char[(int)leadchar] == leadchar)
1297       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
1298                     src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1299     else
1300       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
1301                     src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1302   }
1303
1304   *workptr = 0;
1305
1306   vmap->used --;
1307
1308   DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
1309                 (int)(workptr - work)));
1310
1311  /*
1312   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1313   */
1314
1315   return (cupsUTF32ToUTF8(dest, work, maxout));
1316 }
1317
1318
1319 /*
1320  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1321  */
1322
1323 static void
1324 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1325 {
1326   int           i;                      /* Looping variable */
1327
1328
1329   for (i = 0; i < 256; i ++)
1330     if (cmap->uni2char[i])
1331       free(cmap->uni2char[i]);
1332
1333   free(cmap);
1334 }
1335
1336
1337 /*
1338  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1339  */
1340
1341 static void
1342 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1343 {
1344   int           i;                      /* Looping variable */
1345
1346
1347   for (i = 0; i < 256; i ++)
1348     if (vmap->char2uni[i])
1349       free(vmap->char2uni[i]);
1350
1351   for (i = 0; i < 256; i ++)
1352     if (vmap->uni2char[i])
1353       free(vmap->uni2char[i]);
1354
1355   if (vmap->wide2uni)
1356     free(vmap->wide2uni);
1357
1358   free(vmap);
1359 }
1360
1361
1362 /*
1363  * 'get_charmap()' - Lookup or get a character set map (private).
1364  *
1365  * This code handles single-byte (SBCS), double-byte (DBCS), and
1366  * variable-byte (VBCS) character sets _without_ charset escapes...
1367  * This code does not handle multiple-byte character sets (MBCS)
1368  * (such as ISO-2022-JP) with charset switching via escapes...
1369  */
1370
1371
1372 static void *                           /* O - Charset map pointer */
1373 get_charmap(
1374     const cups_encoding_t encoding)     /* I - Encoding */
1375 {
1376   char          filename[1024];         /* Filename for charset map file */
1377   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1378
1379
1380   DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
1381
1382  /*
1383   * Get the data directory and charset map name...
1384   */
1385
1386   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1387            cg->cups_datadir, _cupsEncodingName(encoding));
1388
1389   DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
1390
1391  /*
1392   * Read charset map input file into cache...
1393   */
1394
1395   if (encoding < CUPS_ENCODING_SBCS_END)
1396     return (get_sbcs_charmap(encoding, filename));
1397   else if (encoding < CUPS_ENCODING_VBCS_END)
1398     return (get_vbcs_charmap(encoding, filename));
1399   else
1400     return (NULL);
1401 }
1402
1403
1404 /*
1405  * 'get_charmap_count()' - Count lines in a charmap file.
1406  */
1407
1408 static int                              /* O - Count or -1 on error */
1409 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1410 {
1411   int   count;                          /* Number of lines */
1412   char  line[256];                      /* Line from input map file */
1413
1414
1415  /*
1416   * Count lines in map input file...
1417   */
1418
1419   count = 0;
1420
1421   while (cupsFileGets(fp, line, sizeof(line)))
1422     if (line[0] == '0')
1423       count ++;
1424
1425  /*
1426   * Return the number of lines...
1427   */
1428
1429   if (count > 0)
1430     return (count);
1431   else
1432     return (-1);
1433 }
1434
1435
1436 /*
1437  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1438  */
1439
1440 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1441 get_sbcs_charmap(
1442     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1443     const char            *filename)    /* I - Charmap Filename */
1444 {
1445   unsigned long legchar;                /* Legacy character value */
1446   cups_utf32_t  unichar;                /* Unicode character value */
1447   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1448   cups_file_t   *fp;                    /* Charset map file pointer */
1449   char          *s;                     /* Line parsing pointer */
1450   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1451   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1452   char          line[256];              /* Line from charset map file */
1453
1454
1455  /*
1456   * See if we already have this SBCS charset map loaded...
1457   */
1458
1459   DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
1460                 filename));
1461
1462   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1463   {
1464     if (cmap->encoding == encoding)
1465     {
1466       cmap->used ++;
1467       DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
1468
1469       return ((void *)cmap);
1470     }
1471   }
1472
1473  /*
1474   * Open SBCS charset map input file...
1475   */
1476
1477   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1478   {
1479     DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
1480
1481     return (NULL);
1482   }
1483
1484  /*
1485   * Allocate memory for SBCS charset map...
1486   */
1487
1488   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1489   {
1490     cupsFileClose(fp);
1491     DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1492
1493     return (NULL);
1494   }
1495
1496   cmap->used ++;
1497   cmap->encoding = encoding;
1498
1499  /*
1500   * Save SBCS charset map into memory for transcoding...
1501   */
1502
1503   while (cupsFileGets(fp, line, sizeof(line)))
1504   {
1505     if (line[0] != '0')
1506       continue;
1507
1508     legchar = strtol(line, &s, 16);
1509     if (legchar < 0 || legchar > 0xff)
1510       goto sbcs_error;
1511
1512     unichar = strtol(s, NULL, 16);
1513     if (unichar < 0 || unichar > 0x10ffff)
1514       goto sbcs_error;
1515
1516    /*
1517     * Save legacy to Unicode mapping in direct lookup table...
1518     */
1519
1520     crow  = cmap->char2uni + legchar;
1521     *crow = (cups_ucs2_t)(unichar & 0xffff);
1522
1523    /*
1524     * Save Unicode to legacy mapping in indirect lookup table...
1525     */
1526
1527     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1528     if (!srow)
1529     {
1530       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1531       if (!srow)
1532         goto sbcs_error;
1533
1534       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1535     }
1536
1537     srow += unichar & 0xff;
1538
1539    /*
1540     * Convert Replacement Character to visible replacement...
1541     */
1542
1543     if (unichar == 0xfffd)
1544       legchar = (unsigned long)'?';
1545
1546    /*
1547     * First (oldest) legacy character uses Unicode mapping cell...
1548     */
1549
1550     if (!*srow)
1551       *srow = (cups_sbcs_t)legchar;
1552   }
1553
1554   cupsFileClose(fp);
1555
1556  /*
1557   * Add it to the cache and return...
1558   */
1559
1560   cmap->next = cmap_cache;
1561   cmap_cache = cmap;
1562
1563   DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
1564
1565   return (cmap);
1566
1567  /*
1568   * If we get here, there was an error in the cmap file...
1569   */
1570
1571   sbcs_error:
1572
1573   free_sbcs_charmap(cmap);
1574
1575   cupsFileClose(fp);
1576
1577   DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
1578
1579   return (NULL);
1580 }
1581
1582
1583 /*
1584  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1585  */
1586
1587 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1588 get_vbcs_charmap(
1589     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1590     const char            *filename)    /* I - Charmap Filename */
1591 {
1592   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1593   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1594   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1595   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1596   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1597   unsigned long legchar;                /* Legacy character value */
1598   cups_utf32_t  unichar;                /* Unicode character value */
1599   int           mapcount;               /* Count of lines in charmap file */
1600   cups_file_t   *fp;                    /* Charset map file pointer */
1601   char          *s;                     /* Line parsing pointer */
1602   char          line[256];              /* Line from charset map file */
1603   int           i;                      /* Loop variable */
1604   int           legacy;                 /* 32-bit legacy char */
1605
1606
1607   DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1608                 encoding, filename));
1609
1610  /*
1611   * See if we already have this DBCS/VBCS charset map loaded...
1612   */
1613
1614   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1615   {
1616     if (vmap->encoding == encoding)
1617     {
1618       vmap->used ++;
1619       DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
1620
1621       return ((void *)vmap);
1622     }
1623   }
1624
1625  /*
1626   * Open VBCS charset map input file...
1627   */
1628
1629   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1630   {
1631     DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
1632
1633     return (NULL);
1634   }
1635
1636  /*
1637   * Count lines in charmap file...
1638   */
1639
1640   if ((mapcount = get_charmap_count(fp)) <= 0)
1641   {
1642     DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
1643
1644     cupsFileClose(fp);
1645
1646     return (NULL);
1647   }
1648
1649   DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
1650
1651  /*
1652   * Allocate memory for DBCS/VBCS charset map...
1653   */
1654
1655   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1656   {
1657     DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
1658
1659     cupsFileClose(fp);
1660
1661     return (NULL);
1662   }
1663
1664   vmap->used ++;
1665   vmap->encoding = encoding;
1666
1667  /*
1668   * Save DBCS/VBCS charset map into memory for transcoding...
1669   */
1670
1671   wide2uni = NULL;
1672
1673   cupsFileRewind(fp);
1674
1675   i      = 0;
1676   legacy = 0;
1677
1678   while (cupsFileGets(fp, line, sizeof(line)))
1679   {
1680     if (line[0] != '0')
1681       continue;
1682
1683     legchar = strtoul(line, &s, 16);
1684     if (legchar == ULONG_MAX)
1685       goto vbcs_error;
1686
1687     unichar = strtol(s, NULL, 16);
1688     if (unichar < 0 || unichar > 0x10ffff)
1689       goto vbcs_error;
1690
1691     i ++;
1692
1693     DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
1694                   legchar, (unsigned)unichar));
1695
1696    /*
1697     * Save lead char of 2/3/4-byte legacy char...
1698     */
1699
1700     if (legchar > 0xffffff)
1701     {
1702       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1703       vmap->lead4char[leadchar] = leadchar;
1704     }
1705     else if (legchar > 0xffff)
1706     {
1707       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1708       vmap->lead3char[leadchar] = leadchar;
1709     }
1710     else
1711     {
1712       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1713       vmap->lead2char[leadchar] = leadchar;
1714     }
1715
1716    /*
1717     * Save Legacy to Unicode mapping...
1718     */
1719
1720     if (legchar <= 0xffff)
1721     {
1722      /*
1723       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1724       */
1725
1726       crow = vmap->char2uni[(int)leadchar];
1727       if (!crow)
1728       {
1729         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1730         if (!crow)
1731           goto vbcs_error;
1732
1733         vmap->char2uni[(int)leadchar] = crow;
1734       }
1735
1736       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1737     }
1738     else
1739     {
1740      /*
1741       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1742       */
1743
1744       if (!legacy)
1745       {
1746         legacy          = 1;
1747         vmap->widecount = (mapcount - i + 1);
1748         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1749                                                      sizeof(_cups_wide2uni_t));
1750         if (!wide2uni)
1751           goto vbcs_error;
1752
1753         vmap->wide2uni = wide2uni;
1754       }
1755
1756       wide2uni->widechar = (cups_vbcs_t)legchar;
1757       wide2uni->unichar  = (cups_ucs2_t)unichar;
1758       wide2uni ++;
1759     }
1760
1761    /*
1762     * Save Unicode to legacy mapping in indirect lookup table...
1763     */
1764
1765     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1766     if (!vrow)
1767     {
1768       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1769       if (!vrow)
1770         goto vbcs_error;
1771
1772       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1773     }
1774
1775     vrow += (int)(unichar & 0xff);
1776
1777    /*
1778     * Convert Replacement Character to visible replacement...
1779     */
1780
1781     if (unichar == 0xfffd)
1782       legchar = (unsigned long)'?';
1783
1784    /*
1785     * First (oldest) legacy character uses Unicode mapping cell...
1786     */
1787
1788     if (!*vrow)
1789       *vrow = (cups_vbcs_t)legchar;
1790   }
1791
1792   vmap->charcount = (i - vmap->widecount);
1793
1794   cupsFileClose(fp);
1795
1796  /*
1797   * Add it to the cache and return...
1798   */
1799
1800   vmap->next = vmap_cache;
1801   vmap_cache = vmap;
1802
1803   DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
1804
1805   return (vmap);
1806
1807  /*
1808   * If we get here, the file contains errors...
1809   */
1810
1811   vbcs_error:
1812
1813   free_vbcs_charmap(vmap);
1814
1815   cupsFileClose(fp);
1816
1817   DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
1818
1819   return (NULL);
1820 }
1821
1822
1823 /*
1824  * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
1825  */