cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
   3  *
   4  *   Transcoding support for CUPS.
   5  *
   6  *   Copyright 2007-2010 by Apple Inc.
   7  *   Copyright 1997-2007 by Easy Software Products.
   8  *
   9  *   These coded instructions, statements, and computer programs are the
  10  *   property of Apple Inc. and are protected by Federal copyright
  11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  *   which should have been included with this file.  If this file is
  13  *   missing or damaged, see the license at "http://www.cups.org/".
  14  *
  15  *   This file is subject to the Apple OS-Developed Software exception.
  16  *
  17  * Contents:
  18  *
  19  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  20  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  21  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  22  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  23  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  24  */
  25
  26 /*
  27  * Include necessary headers...
  28  */
  29
  30 #include "cups-private.h"
  31 #include <limits.h>
  32 #include <time.h>
  33 #ifdef HAVE_ICONV_H
  34 #  include <iconv.h>
  35 #endif /* HAVE_ICONV_H */
  36
  37
  38 /*
  39  * Local globals...
  40  */
  41
  42 #ifdef HAVE_ICONV_H
  43 static _cups_mutex_t    map_mutex = _CUPS_MUTEX_INITIALIZER;
  44                                         /* Mutex to control access to maps */
  45 static iconv_t          map_from_utf8 = (iconv_t)-1;
  46                                         /* Convert from UTF-8 to charset */
  47 static iconv_t          map_to_utf8 = (iconv_t)-1;
  48                                         /* Convert from charset to UTF-8 */
  49 static cups_encoding_t  map_encoding = CUPS_AUTO_ENCODING;
  50                                         /* Which charset is cached */
  51 #endif /* HAVE_ICONV_H */
  52
  53
  54 /*
  55  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
  56  */
  57
  58 void
  59 _cupsCharmapFlush(void)
  60 {
  61 #ifdef HAVE_ICONV_H
  62   if (map_from_utf8 != (iconv_t)-1)
  63   {
  64     iconv_close(map_from_utf8);
  65     map_from_utf8 = (iconv_t)-1;
  66   }
  67
  68   if (map_to_utf8 != (iconv_t)-1)
  69   {
  70     iconv_close(map_to_utf8);
  71     map_to_utf8 = (iconv_t)-1;
  72   }
  73
  74   map_encoding = CUPS_AUTO_ENCODING;
  75 #endif /* HAVE_ICONV_H */
  76 }
  77
  78
  79 /*
  80  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
  81  */
  82
  83 int                                     /* O - Count or -1 on error */
  84 cupsCharsetToUTF8(
  85     cups_utf8_t           *dest,        /* O - Target string */
  86     const char            *src,         /* I - Source string */
  87     const int             maxout,       /* I - Max output */
  88     const cups_encoding_t encoding)     /* I - Encoding */
  89 {
  90   cups_utf8_t   *destptr;               /* Pointer into UTF-8 buffer */
  91   int           bytes;                  /* Number of bytes converted */
  92   size_t        srclen,                 /* Length of source string */
  93                 outBytesLeft;           /* Bytes remaining in output buffer */
  94
  95
  96  /*
  97   * Check for valid arguments...
  98   */
  99
 100   DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
 101                 dest, src, maxout, encoding));
 102
 103   if (!dest || !src || maxout < 1)
 104   {
 105     if (dest)
 106       *dest = '\0';
 107
 108     DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
 109     return (-1);
 110   }
 111
 112  /*
 113   * Handle identity conversions...
 114   */
 115
 116   if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
 117       encoding >= CUPS_ENCODING_VBCS_END)
 118   {
 119     strlcpy((char *)dest, src, maxout);
 120     return ((int)strlen((char *)dest));
 121   }
 122
 123  /*
 124   * Handle ISO-8859-1 to UTF-8 directly...
 125   */
 126
 127   destptr = dest;
 128
 129   if (encoding == CUPS_ISO8859_1)
 130   {
 131     int         ch;                     /* Character from string */
 132     cups_utf8_t *destend;               /* End of UTF-8 buffer */
 133
 134
 135     destend = dest + maxout - 2;
 136
 137     while (*src && destptr < destend)
 138     {
 139       ch = *src++ & 255;
 140
 141       if (ch & 128)
 142       {
 143         *destptr++ = 0xc0 | (ch >> 6);
 144         *destptr++ = 0x80 | (ch & 0x3f);
 145       }
 146       else
 147         *destptr++ = ch;
 148     }
 149
 150     *destptr = '\0';
 151
 152     return ((int)(destptr - dest));
 153   }
 154
 155  /*
 156   * Convert input legacy charset to UTF-8...
 157   */
 158
 159 #ifdef HAVE_ICONV_H
 160   _cupsMutexLock(&map_mutex);
 161
 162   if (map_encoding != encoding)
 163   {
 164     _cupsCharmapFlush();
 165
 166     map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
 167     map_to_utf8   = iconv_open("UTF-8", _cupsEncodingName(encoding));
 168     map_encoding     = encoding;
 169   }
 170
 171   if (map_to_utf8 != (iconv_t)-1)
 172   {
 173     srclen       = strlen(src);
 174     outBytesLeft = maxout - 1;
 175     bytes        = (int)iconv(map_to_utf8, (char **)&src, &srclen,
 176                               (char **)&destptr, &outBytesLeft);
 177     *destptr     = '\0';
 178
 179     _cupsMutexUnlock(&map_mutex);
 180
 181     return ((int)(destptr - dest));
 182   }
 183
 184   _cupsMutexUnlock(&map_mutex);
 185 #endif /* HAVE_ICONV_H */
 186
 187  /*
 188   * No iconv() support, so error out...
 189   */
 190
 191   *destptr = '\0';
 192
 193   return (-1);
 194 }
 195
 196
 197 /*
 198  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 199  */
 200
 201 int                                     /* O - Count or -1 on error */
 202 cupsUTF8ToCharset(
 203     char                  *dest,        /* O - Target string */
 204     const cups_utf8_t     *src,         /* I - Source string */
 205     const int             maxout,       /* I - Max output */
 206     const cups_encoding_t encoding)     /* I - Encoding */
 207 {
 208   char          *destptr;               /* Pointer into destination */
 209   int           bytes;                  /* Number of bytes converted */
 210   size_t        srclen,                 /* Length of source string */
 211                 outBytesLeft;           /* Bytes remaining in output buffer */
 212
 213
 214  /*
 215   * Check for valid arguments...
 216   */
 217
 218   if (!dest || !src || maxout < 1)
 219   {
 220     if (dest)
 221       *dest = '\0';
 222
 223     return (-1);
 224   }
 225
 226  /*
 227   * Handle identity conversions...
 228   */
 229
 230   if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
 231       encoding >= CUPS_ENCODING_VBCS_END)
 232   {
 233     strlcpy(dest, (char *)src, maxout);
 234     return ((int)strlen(dest));
 235   }
 236
 237  /*
 238   * Handle UTF-8 to ISO-8859-1 directly...
 239   */
 240
 241   destptr = dest;
 242
 243   if (encoding == CUPS_ISO8859_1)
 244   {
 245     int         ch;                     /* Character from string */
 246     char        *destend;               /* End of ISO-8859-1 buffer */
 247
 248
 249     destend = dest + maxout - 1;
 250
 251     while (*src && destptr < destend)
 252     {
 253       ch = *src++;
 254
 255       if ((ch & 0xe0) == 0xc0)
 256       {
 257         ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
 258
 259         if (ch < 256)
 260           *destptr++ = ch;
 261         else
 262           *destptr++ = '?';
 263       }
 264       else if ((ch & 0xf0) == 0xe0 ||
 265                (ch & 0xf8) == 0xf0)
 266         *destptr++ = '?';
 267       else if (!(ch & 0x80))
 268         *destptr++ = ch;
 269     }
 270
 271     *destptr = '\0';
 272
 273     return ((int)(destptr - dest));
 274   }
 275
 276 #ifdef HAVE_ICONV_H
 277  /*
 278   * Convert input UTF-8 to legacy charset...
 279   */
 280
 281   _cupsMutexLock(&map_mutex);
 282
 283   if (map_encoding != encoding)
 284   {
 285     _cupsCharmapFlush();
 286
 287     map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
 288     map_to_utf8   = iconv_open("UTF-8", _cupsEncodingName(encoding));
 289     map_encoding  = encoding;
 290   }
 291
 292   if (map_from_utf8 != (iconv_t)-1)
 293   {
 294     srclen       = strlen((char *)src);
 295     outBytesLeft = maxout - 1;
 296     bytes        = (int)iconv(map_from_utf8, (char **)&src, &srclen,
 297                               &destptr, &outBytesLeft);
 298     *destptr     = '\0';
 299
 300     _cupsMutexUnlock(&map_mutex);
 301
 302     return ((int)(destptr - dest));
 303   }
 304
 305   _cupsMutexUnlock(&map_mutex);
 306 #endif /* HAVE_ICONV_H */
 307
 308  /*
 309   * No iconv() support, so error out...
 310   */
 311
 312   *destptr = '\0';
 313
 314   return (-1);
 315 }
 316
 317
 318 /*
 319  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 320  *
 321  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 322  *
 323  *   UTF-32 char     UTF-8 char(s)
 324  *   --------------------------------------------------
 325  *        0 to 127 = 0xxxxxxx (US-ASCII)
 326  *     128 to 2047 = 110xxxxx 10yyyyyy
 327  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 328  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 329  *
 330  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 331  * which would convert to five- or six-octet UTF-8 sequences...
 332  */
 333
 334 int                                     /* O - Count or -1 on error */
 335 cupsUTF8ToUTF32(
 336     cups_utf32_t      *dest,            /* O - Target string */
 337     const cups_utf8_t *src,             /* I - Source string */
 338     const int         maxout)           /* I - Max output */
 339 {
 340   int           i;                      /* Looping variable */
 341   cups_utf8_t   ch;                     /* Character value */
 342   cups_utf8_t   next;                   /* Next character value */
 343   cups_utf32_t  ch32;                   /* UTF-32 character value */
 344
 345
 346  /*
 347   * Check for valid arguments and clear output...
 348   */
 349
 350   DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
 351                 src, maxout));
 352
 353   if (dest)
 354     *dest = 0;
 355
 356   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 357   {
 358     DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
 359
 360     return (-1);
 361   }
 362
 363  /*
 364   * Convert input UTF-8 to output UTF-32...
 365   */
 366
 367   for (i = maxout - 1; *src && i > 0; i --)
 368   {
 369     ch = *src++;
 370
 371    /*
 372     * Convert UTF-8 character(s) to UTF-32 character...
 373     */
 374
 375     if (!(ch & 0x80))
 376     {
 377      /*
 378       * One-octet UTF-8 <= 127 (US-ASCII)...
 379       */
 380
 381       *dest++ = ch;
 382
 383       DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
 384       continue;
 385     }
 386     else if ((ch & 0xe0) == 0xc0)
 387     {
 388      /*
 389       * Two-octet UTF-8 <= 2047 (Latin-x)...
 390       */
 391
 392       next = *src++;
 393       if ((next & 0xc0) != 0x80)
 394       {
 395         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 396
 397         return (-1);
 398       }
 399
 400       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 401
 402      /*
 403       * Check for non-shortest form (invalid UTF-8)...
 404       */
 405
 406       if (ch32 < 0x80)
 407       {
 408         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 409
 410         return (-1);
 411       }
 412
 413       *dest++ = ch32;
 414
 415       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
 416                     src[-2], src[-1], (unsigned)ch32));
 417     }
 418     else if ((ch & 0xf0) == 0xe0)
 419     {
 420      /*
 421       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 422       */
 423
 424       next = *src++;
 425       if ((next & 0xc0) != 0x80)
 426       {
 427         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 428
 429         return (-1);
 430       }
 431
 432       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 433
 434       next = *src++;
 435       if ((next & 0xc0) != 0x80)
 436       {
 437         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 438
 439         return (-1);
 440       }
 441
 442       ch32 = (ch32 << 6) | (next & 0x3f);
 443
 444      /*
 445       * Check for non-shortest form (invalid UTF-8)...
 446       */
 447
 448       if (ch32 < 0x800)
 449       {
 450         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 451
 452         return (-1);
 453       }
 454
 455       *dest++ = ch32;
 456
 457       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
 458                     src[-3], src[-2], src[-1], (unsigned)ch32));
 459     }
 460     else if ((ch & 0xf8) == 0xf0)
 461     {
 462      /*
 463       * Four-octet UTF-8...
 464       */
 465
 466       next = *src++;
 467       if ((next & 0xc0) != 0x80)
 468       {
 469         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 470
 471         return (-1);
 472       }
 473
 474       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 475
 476       next = *src++;
 477       if ((next & 0xc0) != 0x80)
 478       {
 479         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 480
 481         return (-1);
 482       }
 483
 484       ch32 = (ch32 << 6) | (next & 0x3f);
 485
 486       next = *src++;
 487       if ((next & 0xc0) != 0x80)
 488       {
 489         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 490
 491         return (-1);
 492       }
 493
 494       ch32 = (ch32 << 6) | (next & 0x3f);
 495
 496      /*
 497       * Check for non-shortest form (invalid UTF-8)...
 498       */
 499
 500       if (ch32 < 0x10000)
 501       {
 502         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 503
 504         return (-1);
 505       }
 506
 507       *dest++ = ch32;
 508
 509       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
 510                     src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
 511     }
 512     else
 513     {
 514      /*
 515       * More than 4-octet (invalid UTF-8 sequence)...
 516       */
 517
 518       DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 519
 520       return (-1);
 521     }
 522
 523    /*
 524     * Check for UTF-16 surrogate (illegal UTF-8)...
 525     */
 526
 527     if (ch32 >= 0xd800 && ch32 <= 0xdfff)
 528       return (-1);
 529   }
 530
 531   *dest = 0;
 532
 533   DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
 534
 535   return (maxout - 1 - i);
 536 }
 537
 538
 539 /*
 540  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 541  *
 542  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 543  *
 544  *   UTF-32 char     UTF-8 char(s)
 545  *   --------------------------------------------------
 546  *        0 to 127 = 0xxxxxxx (US-ASCII)
 547  *     128 to 2047 = 110xxxxx 10yyyyyy
 548  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 549  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 550  *
 551  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 552  * which would convert to five- or six-octet UTF-8 sequences...
 553  */
 554
 555 int                                     /* O - Count or -1 on error */
 556 cupsUTF32ToUTF8(
 557     cups_utf8_t        *dest,           /* O - Target string */
 558     const cups_utf32_t *src,            /* I - Source string */
 559     const int          maxout)          /* I - Max output */
 560 {
 561   cups_utf8_t   *start;                 /* Start of destination string */
 562   int           i;                      /* Looping variable */
 563   int           swap;                   /* Byte-swap input to output */
 564   cups_utf32_t  ch;                     /* Character value */
 565
 566
 567  /*
 568   * Check for valid arguments and clear output...
 569   */
 570
 571   DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
 572                 maxout));
 573
 574   if (dest)
 575     *dest = '\0';
 576
 577   if (!dest || !src || maxout < 1)
 578   {
 579     DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
 580
 581     return (-1);
 582   }
 583
 584  /*
 585   * Check for leading BOM in UTF-32 and inverted BOM...
 586   */
 587
 588   start = dest;
 589   swap  = *src == 0xfffe0000;
 590
 591   DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
 592
 593   if (*src == 0xfffe0000 || *src == 0xfeff)
 594     src ++;
 595
 596  /*
 597   * Convert input UTF-32 to output UTF-8...
 598   */
 599
 600   for (i = maxout - 1; *src && i > 0;)
 601   {
 602     ch = *src++;
 603
 604    /*
 605     * Byte swap input UTF-32, if necessary...
 606     * (only byte-swapping 24 of 32 bits)
 607     */
 608
 609     if (swap)
 610       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 611
 612    /*
 613     * Check for beyond Plane 16 (invalid UTF-32)...
 614     */
 615
 616     if (ch > 0x10ffff)
 617     {
 618       DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
 619
 620       return (-1);
 621     }
 622
 623    /*
 624     * Convert UTF-32 character to UTF-8 character(s)...
 625     */
 626
 627     if (ch < 0x80)
 628     {
 629      /*
 630       * One-octet UTF-8 <= 127 (US-ASCII)...
 631       */
 632
 633       *dest++ = (cups_utf8_t)ch;
 634       i --;
 635
 636       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
 637     }
 638     else if (ch < 0x800)
 639     {
 640      /*
 641       * Two-octet UTF-8 <= 2047 (Latin-x)...
 642       */
 643
 644       if (i < 2)
 645       {
 646         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
 647
 648         return (-1);
 649       }
 650
 651       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 652       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 653       i -= 2;
 654
 655       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
 656                     dest[-2], dest[-1]));
 657     }
 658     else if (ch < 0x10000)
 659     {
 660      /*
 661       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 662       */
 663
 664       if (i < 3)
 665       {
 666         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
 667
 668         return (-1);
 669       }
 670
 671       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 672       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 673       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 674       i -= 3;
 675
 676       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
 677                     dest[-3], dest[-2], dest[-1]));
 678     }
 679     else
 680     {
 681      /*
 682       * Four-octet UTF-8...
 683       */
 684
 685       if (i < 4)
 686       {
 687         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
 688
 689         return (-1);
 690       }
 691
 692       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 693       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 694       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 695       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 696       i -= 4;
 697
 698       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
 699                     (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
 700     }
 701   }
 702
 703   *dest = '\0';
 704
 705   DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
 706
 707   return ((int)(dest - start));
 708 }
 709
 710
 711 /*
 712  * 'compare_wide()' - Compare key for wide (VBCS) match.
 713  */
 714
 715 static int
 716 compare_wide(const void *k1,            /* I - Key char */
 717              const void *k2)            /* I - Map char */
 718 {
 719   cups_vbcs_t   key;                    /* Legacy key character */
 720   cups_vbcs_t   map;                    /* Legacy map character */
 721
 722
 723   key = *((cups_vbcs_t *)k1);
 724   map = ((_cups_wide2uni_t *)k2)->widechar;
 725
 726   return ((int)(key - map));
 727 }
 728
 729
 730 /*
 731  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 732  */
 733
 734 static int                              /* O - Count or -1 on error */
 735 conv_sbcs_to_utf8(
 736     cups_utf8_t           *dest,        /* O - Target string */
 737     const cups_sbcs_t     *src,         /* I - Source string */
 738     int                   maxout,       /* I - Max output */
 739     const cups_encoding_t encoding)     /* I - Encoding */
 740 {
 741   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 742   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 743   cups_sbcs_t   legchar;                /* Legacy character value */
 744   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 745                 *workptr;               /* Pointer into string */
 746
 747
 748  /*
 749   * Find legacy charset map in cache...
 750   */
 751
 752   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 753     return (-1);
 754
 755  /*
 756   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 757   */
 758
 759   work[0] = 0xfeff;
 760   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 761   {
 762     legchar = *src++;
 763
 764    /*
 765     * Convert ASCII verbatim (optimization)...
 766     */
 767
 768     if (legchar < 0x80)
 769       *workptr++ = (cups_utf32_t)legchar;
 770     else
 771     {
 772      /*
 773       * Convert unknown character to Replacement Character...
 774       */
 775
 776       crow = cmap->char2uni + legchar;
 777
 778       if (!*crow)
 779         *workptr++ = 0xfffd;
 780       else
 781         *workptr++ = (cups_utf32_t)*crow;
 782     }
 783   }
 784
 785   *workptr = 0;
 786
 787  /*
 788   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 789   */
 790
 791   cmap->used --;
 792
 793   return (cupsUTF32ToUTF8(dest, work, maxout));
 794 }
 795
 796
 797 /*
 798  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 799  */
 800
 801 static int                              /* O - Count or -1 on error */
 802 conv_utf8_to_sbcs(
 803     cups_sbcs_t           *dest,        /* O - Target string */
 804     const cups_utf8_t     *src,         /* I - Source string */
 805     int                   maxout,       /* I - Max output */
 806     const cups_encoding_t encoding)     /* I - Encoding */
 807 {
 808   cups_sbcs_t   *start;                 /* Start of destination string */
 809   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 810   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 811   cups_utf32_t  unichar;                /* Character value */
 812   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 813                 *workptr;               /* Pointer into string */
 814
 815
 816  /*
 817   * Find legacy charset map in cache...
 818   */
 819
 820   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 821     return (-1);
 822
 823  /*
 824   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 825   */
 826
 827   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 828     return (-1);
 829
 830  /*
 831   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 832   */
 833
 834   for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
 835   {
 836     unichar = *workptr++;
 837     if (!unichar)
 838       break;
 839
 840    /*
 841     * Convert ASCII verbatim (optimization)...
 842     */
 843
 844     if (unichar < 0x80)
 845     {
 846       *dest++ = (cups_sbcs_t)unichar;
 847       continue;
 848     }
 849
 850    /*
 851     * Convert unknown character to visible replacement...
 852     */
 853
 854     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 855
 856     if (srow)
 857       srow += (int)(unichar & 0xff);
 858
 859     if (!srow || !*srow)
 860       *dest++ = '?';
 861     else
 862       *dest++ = *srow;
 863   }
 864
 865   *dest = '\0';
 866
 867   cmap->used --;
 868
 869   return ((int)(dest - start));
 870 }
 871
 872
 873 /*
 874  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 875  */
 876
 877 static int                              /* O - Count or -1 on error */
 878 conv_utf8_to_vbcs(
 879     cups_sbcs_t           *dest,        /* O - Target string */
 880     const cups_utf8_t     *src,         /* I - Source string */
 881     int                   maxout,       /* I - Max output */
 882     const cups_encoding_t encoding)     /* I - Encoding */
 883 {
 884   cups_sbcs_t   *start;                 /* Start of destination string */
 885   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
 886   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
 887   cups_utf32_t  unichar;                /* Character value */
 888   cups_vbcs_t   legchar;                /* Legacy character value */
 889   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 890                 *workptr;               /* Pointer into string */
 891
 892
 893   DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
 894                 "encoding=%d)", dest, src, maxout, encoding));
 895
 896  /*
 897   * Find legacy charset map in cache...
 898   */
 899
 900   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
 901   {
 902     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
 903
 904     return (-1);
 905   }
 906
 907  /*
 908   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 909   */
 910
 911   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 912   {
 913     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
 914
 915     return (-1);
 916   }
 917
 918  /*
 919   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
 920   */
 921
 922   for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
 923   {
 924     unichar = *workptr++;
 925
 926    /*
 927     * Convert ASCII verbatim (optimization)...
 928     */
 929
 930     if (unichar < 0x80)
 931     {
 932       *dest++ = (cups_sbcs_t)unichar;
 933
 934       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
 935                     dest[-1]));
 936
 937       continue;
 938     }
 939
 940    /*
 941     * Convert unknown character to visible replacement...
 942     */
 943
 944     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
 945
 946     if (vrow)
 947       vrow += (int)(unichar & 0xff);
 948
 949     if (!vrow || !*vrow)
 950       legchar = (cups_vbcs_t)'?';
 951     else
 952       legchar = (cups_vbcs_t)*vrow;
 953
 954    /*
 955     * Save n-byte legacy character...
 956     */
 957
 958     if (legchar > 0xffffff)
 959     {
 960       if (maxout < 5)
 961       {
 962         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
 963
 964         return (-1);
 965       }
 966
 967       *dest++ = (cups_sbcs_t)(legchar >> 24);
 968       *dest++ = (cups_sbcs_t)(legchar >> 16);
 969       *dest++ = (cups_sbcs_t)(legchar >> 8);
 970       *dest++ = (cups_sbcs_t)legchar;
 971
 972       maxout -= 3;
 973
 974       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
 975                     (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
 976     }
 977     else if (legchar > 0xffff)
 978     {
 979       if (maxout < 4)
 980       {
 981         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
 982
 983         return (-1);
 984       }
 985
 986       *dest++ = (cups_sbcs_t)(legchar >> 16);
 987       *dest++ = (cups_sbcs_t)(legchar >> 8);
 988       *dest++ = (cups_sbcs_t)legchar;
 989
 990       maxout -= 2;
 991
 992       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
 993                     (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
 994     }
 995     else if (legchar > 0xff)
 996     {
 997       *dest++ = (cups_sbcs_t)(legchar >> 8);
 998       *dest++ = (cups_sbcs_t)legchar;
 999
1000       maxout --;
1001
1002       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
1003                     (unsigned)unichar, dest[-2], dest[-1]));
1004     }
1005     else
1006     {
1007       *dest++ = (cups_sbcs_t)legchar;
1008
1009       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
1010                     (unsigned)unichar, dest[-1]));
1011     }
1012   }
1013
1014   *dest = '\0';
1015
1016   vmap->used --;
1017
1018   DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
1019                 (int)(dest - start)));
1020
1021   return ((int)(dest - start));
1022 }
1023
1024
1025 /*
1026  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1027  */
1028
1029 static int                              /* O - Count or -1 on error */
1030 conv_vbcs_to_utf8(
1031     cups_utf8_t           *dest,        /* O - Target string */
1032     const cups_sbcs_t     *src,         /* I - Source string */
1033     int                   maxout,       /* I - Max output */
1034     const cups_encoding_t encoding)     /* I - Encoding */
1035 {
1036   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1037   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1038   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1039   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
1040   cups_vbcs_t   legchar;                /* Legacy character value */
1041   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1042                 *workptr;               /* Pointer into string */
1043
1044
1045  /*
1046   * Find legacy charset map in cache...
1047   */
1048
1049   DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
1050                 dest, src, maxout, encoding));
1051
1052   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1053   {
1054     DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1055
1056     return (-1);
1057   }
1058
1059  /*
1060   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1061   */
1062
1063   work[0] = 0xfeff;
1064   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1065   {
1066     legchar  = *src++;
1067     leadchar = (cups_sbcs_t)legchar;
1068
1069    /*
1070     * Convert ASCII verbatim (optimization)...
1071     */
1072
1073     if (legchar < 0x80)
1074     {
1075       *workptr++ = (cups_utf32_t)legchar;
1076
1077       DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
1078                     (unsigned)legchar));
1079       continue;
1080     }
1081
1082    /*
1083     * Convert 2-byte legacy character...
1084     */
1085
1086     if (vmap->lead2char[(int)leadchar] == leadchar)
1087     {
1088       if (!*src)
1089       {
1090         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
1091
1092         return (-1);
1093       }
1094
1095       legchar = (legchar << 8) | *src++;
1096
1097      /*
1098       * Convert unknown character to Replacement Character...
1099       */
1100
1101       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1102       if (crow)
1103         crow += (int) (legchar & 0xff);
1104
1105       if (!crow || !*crow)
1106         *workptr++ = 0xfffd;
1107       else
1108         *workptr++ = (cups_utf32_t)*crow;
1109
1110       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
1111                     src[-2], src[-1], (unsigned)workptr[-1]));
1112       continue;
1113     }
1114
1115    /*
1116     * Fetch 3-byte or 4-byte legacy character...
1117     */
1118
1119     if (vmap->lead3char[(int)leadchar] == leadchar)
1120     {
1121       if (!*src || !src[1])
1122       {
1123         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
1124
1125         return (-1);
1126       }
1127
1128       legchar = (legchar << 8) | *src++;
1129       legchar = (legchar << 8) | *src++;
1130     }
1131     else if (vmap->lead4char[(int)leadchar] == leadchar)
1132     {
1133       if (!*src || !src[1] || !src[2])
1134       {
1135         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
1136
1137         return (-1);
1138       }
1139
1140       legchar = (legchar << 8) | *src++;
1141       legchar = (legchar << 8) | *src++;
1142       legchar = (legchar << 8) | *src++;
1143     }
1144     else
1145     {
1146       DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
1147
1148       return (-1);
1149     }
1150
1151    /*
1152     * Find 3-byte or 4-byte legacy character...
1153     */
1154
1155     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1156                                            vmap->wide2uni,
1157                                            vmap->widecount,
1158                                            sizeof(_cups_wide2uni_t),
1159                                            compare_wide);
1160
1161    /*
1162     * Convert unknown character to Replacement Character...
1163     */
1164
1165     if (!wide2uni || !wide2uni->unichar)
1166       *workptr++ = 0xfffd;
1167     else
1168       *workptr++ = wide2uni->unichar;
1169
1170     if (vmap->lead3char[(int)leadchar] == leadchar)
1171       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
1172                     src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1173     else
1174       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
1175                     src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1176   }
1177
1178   *workptr = 0;
1179
1180   vmap->used --;
1181
1182   DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
1183                 (int)(workptr - work)));
1184
1185  /*
1186   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1187   */
1188
1189   return (cupsUTF32ToUTF8(dest, work, maxout));
1190 }
1191
1192
1193 /*
1194  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1195  */
1196
1197 static void
1198 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1199 {
1200   int           i;                      /* Looping variable */
1201
1202
1203   for (i = 0; i < 256; i ++)
1204     if (cmap->uni2char[i])
1205       free(cmap->uni2char[i]);
1206
1207   free(cmap);
1208 }
1209
1210
1211 /*
1212  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1213  */
1214
1215 static void
1216 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1217 {
1218   int           i;                      /* Looping variable */
1219
1220
1221   for (i = 0; i < 256; i ++)
1222     if (vmap->char2uni[i])
1223       free(vmap->char2uni[i]);
1224
1225   for (i = 0; i < 256; i ++)
1226     if (vmap->uni2char[i])
1227       free(vmap->uni2char[i]);
1228
1229   if (vmap->wide2uni)
1230     free(vmap->wide2uni);
1231
1232   free(vmap);
1233 }
1234
1235
1236 /*
1237  * 'get_charmap()' - Lookup or get a character set map (private).
1238  *
1239  * This code handles single-byte (SBCS), double-byte (DBCS), and
1240  * variable-byte (VBCS) character sets _without_ charset escapes...
1241  * This code does not handle multiple-byte character sets (MBCS)
1242  * (such as ISO-2022-JP) with charset switching via escapes...
1243  */
1244
1245
1246 static void *                           /* O - Charset map pointer */
1247 get_charmap(
1248     const cups_encoding_t encoding)     /* I - Encoding */
1249 {
1250   char          filename[1024];         /* Filename for charset map file */
1251   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1252
1253
1254   DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
1255
1256  /*
1257   * Get the data directory and charset map name...
1258   */
1259
1260   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1261            cg->cups_datadir, _cupsEncodingName(encoding));
1262
1263   DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
1264
1265  /*
1266   * Read charset map input file into cache...
1267   */
1268
1269   if (encoding < CUPS_ENCODING_SBCS_END)
1270     return (get_sbcs_charmap(encoding, filename));
1271   else if (encoding < CUPS_ENCODING_VBCS_END)
1272     return (get_vbcs_charmap(encoding, filename));
1273   else
1274     return (NULL);
1275 }
1276
1277
1278 /*
1279  * 'get_charmap_count()' - Count lines in a charmap file.
1280  */
1281
1282 static int                              /* O - Count or -1 on error */
1283 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1284 {
1285   int   count;                          /* Number of lines */
1286   char  line[256];                      /* Line from input map file */
1287
1288
1289  /*
1290   * Count lines in map input file...
1291   */
1292
1293   count = 0;
1294
1295   while (cupsFileGets(fp, line, sizeof(line)))
1296     if (line[0] == '0')
1297       count ++;
1298
1299  /*
1300   * Return the number of lines...
1301   */
1302
1303   if (count > 0)
1304     return (count);
1305   else
1306     return (-1);
1307 }
1308
1309
1310 /*
1311  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1312  */
1313
1314 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1315 get_sbcs_charmap(
1316     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1317     const char            *filename)    /* I - Charmap Filename */
1318 {
1319   unsigned long legchar;                /* Legacy character value */
1320   cups_utf32_t  unichar;                /* Unicode character value */
1321   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1322   cups_file_t   *fp;                    /* Charset map file pointer */
1323   char          *s;                     /* Line parsing pointer */
1324   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1325   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1326   char          line[256];              /* Line from charset map file */
1327
1328
1329  /*
1330   * See if we already have this SBCS charset map loaded...
1331   */
1332
1333   DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
1334                 filename));
1335
1336   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1337   {
1338     if (cmap->encoding == encoding)
1339     {
1340       cmap->used ++;
1341       DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
1342
1343       return ((void *)cmap);
1344     }
1345   }
1346
1347  /*
1348   * Open SBCS charset map input file...
1349   */
1350
1351   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1352   {
1353     DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
1354
1355     return (NULL);
1356   }
1357
1358  /*
1359   * Allocate memory for SBCS charset map...
1360   */
1361
1362   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1363   {
1364     cupsFileClose(fp);
1365     DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1366
1367     return (NULL);
1368   }
1369
1370   cmap->used ++;
1371   cmap->encoding = encoding;
1372
1373  /*
1374   * Save SBCS charset map into memory for transcoding...
1375   */
1376
1377   while (cupsFileGets(fp, line, sizeof(line)))
1378   {
1379     if (line[0] != '0')
1380       continue;
1381
1382     legchar = strtol(line, &s, 16);
1383     if (legchar < 0 || legchar > 0xff)
1384       goto sbcs_error;
1385
1386     unichar = strtol(s, NULL, 16);
1387     if (unichar < 0 || unichar > 0x10ffff)
1388       goto sbcs_error;
1389
1390    /*
1391     * Save legacy to Unicode mapping in direct lookup table...
1392     */
1393
1394     crow  = cmap->char2uni + legchar;
1395     *crow = (cups_ucs2_t)(unichar & 0xffff);
1396
1397    /*
1398     * Save Unicode to legacy mapping in indirect lookup table...
1399     */
1400
1401     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1402     if (!srow)
1403     {
1404       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1405       if (!srow)
1406         goto sbcs_error;
1407
1408       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1409     }
1410
1411     srow += unichar & 0xff;
1412
1413    /*
1414     * Convert Replacement Character to visible replacement...
1415     */
1416
1417     if (unichar == 0xfffd)
1418       legchar = (unsigned long)'?';
1419
1420    /*
1421     * First (oldest) legacy character uses Unicode mapping cell...
1422     */
1423
1424     if (!*srow)
1425       *srow = (cups_sbcs_t)legchar;
1426   }
1427
1428   cupsFileClose(fp);
1429
1430  /*
1431   * Add it to the cache and return...
1432   */
1433
1434   cmap->next = cmap_cache;
1435   cmap_cache = cmap;
1436
1437   DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
1438
1439   return (cmap);
1440
1441  /*
1442   * If we get here, there was an error in the cmap file...
1443   */
1444
1445   sbcs_error:
1446
1447   free_sbcs_charmap(cmap);
1448
1449   cupsFileClose(fp);
1450
1451   DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
1452
1453   return (NULL);
1454 }
1455
1456
1457 /*
1458  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1459  */
1460
1461 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1462 get_vbcs_charmap(
1463     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1464     const char            *filename)    /* I - Charmap Filename */
1465 {
1466   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1467   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1468   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1469   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1470   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1471   unsigned long legchar;                /* Legacy character value */
1472   cups_utf32_t  unichar;                /* Unicode character value */
1473   int           mapcount;               /* Count of lines in charmap file */
1474   cups_file_t   *fp;                    /* Charset map file pointer */
1475   char          *s;                     /* Line parsing pointer */
1476   char          line[256];              /* Line from charset map file */
1477   int           i;                      /* Loop variable */
1478   int           legacy;                 /* 32-bit legacy char */
1479
1480
1481   DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1482                 encoding, filename));
1483
1484  /*
1485   * See if we already have this DBCS/VBCS charset map loaded...
1486   */
1487
1488   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1489   {
1490     if (vmap->encoding == encoding)
1491     {
1492       vmap->used ++;
1493       DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
1494
1495       return ((void *)vmap);
1496     }
1497   }
1498
1499  /*
1500   * Open VBCS charset map input file...
1501   */
1502
1503   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1504   {
1505     DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
1506
1507     return (NULL);
1508   }
1509
1510  /*
1511   * Count lines in charmap file...
1512   */
1513
1514   if ((mapcount = get_charmap_count(fp)) <= 0)
1515   {
1516     DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
1517
1518     cupsFileClose(fp);
1519
1520     return (NULL);
1521   }
1522
1523   DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
1524
1525  /*
1526   * Allocate memory for DBCS/VBCS charset map...
1527   */
1528
1529   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1530   {
1531     DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
1532
1533     cupsFileClose(fp);
1534
1535     return (NULL);
1536   }
1537
1538   vmap->used ++;
1539   vmap->encoding = encoding;
1540
1541  /*
1542   * Save DBCS/VBCS charset map into memory for transcoding...
1543   */
1544
1545   wide2uni = NULL;
1546
1547   cupsFileRewind(fp);
1548
1549   i      = 0;
1550   legacy = 0;
1551
1552   while (cupsFileGets(fp, line, sizeof(line)))
1553   {
1554     if (line[0] != '0')
1555       continue;
1556
1557     legchar = strtoul(line, &s, 16);
1558     if (legchar == ULONG_MAX)
1559       goto vbcs_error;
1560
1561     unichar = strtol(s, NULL, 16);
1562     if (unichar < 0 || unichar > 0x10ffff)
1563       goto vbcs_error;
1564
1565     i ++;
1566
1567     DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
1568                   legchar, (unsigned)unichar));
1569
1570    /*
1571     * Save lead char of 2/3/4-byte legacy char...
1572     */
1573
1574     if (legchar > 0xffffff)
1575     {
1576       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1577       vmap->lead4char[leadchar] = leadchar;
1578     }
1579     else if (legchar > 0xffff)
1580     {
1581       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1582       vmap->lead3char[leadchar] = leadchar;
1583     }
1584     else
1585     {
1586       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1587       vmap->lead2char[leadchar] = leadchar;
1588     }
1589
1590    /*
1591     * Save Legacy to Unicode mapping...
1592     */
1593
1594     if (legchar <= 0xffff)
1595     {
1596      /*
1597       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1598       */
1599
1600       crow = vmap->char2uni[(int)leadchar];
1601       if (!crow)
1602       {
1603         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1604         if (!crow)
1605           goto vbcs_error;
1606
1607         vmap->char2uni[(int)leadchar] = crow;
1608       }
1609
1610       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1611     }
1612     else
1613     {
1614      /*
1615       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1616       */
1617
1618       if (!legacy)
1619       {
1620         legacy          = 1;
1621         vmap->widecount = (mapcount - i + 1);
1622         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1623                                                      sizeof(_cups_wide2uni_t));
1624         if (!wide2uni)
1625           goto vbcs_error;
1626
1627         vmap->wide2uni = wide2uni;
1628       }
1629
1630       wide2uni->widechar = (cups_vbcs_t)legchar;
1631       wide2uni->unichar  = (cups_ucs2_t)unichar;
1632       wide2uni ++;
1633     }
1634
1635    /*
1636     * Save Unicode to legacy mapping in indirect lookup table...
1637     */
1638
1639     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1640     if (!vrow)
1641     {
1642       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1643       if (!vrow)
1644         goto vbcs_error;
1645
1646       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1647     }
1648
1649     vrow += (int)(unichar & 0xff);
1650
1651    /*
1652     * Convert Replacement Character to visible replacement...
1653     */
1654
1655     if (unichar == 0xfffd)
1656       legchar = (unsigned long)'?';
1657
1658    /*
1659     * First (oldest) legacy character uses Unicode mapping cell...
1660     */
1661
1662     if (!*vrow)
1663       *vrow = (cups_vbcs_t)legchar;
1664   }
1665
1666   vmap->charcount = (i - vmap->widecount);
1667
1668   cupsFileClose(fp);
1669
1670  /*
1671   * Add it to the cache and return...
1672   */
1673
1674   vmap->next = vmap_cache;
1675   vmap_cache = vmap;
1676
1677   DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
1678
1679   return (vmap);
1680
1681  /*
1682   * If we get here, the file contains errors...
1683   */
1684
1685   vbcs_error:
1686
1687   free_vbcs_charmap(vmap);
1688
1689   cupsFileClose(fp);
1690
1691   DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
1692
1693   return (NULL);
1694 }
1695
1696
1697 /*
1698  * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
1699  */