cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
   3  *
   4  *   Transcoding support for CUPS.
   5  *
   6  *   Copyright 2007-2010 by Apple Inc.
   7  *   Copyright 1997-2007 by Easy Software Products.
   8  *
   9  *   These coded instructions, statements, and computer programs are the
  10  *   property of Apple Inc. and are protected by Federal copyright
  11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  *   which should have been included with this file.  If this
  13  *   file is missing or damaged, see the license at "http://www.cups.org/".
  14  *
  15  *   This file is subject to the Apple OS-Developed Software exception.
  16  *
  17  * Contents:
  18  *
  19  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  20  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  21  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  22  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  23  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  24  */
  25
  26 /*
  27  * Include necessary headers...
  28  */
  29
  30 #include "cups-private.h"
  31 #include <limits.h>
  32 #include <time.h>
  33 #ifdef HAVE_ICONV_H
  34 #  include <iconv.h>
  35 #endif /* HAVE_ICONV_H */
  36
  37
  38 /*
  39  * Local globals...
  40  */
  41
  42 #ifdef HAVE_ICONV_H
  43 static _cups_mutex_t    map_mutex = _CUPS_MUTEX_INITIALIZER;
  44                                         /* Mutex to control access to maps */
  45 static iconv_t          map_from_utf8 = (iconv_t)-1;
  46                                         /* Convert from UTF-8 to charset */
  47 static iconv_t          map_to_utf8 = (iconv_t)-1;
  48                                         /* Convert from charset to UTF-8 */
  49 static cups_encoding_t  map_encoding = CUPS_AUTO_ENCODING;
  50                                         /* Which charset is cached */
  51 #endif /* HAVE_ICONV_H */
  52
  53
  54 /*
  55  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
  56  */
  57
  58 void
  59 _cupsCharmapFlush(void)
  60 {
  61 #ifdef HAVE_ICONV_H
  62   if (map_from_utf8 != (iconv_t)-1)
  63   {
  64     iconv_close(map_from_utf8);
  65     map_from_utf8 = (iconv_t)-1;
  66   }
  67
  68   if (map_to_utf8 != (iconv_t)-1)
  69   {
  70     iconv_close(map_to_utf8);
  71     map_to_utf8 = (iconv_t)-1;
  72   }
  73
  74   map_encoding = CUPS_AUTO_ENCODING;
  75 #endif /* HAVE_ICONV_H */
  76 }
  77
  78
  79 /*
  80  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
  81  */
  82
  83 int                                     /* O - Count or -1 on error */
  84 cupsCharsetToUTF8(
  85     cups_utf8_t           *dest,        /* O - Target string */
  86     const char            *src,         /* I - Source string */
  87     const int             maxout,       /* I - Max output */
  88     const cups_encoding_t encoding)     /* I - Encoding */
  89 {
  90   cups_utf8_t   *destptr;               /* Pointer into UTF-8 buffer */
  91 #ifdef HAVE_ICONV_H
  92   size_t        srclen,                 /* Length of source string */
  93                 outBytesLeft;           /* Bytes remaining in output buffer */
  94 #endif /* HAVE_ICONV_H */
  95
  96
  97  /*
  98   * Check for valid arguments...
  99   */
 100
 101   DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
 102                 dest, src, maxout, encoding));
 103
 104   if (!dest || !src || maxout < 1)
 105   {
 106     if (dest)
 107       *dest = '\0';
 108
 109     DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
 110     return (-1);
 111   }
 112
 113  /*
 114   * Handle identity conversions...
 115   */
 116
 117   if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
 118       encoding >= CUPS_ENCODING_VBCS_END)
 119   {
 120     strlcpy((char *)dest, src, maxout);
 121     return ((int)strlen((char *)dest));
 122   }
 123
 124  /*
 125   * Handle ISO-8859-1 to UTF-8 directly...
 126   */
 127
 128   destptr = dest;
 129
 130   if (encoding == CUPS_ISO8859_1)
 131   {
 132     int         ch;                     /* Character from string */
 133     cups_utf8_t *destend;               /* End of UTF-8 buffer */
 134
 135
 136     destend = dest + maxout - 2;
 137
 138     while (*src && destptr < destend)
 139     {
 140       ch = *src++ & 255;
 141
 142       if (ch & 128)
 143       {
 144         *destptr++ = 0xc0 | (ch >> 6);
 145         *destptr++ = 0x80 | (ch & 0x3f);
 146       }
 147       else
 148         *destptr++ = ch;
 149     }
 150
 151     *destptr = '\0';
 152
 153     return ((int)(destptr - dest));
 154   }
 155
 156  /*
 157   * Convert input legacy charset to UTF-8...
 158   */
 159
 160 #ifdef HAVE_ICONV_H
 161   _cupsMutexLock(&map_mutex);
 162
 163   if (map_encoding != encoding)
 164   {
 165     _cupsCharmapFlush();
 166
 167     map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
 168     map_to_utf8   = iconv_open("UTF-8", _cupsEncodingName(encoding));
 169     map_encoding     = encoding;
 170   }
 171
 172   if (map_to_utf8 != (iconv_t)-1)
 173   {
 174     srclen       = strlen(src);
 175     outBytesLeft = maxout - 1;
 176
 177     iconv(map_to_utf8, (char **)&src, &srclen, (char **)&destptr,
 178           &outBytesLeft);
 179     *destptr = '\0';
 180
 181     _cupsMutexUnlock(&map_mutex);
 182
 183     return ((int)(destptr - dest));
 184   }
 185
 186   _cupsMutexUnlock(&map_mutex);
 187 #endif /* HAVE_ICONV_H */
 188
 189  /*
 190   * No iconv() support, so error out...
 191   */
 192
 193   *destptr = '\0';
 194
 195   return (-1);
 196 }
 197
 198
 199 /*
 200  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 201  */
 202
 203 int                                     /* O - Count or -1 on error */
 204 cupsUTF8ToCharset(
 205     char                  *dest,        /* O - Target string */
 206     const cups_utf8_t     *src,         /* I - Source string */
 207     const int             maxout,       /* I - Max output */
 208     const cups_encoding_t encoding)     /* I - Encoding */
 209 {
 210   char          *destptr;               /* Pointer into destination */
 211 #ifdef HAVE_ICONV_H
 212   size_t        srclen,                 /* Length of source string */
 213                 outBytesLeft;           /* Bytes remaining in output buffer */
 214 #endif /* HAVE_ICONV_H */
 215
 216
 217  /*
 218   * Check for valid arguments...
 219   */
 220
 221   if (!dest || !src || maxout < 1)
 222   {
 223     if (dest)
 224       *dest = '\0';
 225
 226     return (-1);
 227   }
 228
 229  /*
 230   * Handle identity conversions...
 231   */
 232
 233   if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
 234       encoding >= CUPS_ENCODING_VBCS_END)
 235   {
 236     strlcpy(dest, (char *)src, maxout);
 237     return ((int)strlen(dest));
 238   }
 239
 240  /*
 241   * Handle UTF-8 to ISO-8859-1 directly...
 242   */
 243
 244   destptr = dest;
 245
 246   if (encoding == CUPS_ISO8859_1)
 247   {
 248     int         ch;                     /* Character from string */
 249     char        *destend;               /* End of ISO-8859-1 buffer */
 250
 251
 252     destend = dest + maxout - 1;
 253
 254     while (*src && destptr < destend)
 255     {
 256       ch = *src++;
 257
 258       if ((ch & 0xe0) == 0xc0)
 259       {
 260         ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
 261
 262         if (ch < 256)
 263           *destptr++ = ch;
 264         else
 265           *destptr++ = '?';
 266       }
 267       else if ((ch & 0xf0) == 0xe0 ||
 268                (ch & 0xf8) == 0xf0)
 269         *destptr++ = '?';
 270       else if (!(ch & 0x80))
 271         *destptr++ = ch;
 272     }
 273
 274     *destptr = '\0';
 275
 276     return ((int)(destptr - dest));
 277   }
 278
 279 #ifdef HAVE_ICONV_H
 280  /*
 281   * Convert input UTF-8 to legacy charset...
 282   */
 283
 284   _cupsMutexLock(&map_mutex);
 285
 286   if (map_encoding != encoding)
 287   {
 288     _cupsCharmapFlush();
 289
 290     map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
 291     map_to_utf8   = iconv_open("UTF-8", _cupsEncodingName(encoding));
 292     map_encoding  = encoding;
 293   }
 294
 295   if (map_from_utf8 != (iconv_t)-1)
 296   {
 297     srclen       = strlen((char *)src);
 298     outBytesLeft = maxout - 1;
 299
 300     iconv(map_from_utf8, (char **)&src, &srclen, &destptr, &outBytesLeft);
 301     *destptr = '\0';
 302
 303     _cupsMutexUnlock(&map_mutex);
 304
 305     return ((int)(destptr - dest));
 306   }
 307
 308   _cupsMutexUnlock(&map_mutex);
 309 #endif /* HAVE_ICONV_H */
 310
 311  /*
 312   * No iconv() support, so error out...
 313   */
 314
 315   *destptr = '\0';
 316
 317   return (-1);
 318 }
 319
 320
 321 /*
 322  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 323  *
 324  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 325  *
 326  *   UTF-32 char     UTF-8 char(s)
 327  *   --------------------------------------------------
 328  *        0 to 127 = 0xxxxxxx (US-ASCII)
 329  *     128 to 2047 = 110xxxxx 10yyyyyy
 330  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 331  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 332  *
 333  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 334  * which would convert to five- or six-octet UTF-8 sequences...
 335  */
 336
 337 int                                     /* O - Count or -1 on error */
 338 cupsUTF8ToUTF32(
 339     cups_utf32_t      *dest,            /* O - Target string */
 340     const cups_utf8_t *src,             /* I - Source string */
 341     const int         maxout)           /* I - Max output */
 342 {
 343   int           i;                      /* Looping variable */
 344   cups_utf8_t   ch;                     /* Character value */
 345   cups_utf8_t   next;                   /* Next character value */
 346   cups_utf32_t  ch32;                   /* UTF-32 character value */
 347
 348
 349  /*
 350   * Check for valid arguments and clear output...
 351   */
 352
 353   DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
 354                 src, maxout));
 355
 356   if (dest)
 357     *dest = 0;
 358
 359   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 360   {
 361     DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
 362
 363     return (-1);
 364   }
 365
 366  /*
 367   * Convert input UTF-8 to output UTF-32...
 368   */
 369
 370   for (i = maxout - 1; *src && i > 0; i --)
 371   {
 372     ch = *src++;
 373
 374    /*
 375     * Convert UTF-8 character(s) to UTF-32 character...
 376     */
 377
 378     if (!(ch & 0x80))
 379     {
 380      /*
 381       * One-octet UTF-8 <= 127 (US-ASCII)...
 382       */
 383
 384       *dest++ = ch;
 385
 386       DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
 387       continue;
 388     }
 389     else if ((ch & 0xe0) == 0xc0)
 390     {
 391      /*
 392       * Two-octet UTF-8 <= 2047 (Latin-x)...
 393       */
 394
 395       next = *src++;
 396       if ((next & 0xc0) != 0x80)
 397       {
 398         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 399
 400         return (-1);
 401       }
 402
 403       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 404
 405      /*
 406       * Check for non-shortest form (invalid UTF-8)...
 407       */
 408
 409       if (ch32 < 0x80)
 410       {
 411         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 412
 413         return (-1);
 414       }
 415
 416       *dest++ = ch32;
 417
 418       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
 419                     src[-2], src[-1], (unsigned)ch32));
 420     }
 421     else if ((ch & 0xf0) == 0xe0)
 422     {
 423      /*
 424       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 425       */
 426
 427       next = *src++;
 428       if ((next & 0xc0) != 0x80)
 429       {
 430         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 431
 432         return (-1);
 433       }
 434
 435       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 436
 437       next = *src++;
 438       if ((next & 0xc0) != 0x80)
 439       {
 440         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 441
 442         return (-1);
 443       }
 444
 445       ch32 = (ch32 << 6) | (next & 0x3f);
 446
 447      /*
 448       * Check for non-shortest form (invalid UTF-8)...
 449       */
 450
 451       if (ch32 < 0x800)
 452       {
 453         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 454
 455         return (-1);
 456       }
 457
 458       *dest++ = ch32;
 459
 460       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
 461                     src[-3], src[-2], src[-1], (unsigned)ch32));
 462     }
 463     else if ((ch & 0xf8) == 0xf0)
 464     {
 465      /*
 466       * Four-octet UTF-8...
 467       */
 468
 469       next = *src++;
 470       if ((next & 0xc0) != 0x80)
 471       {
 472         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 473
 474         return (-1);
 475       }
 476
 477       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 478
 479       next = *src++;
 480       if ((next & 0xc0) != 0x80)
 481       {
 482         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 483
 484         return (-1);
 485       }
 486
 487       ch32 = (ch32 << 6) | (next & 0x3f);
 488
 489       next = *src++;
 490       if ((next & 0xc0) != 0x80)
 491       {
 492         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 493
 494         return (-1);
 495       }
 496
 497       ch32 = (ch32 << 6) | (next & 0x3f);
 498
 499      /*
 500       * Check for non-shortest form (invalid UTF-8)...
 501       */
 502
 503       if (ch32 < 0x10000)
 504       {
 505         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 506
 507         return (-1);
 508       }
 509
 510       *dest++ = ch32;
 511
 512       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
 513                     src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
 514     }
 515     else
 516     {
 517      /*
 518       * More than 4-octet (invalid UTF-8 sequence)...
 519       */
 520
 521       DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 522
 523       return (-1);
 524     }
 525
 526    /*
 527     * Check for UTF-16 surrogate (illegal UTF-8)...
 528     */
 529
 530     if (ch32 >= 0xd800 && ch32 <= 0xdfff)
 531       return (-1);
 532   }
 533
 534   *dest = 0;
 535
 536   DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
 537
 538   return (maxout - 1 - i);
 539 }
 540
 541
 542 /*
 543  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 544  *
 545  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 546  *
 547  *   UTF-32 char     UTF-8 char(s)
 548  *   --------------------------------------------------
 549  *        0 to 127 = 0xxxxxxx (US-ASCII)
 550  *     128 to 2047 = 110xxxxx 10yyyyyy
 551  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 552  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 553  *
 554  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 555  * which would convert to five- or six-octet UTF-8 sequences...
 556  */
 557
 558 int                                     /* O - Count or -1 on error */
 559 cupsUTF32ToUTF8(
 560     cups_utf8_t        *dest,           /* O - Target string */
 561     const cups_utf32_t *src,            /* I - Source string */
 562     const int          maxout)          /* I - Max output */
 563 {
 564   cups_utf8_t   *start;                 /* Start of destination string */
 565   int           i;                      /* Looping variable */
 566   int           swap;                   /* Byte-swap input to output */
 567   cups_utf32_t  ch;                     /* Character value */
 568
 569
 570  /*
 571   * Check for valid arguments and clear output...
 572   */
 573
 574   DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
 575                 maxout));
 576
 577   if (dest)
 578     *dest = '\0';
 579
 580   if (!dest || !src || maxout < 1)
 581   {
 582     DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
 583
 584     return (-1);
 585   }
 586
 587  /*
 588   * Check for leading BOM in UTF-32 and inverted BOM...
 589   */
 590
 591   start = dest;
 592   swap  = *src == 0xfffe0000;
 593
 594   DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
 595
 596   if (*src == 0xfffe0000 || *src == 0xfeff)
 597     src ++;
 598
 599  /*
 600   * Convert input UTF-32 to output UTF-8...
 601   */
 602
 603   for (i = maxout - 1; *src && i > 0;)
 604   {
 605     ch = *src++;
 606
 607    /*
 608     * Byte swap input UTF-32, if necessary...
 609     * (only byte-swapping 24 of 32 bits)
 610     */
 611
 612     if (swap)
 613       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 614
 615    /*
 616     * Check for beyond Plane 16 (invalid UTF-32)...
 617     */
 618
 619     if (ch > 0x10ffff)
 620     {
 621       DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
 622
 623       return (-1);
 624     }
 625
 626    /*
 627     * Convert UTF-32 character to UTF-8 character(s)...
 628     */
 629
 630     if (ch < 0x80)
 631     {
 632      /*
 633       * One-octet UTF-8 <= 127 (US-ASCII)...
 634       */
 635
 636       *dest++ = (cups_utf8_t)ch;
 637       i --;
 638
 639       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
 640     }
 641     else if (ch < 0x800)
 642     {
 643      /*
 644       * Two-octet UTF-8 <= 2047 (Latin-x)...
 645       */
 646
 647       if (i < 2)
 648       {
 649         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
 650
 651         return (-1);
 652       }
 653
 654       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 655       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 656       i -= 2;
 657
 658       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
 659                     dest[-2], dest[-1]));
 660     }
 661     else if (ch < 0x10000)
 662     {
 663      /*
 664       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 665       */
 666
 667       if (i < 3)
 668       {
 669         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
 670
 671         return (-1);
 672       }
 673
 674       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 675       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 676       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 677       i -= 3;
 678
 679       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
 680                     dest[-3], dest[-2], dest[-1]));
 681     }
 682     else
 683     {
 684      /*
 685       * Four-octet UTF-8...
 686       */
 687
 688       if (i < 4)
 689       {
 690         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
 691
 692         return (-1);
 693       }
 694
 695       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 696       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 697       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 698       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 699       i -= 4;
 700
 701       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
 702                     (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
 703     }
 704   }
 705
 706   *dest = '\0';
 707
 708   DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
 709
 710   return ((int)(dest - start));
 711 }
 712
 713
 714 /*
 715  * 'compare_wide()' - Compare key for wide (VBCS) match.
 716  */
 717
 718 static int
 719 compare_wide(const void *k1,            /* I - Key char */
 720              const void *k2)            /* I - Map char */
 721 {
 722   cups_vbcs_t   key;                    /* Legacy key character */
 723   cups_vbcs_t   map;                    /* Legacy map character */
 724
 725
 726   key = *((cups_vbcs_t *)k1);
 727   map = ((_cups_wide2uni_t *)k2)->widechar;
 728
 729   return ((int)(key - map));
 730 }
 731
 732
 733 /*
 734  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 735  */
 736
 737 static int                              /* O - Count or -1 on error */
 738 conv_sbcs_to_utf8(
 739     cups_utf8_t           *dest,        /* O - Target string */
 740     const cups_sbcs_t     *src,         /* I - Source string */
 741     int                   maxout,       /* I - Max output */
 742     const cups_encoding_t encoding)     /* I - Encoding */
 743 {
 744   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 745   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 746   cups_sbcs_t   legchar;                /* Legacy character value */
 747   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 748                 *workptr;               /* Pointer into string */
 749
 750
 751  /*
 752   * Find legacy charset map in cache...
 753   */
 754
 755   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 756     return (-1);
 757
 758  /*
 759   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 760   */
 761
 762   work[0] = 0xfeff;
 763   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 764   {
 765     legchar = *src++;
 766
 767    /*
 768     * Convert ASCII verbatim (optimization)...
 769     */
 770
 771     if (legchar < 0x80)
 772       *workptr++ = (cups_utf32_t)legchar;
 773     else
 774     {
 775      /*
 776       * Convert unknown character to Replacement Character...
 777       */
 778
 779       crow = cmap->char2uni + legchar;
 780
 781       if (!*crow)
 782         *workptr++ = 0xfffd;
 783       else
 784         *workptr++ = (cups_utf32_t)*crow;
 785     }
 786   }
 787
 788   *workptr = 0;
 789
 790  /*
 791   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 792   */
 793
 794   cmap->used --;
 795
 796   return (cupsUTF32ToUTF8(dest, work, maxout));
 797 }
 798
 799
 800 /*
 801  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 802  */
 803
 804 static int                              /* O - Count or -1 on error */
 805 conv_utf8_to_sbcs(
 806     cups_sbcs_t           *dest,        /* O - Target string */
 807     const cups_utf8_t     *src,         /* I - Source string */
 808     int                   maxout,       /* I - Max output */
 809     const cups_encoding_t encoding)     /* I - Encoding */
 810 {
 811   cups_sbcs_t   *start;                 /* Start of destination string */
 812   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 813   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 814   cups_utf32_t  unichar;                /* Character value */
 815   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 816                 *workptr;               /* Pointer into string */
 817
 818
 819  /*
 820   * Find legacy charset map in cache...
 821   */
 822
 823   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 824     return (-1);
 825
 826  /*
 827   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 828   */
 829
 830   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 831     return (-1);
 832
 833  /*
 834   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 835   */
 836
 837   for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
 838   {
 839     unichar = *workptr++;
 840     if (!unichar)
 841       break;
 842
 843    /*
 844     * Convert ASCII verbatim (optimization)...
 845     */
 846
 847     if (unichar < 0x80)
 848     {
 849       *dest++ = (cups_sbcs_t)unichar;
 850       continue;
 851     }
 852
 853    /*
 854     * Convert unknown character to visible replacement...
 855     */
 856
 857     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 858
 859     if (srow)
 860       srow += (int)(unichar & 0xff);
 861
 862     if (!srow || !*srow)
 863       *dest++ = '?';
 864     else
 865       *dest++ = *srow;
 866   }
 867
 868   *dest = '\0';
 869
 870   cmap->used --;
 871
 872   return ((int)(dest - start));
 873 }
 874
 875
 876 /*
 877  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 878  */
 879
 880 static int                              /* O - Count or -1 on error */
 881 conv_utf8_to_vbcs(
 882     cups_sbcs_t           *dest,        /* O - Target string */
 883     const cups_utf8_t     *src,         /* I - Source string */
 884     int                   maxout,       /* I - Max output */
 885     const cups_encoding_t encoding)     /* I - Encoding */
 886 {
 887   cups_sbcs_t   *start;                 /* Start of destination string */
 888   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
 889   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
 890   cups_utf32_t  unichar;                /* Character value */
 891   cups_vbcs_t   legchar;                /* Legacy character value */
 892   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 893                 *workptr;               /* Pointer into string */
 894
 895
 896   DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
 897                 "encoding=%d)", dest, src, maxout, encoding));
 898
 899  /*
 900   * Find legacy charset map in cache...
 901   */
 902
 903   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
 904   {
 905     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
 906
 907     return (-1);
 908   }
 909
 910  /*
 911   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 912   */
 913
 914   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 915   {
 916     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
 917
 918     return (-1);
 919   }
 920
 921  /*
 922   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
 923   */
 924
 925   for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
 926   {
 927     unichar = *workptr++;
 928
 929    /*
 930     * Convert ASCII verbatim (optimization)...
 931     */
 932
 933     if (unichar < 0x80)
 934     {
 935       *dest++ = (cups_sbcs_t)unichar;
 936
 937       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
 938                     dest[-1]));
 939
 940       continue;
 941     }
 942
 943    /*
 944     * Convert unknown character to visible replacement...
 945     */
 946
 947     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
 948
 949     if (vrow)
 950       vrow += (int)(unichar & 0xff);
 951
 952     if (!vrow || !*vrow)
 953       legchar = (cups_vbcs_t)'?';
 954     else
 955       legchar = (cups_vbcs_t)*vrow;
 956
 957    /*
 958     * Save n-byte legacy character...
 959     */
 960
 961     if (legchar > 0xffffff)
 962     {
 963       if (maxout < 5)
 964       {
 965         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
 966
 967         return (-1);
 968       }
 969
 970       *dest++ = (cups_sbcs_t)(legchar >> 24);
 971       *dest++ = (cups_sbcs_t)(legchar >> 16);
 972       *dest++ = (cups_sbcs_t)(legchar >> 8);
 973       *dest++ = (cups_sbcs_t)legchar;
 974
 975       maxout -= 3;
 976
 977       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
 978                     (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
 979     }
 980     else if (legchar > 0xffff)
 981     {
 982       if (maxout < 4)
 983       {
 984         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
 985
 986         return (-1);
 987       }
 988
 989       *dest++ = (cups_sbcs_t)(legchar >> 16);
 990       *dest++ = (cups_sbcs_t)(legchar >> 8);
 991       *dest++ = (cups_sbcs_t)legchar;
 992
 993       maxout -= 2;
 994
 995       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
 996                     (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
 997     }
 998     else if (legchar > 0xff)
 999     {
1000       *dest++ = (cups_sbcs_t)(legchar >> 8);
1001       *dest++ = (cups_sbcs_t)legchar;
1002
1003       maxout --;
1004
1005       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
1006                     (unsigned)unichar, dest[-2], dest[-1]));
1007     }
1008     else
1009     {
1010       *dest++ = (cups_sbcs_t)legchar;
1011
1012       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
1013                     (unsigned)unichar, dest[-1]));
1014     }
1015   }
1016
1017   *dest = '\0';
1018
1019   vmap->used --;
1020
1021   DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
1022                 (int)(dest - start)));
1023
1024   return ((int)(dest - start));
1025 }
1026
1027
1028 /*
1029  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1030  */
1031
1032 static int                              /* O - Count or -1 on error */
1033 conv_vbcs_to_utf8(
1034     cups_utf8_t           *dest,        /* O - Target string */
1035     const cups_sbcs_t     *src,         /* I - Source string */
1036     int                   maxout,       /* I - Max output */
1037     const cups_encoding_t encoding)     /* I - Encoding */
1038 {
1039   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1040   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1041   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1042   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
1043   cups_vbcs_t   legchar;                /* Legacy character value */
1044   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1045                 *workptr;               /* Pointer into string */
1046
1047
1048  /*
1049   * Find legacy charset map in cache...
1050   */
1051
1052   DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
1053                 dest, src, maxout, encoding));
1054
1055   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1056   {
1057     DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1058
1059     return (-1);
1060   }
1061
1062  /*
1063   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1064   */
1065
1066   work[0] = 0xfeff;
1067   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1068   {
1069     legchar  = *src++;
1070     leadchar = (cups_sbcs_t)legchar;
1071
1072    /*
1073     * Convert ASCII verbatim (optimization)...
1074     */
1075
1076     if (legchar < 0x80)
1077     {
1078       *workptr++ = (cups_utf32_t)legchar;
1079
1080       DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
1081                     (unsigned)legchar));
1082       continue;
1083     }
1084
1085    /*
1086     * Convert 2-byte legacy character...
1087     */
1088
1089     if (vmap->lead2char[(int)leadchar] == leadchar)
1090     {
1091       if (!*src)
1092       {
1093         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
1094
1095         return (-1);
1096       }
1097
1098       legchar = (legchar << 8) | *src++;
1099
1100      /*
1101       * Convert unknown character to Replacement Character...
1102       */
1103
1104       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1105       if (crow)
1106         crow += (int) (legchar & 0xff);
1107
1108       if (!crow || !*crow)
1109         *workptr++ = 0xfffd;
1110       else
1111         *workptr++ = (cups_utf32_t)*crow;
1112
1113       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
1114                     src[-2], src[-1], (unsigned)workptr[-1]));
1115       continue;
1116     }
1117
1118    /*
1119     * Fetch 3-byte or 4-byte legacy character...
1120     */
1121
1122     if (vmap->lead3char[(int)leadchar] == leadchar)
1123     {
1124       if (!*src || !src[1])
1125       {
1126         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
1127
1128         return (-1);
1129       }
1130
1131       legchar = (legchar << 8) | *src++;
1132       legchar = (legchar << 8) | *src++;
1133     }
1134     else if (vmap->lead4char[(int)leadchar] == leadchar)
1135     {
1136       if (!*src || !src[1] || !src[2])
1137       {
1138         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
1139
1140         return (-1);
1141       }
1142
1143       legchar = (legchar << 8) | *src++;
1144       legchar = (legchar << 8) | *src++;
1145       legchar = (legchar << 8) | *src++;
1146     }
1147     else
1148     {
1149       DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
1150
1151       return (-1);
1152     }
1153
1154    /*
1155     * Find 3-byte or 4-byte legacy character...
1156     */
1157
1158     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1159                                            vmap->wide2uni,
1160                                            vmap->widecount,
1161                                            sizeof(_cups_wide2uni_t),
1162                                            compare_wide);
1163
1164    /*
1165     * Convert unknown character to Replacement Character...
1166     */
1167
1168     if (!wide2uni || !wide2uni->unichar)
1169       *workptr++ = 0xfffd;
1170     else
1171       *workptr++ = wide2uni->unichar;
1172
1173     if (vmap->lead3char[(int)leadchar] == leadchar)
1174       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
1175                     src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1176     else
1177       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
1178                     src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1179   }
1180
1181   *workptr = 0;
1182
1183   vmap->used --;
1184
1185   DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
1186                 (int)(workptr - work)));
1187
1188  /*
1189   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1190   */
1191
1192   return (cupsUTF32ToUTF8(dest, work, maxout));
1193 }
1194
1195
1196 /*
1197  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1198  */
1199
1200 static void
1201 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1202 {
1203   int           i;                      /* Looping variable */
1204
1205
1206   for (i = 0; i < 256; i ++)
1207     if (cmap->uni2char[i])
1208       free(cmap->uni2char[i]);
1209
1210   free(cmap);
1211 }
1212
1213
1214 /*
1215  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1216  */
1217
1218 static void
1219 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1220 {
1221   int           i;                      /* Looping variable */
1222
1223
1224   for (i = 0; i < 256; i ++)
1225     if (vmap->char2uni[i])
1226       free(vmap->char2uni[i]);
1227
1228   for (i = 0; i < 256; i ++)
1229     if (vmap->uni2char[i])
1230       free(vmap->uni2char[i]);
1231
1232   if (vmap->wide2uni)
1233     free(vmap->wide2uni);
1234
1235   free(vmap);
1236 }
1237
1238
1239 /*
1240  * 'get_charmap()' - Lookup or get a character set map (private).
1241  *
1242  * This code handles single-byte (SBCS), double-byte (DBCS), and
1243  * variable-byte (VBCS) character sets _without_ charset escapes...
1244  * This code does not handle multiple-byte character sets (MBCS)
1245  * (such as ISO-2022-JP) with charset switching via escapes...
1246  */
1247
1248
1249 static void *                           /* O - Charset map pointer */
1250 get_charmap(
1251     const cups_encoding_t encoding)     /* I - Encoding */
1252 {
1253   char          filename[1024];         /* Filename for charset map file */
1254   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1255
1256
1257   DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
1258
1259  /*
1260   * Get the data directory and charset map name...
1261   */
1262
1263   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1264            cg->cups_datadir, _cupsEncodingName(encoding));
1265
1266   DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
1267
1268  /*
1269   * Read charset map input file into cache...
1270   */
1271
1272   if (encoding < CUPS_ENCODING_SBCS_END)
1273     return (get_sbcs_charmap(encoding, filename));
1274   else if (encoding < CUPS_ENCODING_VBCS_END)
1275     return (get_vbcs_charmap(encoding, filename));
1276   else
1277     return (NULL);
1278 }
1279
1280
1281 /*
1282  * 'get_charmap_count()' - Count lines in a charmap file.
1283  */
1284
1285 static int                              /* O - Count or -1 on error */
1286 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1287 {
1288   int   count;                          /* Number of lines */
1289   char  line[256];                      /* Line from input map file */
1290
1291
1292  /*
1293   * Count lines in map input file...
1294   */
1295
1296   count = 0;
1297
1298   while (cupsFileGets(fp, line, sizeof(line)))
1299     if (line[0] == '0')
1300       count ++;
1301
1302  /*
1303   * Return the number of lines...
1304   */
1305
1306   if (count > 0)
1307     return (count);
1308   else
1309     return (-1);
1310 }
1311
1312
1313 /*
1314  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1315  */
1316
1317 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1318 get_sbcs_charmap(
1319     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1320     const char            *filename)    /* I - Charmap Filename */
1321 {
1322   unsigned long legchar;                /* Legacy character value */
1323   cups_utf32_t  unichar;                /* Unicode character value */
1324   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1325   cups_file_t   *fp;                    /* Charset map file pointer */
1326   char          *s;                     /* Line parsing pointer */
1327   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1328   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1329   char          line[256];              /* Line from charset map file */
1330
1331
1332  /*
1333   * See if we already have this SBCS charset map loaded...
1334   */
1335
1336   DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
1337                 filename));
1338
1339   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1340   {
1341     if (cmap->encoding == encoding)
1342     {
1343       cmap->used ++;
1344       DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
1345
1346       return ((void *)cmap);
1347     }
1348   }
1349
1350  /*
1351   * Open SBCS charset map input file...
1352   */
1353
1354   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1355   {
1356     DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
1357
1358     return (NULL);
1359   }
1360
1361  /*
1362   * Allocate memory for SBCS charset map...
1363   */
1364
1365   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1366   {
1367     cupsFileClose(fp);
1368     DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1369
1370     return (NULL);
1371   }
1372
1373   cmap->used ++;
1374   cmap->encoding = encoding;
1375
1376  /*
1377   * Save SBCS charset map into memory for transcoding...
1378   */
1379
1380   while (cupsFileGets(fp, line, sizeof(line)))
1381   {
1382     if (line[0] != '0')
1383       continue;
1384
1385     legchar = strtol(line, &s, 16);
1386     if (legchar < 0 || legchar > 0xff)
1387       goto sbcs_error;
1388
1389     unichar = strtol(s, NULL, 16);
1390     if (unichar < 0 || unichar > 0x10ffff)
1391       goto sbcs_error;
1392
1393    /*
1394     * Save legacy to Unicode mapping in direct lookup table...
1395     */
1396
1397     crow  = cmap->char2uni + legchar;
1398     *crow = (cups_ucs2_t)(unichar & 0xffff);
1399
1400    /*
1401     * Save Unicode to legacy mapping in indirect lookup table...
1402     */
1403
1404     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1405     if (!srow)
1406     {
1407       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1408       if (!srow)
1409         goto sbcs_error;
1410
1411       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1412     }
1413
1414     srow += unichar & 0xff;
1415
1416    /*
1417     * Convert Replacement Character to visible replacement...
1418     */
1419
1420     if (unichar == 0xfffd)
1421       legchar = (unsigned long)'?';
1422
1423    /*
1424     * First (oldest) legacy character uses Unicode mapping cell...
1425     */
1426
1427     if (!*srow)
1428       *srow = (cups_sbcs_t)legchar;
1429   }
1430
1431   cupsFileClose(fp);
1432
1433  /*
1434   * Add it to the cache and return...
1435   */
1436
1437   cmap->next = cmap_cache;
1438   cmap_cache = cmap;
1439
1440   DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
1441
1442   return (cmap);
1443
1444  /*
1445   * If we get here, there was an error in the cmap file...
1446   */
1447
1448   sbcs_error:
1449
1450   free_sbcs_charmap(cmap);
1451
1452   cupsFileClose(fp);
1453
1454   DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
1455
1456   return (NULL);
1457 }
1458
1459
1460 /*
1461  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1462  */
1463
1464 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1465 get_vbcs_charmap(
1466     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1467     const char            *filename)    /* I - Charmap Filename */
1468 {
1469   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1470   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1471   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1472   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1473   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1474   unsigned long legchar;                /* Legacy character value */
1475   cups_utf32_t  unichar;                /* Unicode character value */
1476   int           mapcount;               /* Count of lines in charmap file */
1477   cups_file_t   *fp;                    /* Charset map file pointer */
1478   char          *s;                     /* Line parsing pointer */
1479   char          line[256];              /* Line from charset map file */
1480   int           i;                      /* Loop variable */
1481   int           legacy;                 /* 32-bit legacy char */
1482
1483
1484   DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1485                 encoding, filename));
1486
1487  /*
1488   * See if we already have this DBCS/VBCS charset map loaded...
1489   */
1490
1491   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1492   {
1493     if (vmap->encoding == encoding)
1494     {
1495       vmap->used ++;
1496       DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
1497
1498       return ((void *)vmap);
1499     }
1500   }
1501
1502  /*
1503   * Open VBCS charset map input file...
1504   */
1505
1506   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1507   {
1508     DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
1509
1510     return (NULL);
1511   }
1512
1513  /*
1514   * Count lines in charmap file...
1515   */
1516
1517   if ((mapcount = get_charmap_count(fp)) <= 0)
1518   {
1519     DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
1520
1521     cupsFileClose(fp);
1522
1523     return (NULL);
1524   }
1525
1526   DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
1527
1528  /*
1529   * Allocate memory for DBCS/VBCS charset map...
1530   */
1531
1532   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1533   {
1534     DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
1535
1536     cupsFileClose(fp);
1537
1538     return (NULL);
1539   }
1540
1541   vmap->used ++;
1542   vmap->encoding = encoding;
1543
1544  /*
1545   * Save DBCS/VBCS charset map into memory for transcoding...
1546   */
1547
1548   wide2uni = NULL;
1549
1550   cupsFileRewind(fp);
1551
1552   i      = 0;
1553   legacy = 0;
1554
1555   while (cupsFileGets(fp, line, sizeof(line)))
1556   {
1557     if (line[0] != '0')
1558       continue;
1559
1560     legchar = strtoul(line, &s, 16);
1561     if (legchar == ULONG_MAX)
1562       goto vbcs_error;
1563
1564     unichar = strtol(s, NULL, 16);
1565     if (unichar < 0 || unichar > 0x10ffff)
1566       goto vbcs_error;
1567
1568     i ++;
1569
1570     DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
1571                   legchar, (unsigned)unichar));
1572
1573    /*
1574     * Save lead char of 2/3/4-byte legacy char...
1575     */
1576
1577     if (legchar > 0xffffff)
1578     {
1579       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1580       vmap->lead4char[leadchar] = leadchar;
1581     }
1582     else if (legchar > 0xffff)
1583     {
1584       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1585       vmap->lead3char[leadchar] = leadchar;
1586     }
1587     else
1588     {
1589       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1590       vmap->lead2char[leadchar] = leadchar;
1591     }
1592
1593    /*
1594     * Save Legacy to Unicode mapping...
1595     */
1596
1597     if (legchar <= 0xffff)
1598     {
1599      /*
1600       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1601       */
1602
1603       crow = vmap->char2uni[(int)leadchar];
1604       if (!crow)
1605       {
1606         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1607         if (!crow)
1608           goto vbcs_error;
1609
1610         vmap->char2uni[(int)leadchar] = crow;
1611       }
1612
1613       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1614     }
1615     else
1616     {
1617      /*
1618       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1619       */
1620
1621       if (!legacy)
1622       {
1623         legacy          = 1;
1624         vmap->widecount = (mapcount - i + 1);
1625         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1626                                                      sizeof(_cups_wide2uni_t));
1627         if (!wide2uni)
1628           goto vbcs_error;
1629
1630         vmap->wide2uni = wide2uni;
1631       }
1632
1633       wide2uni->widechar = (cups_vbcs_t)legchar;
1634       wide2uni->unichar  = (cups_ucs2_t)unichar;
1635       wide2uni ++;
1636     }
1637
1638    /*
1639     * Save Unicode to legacy mapping in indirect lookup table...
1640     */
1641
1642     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1643     if (!vrow)
1644     {
1645       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1646       if (!vrow)
1647         goto vbcs_error;
1648
1649       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1650     }
1651
1652     vrow += (int)(unichar & 0xff);
1653
1654    /*
1655     * Convert Replacement Character to visible replacement...
1656     */
1657
1658     if (unichar == 0xfffd)
1659       legchar = (unsigned long)'?';
1660
1661    /*
1662     * First (oldest) legacy character uses Unicode mapping cell...
1663     */
1664
1665     if (!*vrow)
1666       *vrow = (cups_vbcs_t)legchar;
1667   }
1668
1669   vmap->charcount = (i - vmap->widecount);
1670
1671   cupsFileClose(fp);
1672
1673  /*
1674   * Add it to the cache and return...
1675   */
1676
1677   vmap->next = vmap_cache;
1678   vmap_cache = vmap;
1679
1680   DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
1681
1682   return (vmap);
1683
1684  /*
1685   * If we get here, the file contains errors...
1686   */
1687
1688   vbcs_error:
1689
1690   free_vbcs_charmap(vmap);
1691
1692   cupsFileClose(fp);
1693
1694   DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
1695
1696   return (NULL);
1697 }
1698
1699
1700 /*
1701  * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
1702  */