cups/transcode.c

   1 /*
   2  * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
   3  *
   4  *   Transcoding support for CUPS.
   5  *
   6  *   Copyright 2007-2010 by Apple Inc.
   7  *   Copyright 1997-2007 by Easy Software Products.
   8  *
   9  *   These coded instructions, statements, and computer programs are the
  10  *   property of Apple Inc. and are protected by Federal copyright
  11  *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
  12  *   which should have been included with this file.  If this file is
  13  *   file is missing or damaged, see the license at "http://www.cups.org/".
  14  *
  15  *   This file is subject to the Apple OS-Developed Software exception.
  16  *
  17  * Contents:
  18  *
  19  *   _cupsCharmapFlush() - Flush all character set maps out of cache.
  20  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  21  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  22  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  23  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  24  */
  25
  26 /*
  27  * Include necessary headers...
  28  */
  29
  30 #include "cups-private.h"
  31 #include <limits.h>
  32 #include <time.h>
  33 #ifdef HAVE_ICONV_H
  34 #  include <iconv.h>
  35 #endif /* HAVE_ICONV_H */
  36
  37
  38 /*
  39  * Local globals...
  40  */
  41
  42 #ifdef HAVE_ICONV_H
  43 static _cups_mutex_t    map_mutex = _CUPS_MUTEX_INITIALIZER;
  44                                         /* Mutex to control access to maps */
  45 static iconv_t          map_from_utf8 = (iconv_t)-1;
  46                                         /* Convert from UTF-8 to charset */
  47 static iconv_t          map_to_utf8 = (iconv_t)-1;
  48                                         /* Convert from charset to UTF-8 */
  49 static cups_encoding_t  map_encoding = CUPS_AUTO_ENCODING;
  50                                         /* Which charset is cached */
  51 #endif /* HAVE_ICONV_H */
  52
  53
  54 /*
  55  * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
  56  */
  57
  58 void
  59 _cupsCharmapFlush(void)
  60 {
  61 #ifdef HAVE_ICONV_H
  62   if (map_from_utf8 != (iconv_t)-1)
  63   {
  64     iconv_close(map_from_utf8);
  65     map_from_utf8 = (iconv_t)-1;
  66   }
  67
  68   if (map_to_utf8 != (iconv_t)-1)
  69   {
  70     iconv_close(map_to_utf8);
  71     map_to_utf8 = (iconv_t)-1;
  72   }
  73
  74   map_encoding = CUPS_AUTO_ENCODING;
  75 #endif /* HAVE_ICONV_H */
  76 }
  77
  78
  79 /*
  80  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
  81  */
  82
  83 int                                     /* O - Count or -1 on error */
  84 cupsCharsetToUTF8(
  85     cups_utf8_t           *dest,        /* O - Target string */
  86     const char            *src,         /* I - Source string */
  87     const int             maxout,       /* I - Max output */
  88     const cups_encoding_t encoding)     /* I - Encoding */
  89 {
  90   cups_utf8_t   *destptr;               /* Pointer into UTF-8 buffer */
  91   size_t        srclen,                 /* Length of source string */
  92                 outBytesLeft;           /* Bytes remaining in output buffer */
  93
  94
  95  /*
  96   * Check for valid arguments...
  97   */
  98
  99   DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
 100                 dest, src, maxout, encoding));
 101
 102   if (!dest || !src || maxout < 1)
 103   {
 104     if (dest)
 105       *dest = '\0';
 106
 107     DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
 108     return (-1);
 109   }
 110
 111  /*
 112   * Handle identity conversions...
 113   */
 114
 115   if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
 116       encoding >= CUPS_ENCODING_VBCS_END)
 117   {
 118     strlcpy((char *)dest, src, maxout);
 119     return ((int)strlen((char *)dest));
 120   }
 121
 122  /*
 123   * Handle ISO-8859-1 to UTF-8 directly...
 124   */
 125
 126   destptr = dest;
 127
 128   if (encoding == CUPS_ISO8859_1)
 129   {
 130     int         ch;                     /* Character from string */
 131     cups_utf8_t *destend;               /* End of UTF-8 buffer */
 132
 133
 134     destend = dest + maxout - 2;
 135
 136     while (*src && destptr < destend)
 137     {
 138       ch = *src++ & 255;
 139
 140       if (ch & 128)
 141       {
 142         *destptr++ = 0xc0 | (ch >> 6);
 143         *destptr++ = 0x80 | (ch & 0x3f);
 144       }
 145       else
 146         *destptr++ = ch;
 147     }
 148
 149     *destptr = '\0';
 150
 151     return ((int)(destptr - dest));
 152   }
 153
 154  /*
 155   * Convert input legacy charset to UTF-8...
 156   */
 157
 158 #ifdef HAVE_ICONV_H
 159   _cupsMutexLock(&map_mutex);
 160
 161   if (map_encoding != encoding)
 162   {
 163     _cupsCharmapFlush();
 164
 165     map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
 166     map_to_utf8   = iconv_open("UTF-8", _cupsEncodingName(encoding));
 167     map_encoding     = encoding;
 168   }
 169
 170   if (map_to_utf8 != (iconv_t)-1)
 171   {
 172     srclen       = strlen(src);
 173     outBytesLeft = maxout - 1;
 174
 175     iconv(map_to_utf8, (char **)&src, &srclen, (char **)&destptr,
 176           &outBytesLeft);
 177     *destptr = '\0';
 178
 179     _cupsMutexUnlock(&map_mutex);
 180
 181     return ((int)(destptr - dest));
 182   }
 183
 184   _cupsMutexUnlock(&map_mutex);
 185 #endif /* HAVE_ICONV_H */
 186
 187  /*
 188   * No iconv() support, so error out...
 189   */
 190
 191   *destptr = '\0';
 192
 193   return (-1);
 194 }
 195
 196
 197 /*
 198  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 199  */
 200
 201 int                                     /* O - Count or -1 on error */
 202 cupsUTF8ToCharset(
 203     char                  *dest,        /* O - Target string */
 204     const cups_utf8_t     *src,         /* I - Source string */
 205     const int             maxout,       /* I - Max output */
 206     const cups_encoding_t encoding)     /* I - Encoding */
 207 {
 208   char          *destptr;               /* Pointer into destination */
 209   size_t        srclen,                 /* Length of source string */
 210                 outBytesLeft;           /* Bytes remaining in output buffer */
 211
 212
 213  /*
 214   * Check for valid arguments...
 215   */
 216
 217   if (!dest || !src || maxout < 1)
 218   {
 219     if (dest)
 220       *dest = '\0';
 221
 222     return (-1);
 223   }
 224
 225  /*
 226   * Handle identity conversions...
 227   */
 228
 229   if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
 230       encoding >= CUPS_ENCODING_VBCS_END)
 231   {
 232     strlcpy(dest, (char *)src, maxout);
 233     return ((int)strlen(dest));
 234   }
 235
 236  /*
 237   * Handle UTF-8 to ISO-8859-1 directly...
 238   */
 239
 240   destptr = dest;
 241
 242   if (encoding == CUPS_ISO8859_1)
 243   {
 244     int         ch;                     /* Character from string */
 245     char        *destend;               /* End of ISO-8859-1 buffer */
 246
 247
 248     destend = dest + maxout - 1;
 249
 250     while (*src && destptr < destend)
 251     {
 252       ch = *src++;
 253
 254       if ((ch & 0xe0) == 0xc0)
 255       {
 256         ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
 257
 258         if (ch < 256)
 259           *destptr++ = ch;
 260         else
 261           *destptr++ = '?';
 262       }
 263       else if ((ch & 0xf0) == 0xe0 ||
 264                (ch & 0xf8) == 0xf0)
 265         *destptr++ = '?';
 266       else if (!(ch & 0x80))
 267         *destptr++ = ch;
 268     }
 269
 270     *destptr = '\0';
 271
 272     return ((int)(destptr - dest));
 273   }
 274
 275 #ifdef HAVE_ICONV_H
 276  /*
 277   * Convert input UTF-8 to legacy charset...
 278   */
 279
 280   _cupsMutexLock(&map_mutex);
 281
 282   if (map_encoding != encoding)
 283   {
 284     _cupsCharmapFlush();
 285
 286     map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
 287     map_to_utf8   = iconv_open("UTF-8", _cupsEncodingName(encoding));
 288     map_encoding  = encoding;
 289   }
 290
 291   if (map_from_utf8 != (iconv_t)-1)
 292   {
 293     srclen       = strlen((char *)src);
 294     outBytesLeft = maxout - 1;
 295
 296     iconv(map_from_utf8, (char **)&src, &srclen, &destptr, &outBytesLeft);
 297     *destptr = '\0';
 298
 299     _cupsMutexUnlock(&map_mutex);
 300
 301     return ((int)(destptr - dest));
 302   }
 303
 304   _cupsMutexUnlock(&map_mutex);
 305 #endif /* HAVE_ICONV_H */
 306
 307  /*
 308   * No iconv() support, so error out...
 309   */
 310
 311   *destptr = '\0';
 312
 313   return (-1);
 314 }
 315
 316
 317 /*
 318  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 319  *
 320  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 321  *
 322  *   UTF-32 char     UTF-8 char(s)
 323  *   --------------------------------------------------
 324  *        0 to 127 = 0xxxxxxx (US-ASCII)
 325  *     128 to 2047 = 110xxxxx 10yyyyyy
 326  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 327  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 328  *
 329  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 330  * which would convert to five- or six-octet UTF-8 sequences...
 331  */
 332
 333 int                                     /* O - Count or -1 on error */
 334 cupsUTF8ToUTF32(
 335     cups_utf32_t      *dest,            /* O - Target string */
 336     const cups_utf8_t *src,             /* I - Source string */
 337     const int         maxout)           /* I - Max output */
 338 {
 339   int           i;                      /* Looping variable */
 340   cups_utf8_t   ch;                     /* Character value */
 341   cups_utf8_t   next;                   /* Next character value */
 342   cups_utf32_t  ch32;                   /* UTF-32 character value */
 343
 344
 345  /*
 346   * Check for valid arguments and clear output...
 347   */
 348
 349   DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
 350                 src, maxout));
 351
 352   if (dest)
 353     *dest = 0;
 354
 355   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 356   {
 357     DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
 358
 359     return (-1);
 360   }
 361
 362  /*
 363   * Convert input UTF-8 to output UTF-32...
 364   */
 365
 366   for (i = maxout - 1; *src && i > 0; i --)
 367   {
 368     ch = *src++;
 369
 370    /*
 371     * Convert UTF-8 character(s) to UTF-32 character...
 372     */
 373
 374     if (!(ch & 0x80))
 375     {
 376      /*
 377       * One-octet UTF-8 <= 127 (US-ASCII)...
 378       */
 379
 380       *dest++ = ch;
 381
 382       DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
 383       continue;
 384     }
 385     else if ((ch & 0xe0) == 0xc0)
 386     {
 387      /*
 388       * Two-octet UTF-8 <= 2047 (Latin-x)...
 389       */
 390
 391       next = *src++;
 392       if ((next & 0xc0) != 0x80)
 393       {
 394         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 395
 396         return (-1);
 397       }
 398
 399       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 400
 401      /*
 402       * Check for non-shortest form (invalid UTF-8)...
 403       */
 404
 405       if (ch32 < 0x80)
 406       {
 407         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 408
 409         return (-1);
 410       }
 411
 412       *dest++ = ch32;
 413
 414       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
 415                     src[-2], src[-1], (unsigned)ch32));
 416     }
 417     else if ((ch & 0xf0) == 0xe0)
 418     {
 419      /*
 420       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 421       */
 422
 423       next = *src++;
 424       if ((next & 0xc0) != 0x80)
 425       {
 426         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 427
 428         return (-1);
 429       }
 430
 431       ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
 432
 433       next = *src++;
 434       if ((next & 0xc0) != 0x80)
 435       {
 436         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 437
 438         return (-1);
 439       }
 440
 441       ch32 = (ch32 << 6) | (next & 0x3f);
 442
 443      /*
 444       * Check for non-shortest form (invalid UTF-8)...
 445       */
 446
 447       if (ch32 < 0x800)
 448       {
 449         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 450
 451         return (-1);
 452       }
 453
 454       *dest++ = ch32;
 455
 456       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
 457                     src[-3], src[-2], src[-1], (unsigned)ch32));
 458     }
 459     else if ((ch & 0xf8) == 0xf0)
 460     {
 461      /*
 462       * Four-octet UTF-8...
 463       */
 464
 465       next = *src++;
 466       if ((next & 0xc0) != 0x80)
 467       {
 468         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 469
 470         return (-1);
 471       }
 472
 473       ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
 474
 475       next = *src++;
 476       if ((next & 0xc0) != 0x80)
 477       {
 478         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 479
 480         return (-1);
 481       }
 482
 483       ch32 = (ch32 << 6) | (next & 0x3f);
 484
 485       next = *src++;
 486       if ((next & 0xc0) != 0x80)
 487       {
 488         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 489
 490         return (-1);
 491       }
 492
 493       ch32 = (ch32 << 6) | (next & 0x3f);
 494
 495      /*
 496       * Check for non-shortest form (invalid UTF-8)...
 497       */
 498
 499       if (ch32 < 0x10000)
 500       {
 501         DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 502
 503         return (-1);
 504       }
 505
 506       *dest++ = ch32;
 507
 508       DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
 509                     src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
 510     }
 511     else
 512     {
 513      /*
 514       * More than 4-octet (invalid UTF-8 sequence)...
 515       */
 516
 517       DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
 518
 519       return (-1);
 520     }
 521
 522    /*
 523     * Check for UTF-16 surrogate (illegal UTF-8)...
 524     */
 525
 526     if (ch32 >= 0xd800 && ch32 <= 0xdfff)
 527       return (-1);
 528   }
 529
 530   *dest = 0;
 531
 532   DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
 533
 534   return (maxout - 1 - i);
 535 }
 536
 537
 538 /*
 539  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 540  *
 541  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 542  *
 543  *   UTF-32 char     UTF-8 char(s)
 544  *   --------------------------------------------------
 545  *        0 to 127 = 0xxxxxxx (US-ASCII)
 546  *     128 to 2047 = 110xxxxx 10yyyyyy
 547  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 548  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 549  *
 550  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 551  * which would convert to five- or six-octet UTF-8 sequences...
 552  */
 553
 554 int                                     /* O - Count or -1 on error */
 555 cupsUTF32ToUTF8(
 556     cups_utf8_t        *dest,           /* O - Target string */
 557     const cups_utf32_t *src,            /* I - Source string */
 558     const int          maxout)          /* I - Max output */
 559 {
 560   cups_utf8_t   *start;                 /* Start of destination string */
 561   int           i;                      /* Looping variable */
 562   int           swap;                   /* Byte-swap input to output */
 563   cups_utf32_t  ch;                     /* Character value */
 564
 565
 566  /*
 567   * Check for valid arguments and clear output...
 568   */
 569
 570   DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
 571                 maxout));
 572
 573   if (dest)
 574     *dest = '\0';
 575
 576   if (!dest || !src || maxout < 1)
 577   {
 578     DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
 579
 580     return (-1);
 581   }
 582
 583  /*
 584   * Check for leading BOM in UTF-32 and inverted BOM...
 585   */
 586
 587   start = dest;
 588   swap  = *src == 0xfffe0000;
 589
 590   DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
 591
 592   if (*src == 0xfffe0000 || *src == 0xfeff)
 593     src ++;
 594
 595  /*
 596   * Convert input UTF-32 to output UTF-8...
 597   */
 598
 599   for (i = maxout - 1; *src && i > 0;)
 600   {
 601     ch = *src++;
 602
 603    /*
 604     * Byte swap input UTF-32, if necessary...
 605     * (only byte-swapping 24 of 32 bits)
 606     */
 607
 608     if (swap)
 609       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 610
 611    /*
 612     * Check for beyond Plane 16 (invalid UTF-32)...
 613     */
 614
 615     if (ch > 0x10ffff)
 616     {
 617       DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
 618
 619       return (-1);
 620     }
 621
 622    /*
 623     * Convert UTF-32 character to UTF-8 character(s)...
 624     */
 625
 626     if (ch < 0x80)
 627     {
 628      /*
 629       * One-octet UTF-8 <= 127 (US-ASCII)...
 630       */
 631
 632       *dest++ = (cups_utf8_t)ch;
 633       i --;
 634
 635       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
 636     }
 637     else if (ch < 0x800)
 638     {
 639      /*
 640       * Two-octet UTF-8 <= 2047 (Latin-x)...
 641       */
 642
 643       if (i < 2)
 644       {
 645         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
 646
 647         return (-1);
 648       }
 649
 650       *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
 651       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 652       i -= 2;
 653
 654       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
 655                     dest[-2], dest[-1]));
 656     }
 657     else if (ch < 0x10000)
 658     {
 659      /*
 660       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 661       */
 662
 663       if (i < 3)
 664       {
 665         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
 666
 667         return (-1);
 668       }
 669
 670       *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
 671       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 672       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 673       i -= 3;
 674
 675       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
 676                     dest[-3], dest[-2], dest[-1]));
 677     }
 678     else
 679     {
 680      /*
 681       * Four-octet UTF-8...
 682       */
 683
 684       if (i < 4)
 685       {
 686         DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
 687
 688         return (-1);
 689       }
 690
 691       *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
 692       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
 693       *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
 694       *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
 695       i -= 4;
 696
 697       DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
 698                     (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
 699     }
 700   }
 701
 702   *dest = '\0';
 703
 704   DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
 705
 706   return ((int)(dest - start));
 707 }
 708
 709
 710 /*
 711  * 'compare_wide()' - Compare key for wide (VBCS) match.
 712  */
 713
 714 static int
 715 compare_wide(const void *k1,            /* I - Key char */
 716              const void *k2)            /* I - Map char */
 717 {
 718   cups_vbcs_t   key;                    /* Legacy key character */
 719   cups_vbcs_t   map;                    /* Legacy map character */
 720
 721
 722   key = *((cups_vbcs_t *)k1);
 723   map = ((_cups_wide2uni_t *)k2)->widechar;
 724
 725   return ((int)(key - map));
 726 }
 727
 728
 729 /*
 730  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
 731  */
 732
 733 static int                              /* O - Count or -1 on error */
 734 conv_sbcs_to_utf8(
 735     cups_utf8_t           *dest,        /* O - Target string */
 736     const cups_sbcs_t     *src,         /* I - Source string */
 737     int                   maxout,       /* I - Max output */
 738     const cups_encoding_t encoding)     /* I - Encoding */
 739 {
 740   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 741   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
 742   cups_sbcs_t   legchar;                /* Legacy character value */
 743   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 744                 *workptr;               /* Pointer into string */
 745
 746
 747  /*
 748   * Find legacy charset map in cache...
 749   */
 750
 751   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 752     return (-1);
 753
 754  /*
 755   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
 756   */
 757
 758   work[0] = 0xfeff;
 759   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
 760   {
 761     legchar = *src++;
 762
 763    /*
 764     * Convert ASCII verbatim (optimization)...
 765     */
 766
 767     if (legchar < 0x80)
 768       *workptr++ = (cups_utf32_t)legchar;
 769     else
 770     {
 771      /*
 772       * Convert unknown character to Replacement Character...
 773       */
 774
 775       crow = cmap->char2uni + legchar;
 776
 777       if (!*crow)
 778         *workptr++ = 0xfffd;
 779       else
 780         *workptr++ = (cups_utf32_t)*crow;
 781     }
 782   }
 783
 784   *workptr = 0;
 785
 786  /*
 787   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 788   */
 789
 790   cmap->used --;
 791
 792   return (cupsUTF32ToUTF8(dest, work, maxout));
 793 }
 794
 795
 796 /*
 797  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
 798  */
 799
 800 static int                              /* O - Count or -1 on error */
 801 conv_utf8_to_sbcs(
 802     cups_sbcs_t           *dest,        /* O - Target string */
 803     const cups_utf8_t     *src,         /* I - Source string */
 804     int                   maxout,       /* I - Max output */
 805     const cups_encoding_t encoding)     /* I - Encoding */
 806 {
 807   cups_sbcs_t   *start;                 /* Start of destination string */
 808   _cups_cmap_t  *cmap;                  /* Legacy SBCS / Unicode Charset Map */
 809   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
 810   cups_utf32_t  unichar;                /* Character value */
 811   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 812                 *workptr;               /* Pointer into string */
 813
 814
 815  /*
 816   * Find legacy charset map in cache...
 817   */
 818
 819   if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
 820     return (-1);
 821
 822  /*
 823   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 824   */
 825
 826   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 827     return (-1);
 828
 829  /*
 830   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
 831   */
 832
 833   for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
 834   {
 835     unichar = *workptr++;
 836     if (!unichar)
 837       break;
 838
 839    /*
 840     * Convert ASCII verbatim (optimization)...
 841     */
 842
 843     if (unichar < 0x80)
 844     {
 845       *dest++ = (cups_sbcs_t)unichar;
 846       continue;
 847     }
 848
 849    /*
 850     * Convert unknown character to visible replacement...
 851     */
 852
 853     srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
 854
 855     if (srow)
 856       srow += (int)(unichar & 0xff);
 857
 858     if (!srow || !*srow)
 859       *dest++ = '?';
 860     else
 861       *dest++ = *srow;
 862   }
 863
 864   *dest = '\0';
 865
 866   cmap->used --;
 867
 868   return ((int)(dest - start));
 869 }
 870
 871
 872 /*
 873  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
 874  */
 875
 876 static int                              /* O - Count or -1 on error */
 877 conv_utf8_to_vbcs(
 878     cups_sbcs_t           *dest,        /* O - Target string */
 879     const cups_utf8_t     *src,         /* I - Source string */
 880     int                   maxout,       /* I - Max output */
 881     const cups_encoding_t encoding)     /* I - Encoding */
 882 {
 883   cups_sbcs_t   *start;                 /* Start of destination string */
 884   _cups_vmap_t  *vmap;                  /* Legacy DBCS / Unicode Charset Map */
 885   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
 886   cups_utf32_t  unichar;                /* Character value */
 887   cups_vbcs_t   legchar;                /* Legacy character value */
 888   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
 889                 *workptr;               /* Pointer into string */
 890
 891
 892   DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
 893                 "encoding=%d)", dest, src, maxout, encoding));
 894
 895  /*
 896   * Find legacy charset map in cache...
 897   */
 898
 899   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
 900   {
 901     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
 902
 903     return (-1);
 904   }
 905
 906  /*
 907   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 908   */
 909
 910   if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
 911   {
 912     DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
 913
 914     return (-1);
 915   }
 916
 917  /*
 918   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
 919   */
 920
 921   for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
 922   {
 923     unichar = *workptr++;
 924
 925    /*
 926     * Convert ASCII verbatim (optimization)...
 927     */
 928
 929     if (unichar < 0x80)
 930     {
 931       *dest++ = (cups_sbcs_t)unichar;
 932
 933       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
 934                     dest[-1]));
 935
 936       continue;
 937     }
 938
 939    /*
 940     * Convert unknown character to visible replacement...
 941     */
 942
 943     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
 944
 945     if (vrow)
 946       vrow += (int)(unichar & 0xff);
 947
 948     if (!vrow || !*vrow)
 949       legchar = (cups_vbcs_t)'?';
 950     else
 951       legchar = (cups_vbcs_t)*vrow;
 952
 953    /*
 954     * Save n-byte legacy character...
 955     */
 956
 957     if (legchar > 0xffffff)
 958     {
 959       if (maxout < 5)
 960       {
 961         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
 962
 963         return (-1);
 964       }
 965
 966       *dest++ = (cups_sbcs_t)(legchar >> 24);
 967       *dest++ = (cups_sbcs_t)(legchar >> 16);
 968       *dest++ = (cups_sbcs_t)(legchar >> 8);
 969       *dest++ = (cups_sbcs_t)legchar;
 970
 971       maxout -= 3;
 972
 973       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
 974                     (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
 975     }
 976     else if (legchar > 0xffff)
 977     {
 978       if (maxout < 4)
 979       {
 980         DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
 981
 982         return (-1);
 983       }
 984
 985       *dest++ = (cups_sbcs_t)(legchar >> 16);
 986       *dest++ = (cups_sbcs_t)(legchar >> 8);
 987       *dest++ = (cups_sbcs_t)legchar;
 988
 989       maxout -= 2;
 990
 991       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
 992                     (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
 993     }
 994     else if (legchar > 0xff)
 995     {
 996       *dest++ = (cups_sbcs_t)(legchar >> 8);
 997       *dest++ = (cups_sbcs_t)legchar;
 998
 999       maxout --;
1000
1001       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
1002                     (unsigned)unichar, dest[-2], dest[-1]));
1003     }
1004     else
1005     {
1006       *dest++ = (cups_sbcs_t)legchar;
1007
1008       DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
1009                     (unsigned)unichar, dest[-1]));
1010     }
1011   }
1012
1013   *dest = '\0';
1014
1015   vmap->used --;
1016
1017   DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
1018                 (int)(dest - start)));
1019
1020   return ((int)(dest - start));
1021 }
1022
1023
1024 /*
1025  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1026  */
1027
1028 static int                              /* O - Count or -1 on error */
1029 conv_vbcs_to_utf8(
1030     cups_utf8_t           *dest,        /* O - Target string */
1031     const cups_sbcs_t     *src,         /* I - Source string */
1032     int                   maxout,       /* I - Max output */
1033     const cups_encoding_t encoding)     /* I - Encoding */
1034 {
1035   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1036   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1037   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1038   cups_sbcs_t   leadchar;               /* Lead char of n-byte legacy char */
1039   cups_vbcs_t   legchar;                /* Legacy character value */
1040   cups_utf32_t  work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1041                 *workptr;               /* Pointer into string */
1042
1043
1044  /*
1045   * Find legacy charset map in cache...
1046   */
1047
1048   DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
1049                 dest, src, maxout, encoding));
1050
1051   if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1052   {
1053     DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1054
1055     return (-1);
1056   }
1057
1058  /*
1059   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1060   */
1061
1062   work[0] = 0xfeff;
1063   for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1064   {
1065     legchar  = *src++;
1066     leadchar = (cups_sbcs_t)legchar;
1067
1068    /*
1069     * Convert ASCII verbatim (optimization)...
1070     */
1071
1072     if (legchar < 0x80)
1073     {
1074       *workptr++ = (cups_utf32_t)legchar;
1075
1076       DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
1077                     (unsigned)legchar));
1078       continue;
1079     }
1080
1081    /*
1082     * Convert 2-byte legacy character...
1083     */
1084
1085     if (vmap->lead2char[(int)leadchar] == leadchar)
1086     {
1087       if (!*src)
1088       {
1089         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
1090
1091         return (-1);
1092       }
1093
1094       legchar = (legchar << 8) | *src++;
1095
1096      /*
1097       * Convert unknown character to Replacement Character...
1098       */
1099
1100       crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1101       if (crow)
1102         crow += (int) (legchar & 0xff);
1103
1104       if (!crow || !*crow)
1105         *workptr++ = 0xfffd;
1106       else
1107         *workptr++ = (cups_utf32_t)*crow;
1108
1109       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
1110                     src[-2], src[-1], (unsigned)workptr[-1]));
1111       continue;
1112     }
1113
1114    /*
1115     * Fetch 3-byte or 4-byte legacy character...
1116     */
1117
1118     if (vmap->lead3char[(int)leadchar] == leadchar)
1119     {
1120       if (!*src || !src[1])
1121       {
1122         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
1123
1124         return (-1);
1125       }
1126
1127       legchar = (legchar << 8) | *src++;
1128       legchar = (legchar << 8) | *src++;
1129     }
1130     else if (vmap->lead4char[(int)leadchar] == leadchar)
1131     {
1132       if (!*src || !src[1] || !src[2])
1133       {
1134         DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
1135
1136         return (-1);
1137       }
1138
1139       legchar = (legchar << 8) | *src++;
1140       legchar = (legchar << 8) | *src++;
1141       legchar = (legchar << 8) | *src++;
1142     }
1143     else
1144     {
1145       DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
1146
1147       return (-1);
1148     }
1149
1150    /*
1151     * Find 3-byte or 4-byte legacy character...
1152     */
1153
1154     wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1155                                            vmap->wide2uni,
1156                                            vmap->widecount,
1157                                            sizeof(_cups_wide2uni_t),
1158                                            compare_wide);
1159
1160    /*
1161     * Convert unknown character to Replacement Character...
1162     */
1163
1164     if (!wide2uni || !wide2uni->unichar)
1165       *workptr++ = 0xfffd;
1166     else
1167       *workptr++ = wide2uni->unichar;
1168
1169     if (vmap->lead3char[(int)leadchar] == leadchar)
1170       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
1171                     src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1172     else
1173       DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
1174                     src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1175   }
1176
1177   *workptr = 0;
1178
1179   vmap->used --;
1180
1181   DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
1182                 (int)(workptr - work)));
1183
1184  /*
1185   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1186   */
1187
1188   return (cupsUTF32ToUTF8(dest, work, maxout));
1189 }
1190
1191
1192 /*
1193  * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1194  */
1195
1196 static void
1197 free_sbcs_charmap(_cups_cmap_t *cmap)   /* I - Character set */
1198 {
1199   int           i;                      /* Looping variable */
1200
1201
1202   for (i = 0; i < 256; i ++)
1203     if (cmap->uni2char[i])
1204       free(cmap->uni2char[i]);
1205
1206   free(cmap);
1207 }
1208
1209
1210 /*
1211  * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1212  */
1213
1214 static void
1215 free_vbcs_charmap(_cups_vmap_t *vmap)   /* I - Character set */
1216 {
1217   int           i;                      /* Looping variable */
1218
1219
1220   for (i = 0; i < 256; i ++)
1221     if (vmap->char2uni[i])
1222       free(vmap->char2uni[i]);
1223
1224   for (i = 0; i < 256; i ++)
1225     if (vmap->uni2char[i])
1226       free(vmap->uni2char[i]);
1227
1228   if (vmap->wide2uni)
1229     free(vmap->wide2uni);
1230
1231   free(vmap);
1232 }
1233
1234
1235 /*
1236  * 'get_charmap()' - Lookup or get a character set map (private).
1237  *
1238  * This code handles single-byte (SBCS), double-byte (DBCS), and
1239  * variable-byte (VBCS) character sets _without_ charset escapes...
1240  * This code does not handle multiple-byte character sets (MBCS)
1241  * (such as ISO-2022-JP) with charset switching via escapes...
1242  */
1243
1244
1245 static void *                           /* O - Charset map pointer */
1246 get_charmap(
1247     const cups_encoding_t encoding)     /* I - Encoding */
1248 {
1249   char          filename[1024];         /* Filename for charset map file */
1250   _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1251
1252
1253   DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
1254
1255  /*
1256   * Get the data directory and charset map name...
1257   */
1258
1259   snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1260            cg->cups_datadir, _cupsEncodingName(encoding));
1261
1262   DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
1263
1264  /*
1265   * Read charset map input file into cache...
1266   */
1267
1268   if (encoding < CUPS_ENCODING_SBCS_END)
1269     return (get_sbcs_charmap(encoding, filename));
1270   else if (encoding < CUPS_ENCODING_VBCS_END)
1271     return (get_vbcs_charmap(encoding, filename));
1272   else
1273     return (NULL);
1274 }
1275
1276
1277 /*
1278  * 'get_charmap_count()' - Count lines in a charmap file.
1279  */
1280
1281 static int                              /* O - Count or -1 on error */
1282 get_charmap_count(cups_file_t *fp)      /* I - File to read from */
1283 {
1284   int   count;                          /* Number of lines */
1285   char  line[256];                      /* Line from input map file */
1286
1287
1288  /*
1289   * Count lines in map input file...
1290   */
1291
1292   count = 0;
1293
1294   while (cupsFileGets(fp, line, sizeof(line)))
1295     if (line[0] == '0')
1296       count ++;
1297
1298  /*
1299   * Return the number of lines...
1300   */
1301
1302   if (count > 0)
1303     return (count);
1304   else
1305     return (-1);
1306 }
1307
1308
1309 /*
1310  * 'get_sbcs_charmap()' - Get SBCS Charmap.
1311  */
1312
1313 static _cups_cmap_t *                    /* O - Charmap or 0 on error */
1314 get_sbcs_charmap(
1315     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1316     const char            *filename)    /* I - Charmap Filename */
1317 {
1318   unsigned long legchar;                /* Legacy character value */
1319   cups_utf32_t  unichar;                /* Unicode character value */
1320   _cups_cmap_t   *cmap;                 /* Legacy SBCS / Unicode Charset Map */
1321   cups_file_t   *fp;                    /* Charset map file pointer */
1322   char          *s;                     /* Line parsing pointer */
1323   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1324   cups_sbcs_t   *srow;                  /* Pointer to SBCS row in 'uni2char' */
1325   char          line[256];              /* Line from charset map file */
1326
1327
1328  /*
1329   * See if we already have this SBCS charset map loaded...
1330   */
1331
1332   DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
1333                 filename));
1334
1335   for (cmap = cmap_cache; cmap; cmap = cmap->next)
1336   {
1337     if (cmap->encoding == encoding)
1338     {
1339       cmap->used ++;
1340       DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
1341
1342       return ((void *)cmap);
1343     }
1344   }
1345
1346  /*
1347   * Open SBCS charset map input file...
1348   */
1349
1350   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1351   {
1352     DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
1353
1354     return (NULL);
1355   }
1356
1357  /*
1358   * Allocate memory for SBCS charset map...
1359   */
1360
1361   if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1362   {
1363     cupsFileClose(fp);
1364     DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1365
1366     return (NULL);
1367   }
1368
1369   cmap->used ++;
1370   cmap->encoding = encoding;
1371
1372  /*
1373   * Save SBCS charset map into memory for transcoding...
1374   */
1375
1376   while (cupsFileGets(fp, line, sizeof(line)))
1377   {
1378     if (line[0] != '0')
1379       continue;
1380
1381     legchar = strtol(line, &s, 16);
1382     if (legchar < 0 || legchar > 0xff)
1383       goto sbcs_error;
1384
1385     unichar = strtol(s, NULL, 16);
1386     if (unichar < 0 || unichar > 0x10ffff)
1387       goto sbcs_error;
1388
1389    /*
1390     * Save legacy to Unicode mapping in direct lookup table...
1391     */
1392
1393     crow  = cmap->char2uni + legchar;
1394     *crow = (cups_ucs2_t)(unichar & 0xffff);
1395
1396    /*
1397     * Save Unicode to legacy mapping in indirect lookup table...
1398     */
1399
1400     srow = cmap->uni2char[(unichar >> 8) & 0xff];
1401     if (!srow)
1402     {
1403       srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1404       if (!srow)
1405         goto sbcs_error;
1406
1407       cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1408     }
1409
1410     srow += unichar & 0xff;
1411
1412    /*
1413     * Convert Replacement Character to visible replacement...
1414     */
1415
1416     if (unichar == 0xfffd)
1417       legchar = (unsigned long)'?';
1418
1419    /*
1420     * First (oldest) legacy character uses Unicode mapping cell...
1421     */
1422
1423     if (!*srow)
1424       *srow = (cups_sbcs_t)legchar;
1425   }
1426
1427   cupsFileClose(fp);
1428
1429  /*
1430   * Add it to the cache and return...
1431   */
1432
1433   cmap->next = cmap_cache;
1434   cmap_cache = cmap;
1435
1436   DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
1437
1438   return (cmap);
1439
1440  /*
1441   * If we get here, there was an error in the cmap file...
1442   */
1443
1444   sbcs_error:
1445
1446   free_sbcs_charmap(cmap);
1447
1448   cupsFileClose(fp);
1449
1450   DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
1451
1452   return (NULL);
1453 }
1454
1455
1456 /*
1457  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1458  */
1459
1460 static _cups_vmap_t *                   /* O - Charmap or 0 on error */
1461 get_vbcs_charmap(
1462     const cups_encoding_t encoding,     /* I - Charmap Encoding */
1463     const char            *filename)    /* I - Charmap Filename */
1464 {
1465   _cups_vmap_t  *vmap;                  /* Legacy VBCS / Unicode Charset Map */
1466   cups_ucs2_t   *crow;                  /* Pointer to UCS-2 row in 'char2uni' */
1467   cups_vbcs_t   *vrow;                  /* Pointer to VBCS row in 'uni2char' */
1468   _cups_wide2uni_t *wide2uni;           /* Pointer to row in 'wide2uni' */
1469   cups_sbcs_t   leadchar;               /* Lead char of 2-byte legacy char */
1470   unsigned long legchar;                /* Legacy character value */
1471   cups_utf32_t  unichar;                /* Unicode character value */
1472   int           mapcount;               /* Count of lines in charmap file */
1473   cups_file_t   *fp;                    /* Charset map file pointer */
1474   char          *s;                     /* Line parsing pointer */
1475   char          line[256];              /* Line from charset map file */
1476   int           i;                      /* Loop variable */
1477   int           legacy;                 /* 32-bit legacy char */
1478
1479
1480   DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1481                 encoding, filename));
1482
1483  /*
1484   * See if we already have this DBCS/VBCS charset map loaded...
1485   */
1486
1487   for (vmap = vmap_cache; vmap; vmap = vmap->next)
1488   {
1489     if (vmap->encoding == encoding)
1490     {
1491       vmap->used ++;
1492       DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
1493
1494       return ((void *)vmap);
1495     }
1496   }
1497
1498  /*
1499   * Open VBCS charset map input file...
1500   */
1501
1502   if ((fp = cupsFileOpen(filename, "r")) == NULL)
1503   {
1504     DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
1505
1506     return (NULL);
1507   }
1508
1509  /*
1510   * Count lines in charmap file...
1511   */
1512
1513   if ((mapcount = get_charmap_count(fp)) <= 0)
1514   {
1515     DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
1516
1517     cupsFileClose(fp);
1518
1519     return (NULL);
1520   }
1521
1522   DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
1523
1524  /*
1525   * Allocate memory for DBCS/VBCS charset map...
1526   */
1527
1528   if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1529   {
1530     DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
1531
1532     cupsFileClose(fp);
1533
1534     return (NULL);
1535   }
1536
1537   vmap->used ++;
1538   vmap->encoding = encoding;
1539
1540  /*
1541   * Save DBCS/VBCS charset map into memory for transcoding...
1542   */
1543
1544   wide2uni = NULL;
1545
1546   cupsFileRewind(fp);
1547
1548   i      = 0;
1549   legacy = 0;
1550
1551   while (cupsFileGets(fp, line, sizeof(line)))
1552   {
1553     if (line[0] != '0')
1554       continue;
1555
1556     legchar = strtoul(line, &s, 16);
1557     if (legchar == ULONG_MAX)
1558       goto vbcs_error;
1559
1560     unichar = strtol(s, NULL, 16);
1561     if (unichar < 0 || unichar > 0x10ffff)
1562       goto vbcs_error;
1563
1564     i ++;
1565
1566     DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
1567                   legchar, (unsigned)unichar));
1568
1569    /*
1570     * Save lead char of 2/3/4-byte legacy char...
1571     */
1572
1573     if (legchar > 0xffffff)
1574     {
1575       leadchar                  = (cups_sbcs_t)(legchar >> 24);
1576       vmap->lead4char[leadchar] = leadchar;
1577     }
1578     else if (legchar > 0xffff)
1579     {
1580       leadchar                  = (cups_sbcs_t)(legchar >> 16);
1581       vmap->lead3char[leadchar] = leadchar;
1582     }
1583     else
1584     {
1585       leadchar                  = (cups_sbcs_t)(legchar >> 8);
1586       vmap->lead2char[leadchar] = leadchar;
1587     }
1588
1589    /*
1590     * Save Legacy to Unicode mapping...
1591     */
1592
1593     if (legchar <= 0xffff)
1594     {
1595      /*
1596       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1597       */
1598
1599       crow = vmap->char2uni[(int)leadchar];
1600       if (!crow)
1601       {
1602         crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1603         if (!crow)
1604           goto vbcs_error;
1605
1606         vmap->char2uni[(int)leadchar] = crow;
1607       }
1608
1609       crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1610     }
1611     else
1612     {
1613      /*
1614       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1615       */
1616
1617       if (!legacy)
1618       {
1619         legacy          = 1;
1620         vmap->widecount = (mapcount - i + 1);
1621         wide2uni        = (_cups_wide2uni_t *)calloc(vmap->widecount,
1622                                                      sizeof(_cups_wide2uni_t));
1623         if (!wide2uni)
1624           goto vbcs_error;
1625
1626         vmap->wide2uni = wide2uni;
1627       }
1628
1629       wide2uni->widechar = (cups_vbcs_t)legchar;
1630       wide2uni->unichar  = (cups_ucs2_t)unichar;
1631       wide2uni ++;
1632     }
1633
1634    /*
1635     * Save Unicode to legacy mapping in indirect lookup table...
1636     */
1637
1638     vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1639     if (!vrow)
1640     {
1641       vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1642       if (!vrow)
1643         goto vbcs_error;
1644
1645       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1646     }
1647
1648     vrow += (int)(unichar & 0xff);
1649
1650    /*
1651     * Convert Replacement Character to visible replacement...
1652     */
1653
1654     if (unichar == 0xfffd)
1655       legchar = (unsigned long)'?';
1656
1657    /*
1658     * First (oldest) legacy character uses Unicode mapping cell...
1659     */
1660
1661     if (!*vrow)
1662       *vrow = (cups_vbcs_t)legchar;
1663   }
1664
1665   vmap->charcount = (i - vmap->widecount);
1666
1667   cupsFileClose(fp);
1668
1669  /*
1670   * Add it to the cache and return...
1671   */
1672
1673   vmap->next = vmap_cache;
1674   vmap_cache = vmap;
1675
1676   DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
1677
1678   return (vmap);
1679
1680  /*
1681   * If we get here, the file contains errors...
1682   */
1683
1684   vbcs_error:
1685
1686   free_vbcs_charmap(vmap);
1687
1688   cupsFileClose(fp);
1689
1690   DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
1691
1692   return (NULL);
1693 }
1694
1695
1696 /*
1697  * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
1698  */