cups/transcode.c

   1 /*
   2  * "$Id$"
   3  *
   4  *   Transcoding support for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 1997-2005 by Easy Software Products.
   7  *
   8  *   These coded instructions, statements, and computer programs are
   9  *   the property of Easy Software Products and are protected by Federal
  10  *   copyright law.  Distribution and use rights are outlined in the
  11  *   file "LICENSE.txt" which should have been included with this file.
  12  *   If this file is missing or damaged please contact Easy Software
  13  *   Products at:
  14  *
  15  *       Attn: CUPS Licensing Information
  16  *       Easy Software Products
  17  *       44141 Airport View Drive, Suite 204
  18  *       Hollywood, Maryland 20636 USA
  19  *
  20  *       Voice: (301) 373-9600
  21  *       EMail: cups-info@cups.org
  22  *         WWW: http://www.cups.org
  23  *
  24  * Contents:
  25  *
  26  *   cupsCharmapGet()    - Get a character set map.
  27  *   cupsCharmapFree()   - Free a character set map.
  28  *   cupsCharmapFlush()  - Flush all character set maps out of cache.
  29  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  30  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  31  *   cupsUTF8ToUTF16()   - Convert UTF-8 to UTF-16.
  32  *   cupsUTF16ToUTF8()   - Convert UTF-16 to UTF-8.
  33  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  34  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  35  *   cupsUTF16ToUTF32()  - Convert UTF-16 to UTF-32.
  36  *   cupsUTF32ToUTF16()  - Convert UTF-32 to UTF-16.
  37  *   get_charmap_count() - Count lines in a charmap file.
  38  *   get_sbcs_charmap()  - Get SBCS Charmap.
  39  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  40  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  41  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  42  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  43  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  44  *   compare_wide()      - Compare key for wide (VBCS) match.
  45  */
  46
  47 /*
  48  * Include necessary headers...
  49  */
  50
  51 #include <stdio.h>
  52 #include <stdlib.h>
  53 #include <string.h>
  54 #include <errno.h>
  55 #include <ctype.h>
  56 #include <time.h>
  57
  58 #include "language.h"
  59 #include "string.h"
  60 #include "transcode.h"
  61
  62
  63 /*
  64  * Local Globals...
  65  */
  66
  67 static cups_cmap_t      *cmap_cache = NULL;    /* SBCS Charmap Cache */
  68 static cups_vmap_t      *vmap_cache = NULL;    /* VBCS Charmap Cache */
  69
  70 /*
  71  * Prototypes...
  72  */
  73
  74 static int get_charmap_count(const char *filename);
  75 static cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
  76                                      const char *filename);
  77 static cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
  78                                      const char *filename);
  79
  80 static int conv_utf8_to_sbcs(char *dest,
  81                              const cups_utf8_t *src,
  82                              const int maxout,
  83                              const cups_encoding_t encoding);
  84 static int conv_utf8_to_vbcs(char *dest,
  85                              const cups_utf8_t *src,
  86                              const int maxout,
  87                              const cups_encoding_t encoding);
  88
  89 static int conv_sbcs_to_utf8(cups_utf8_t *dest,
  90                              const char *src,
  91                              const int maxout,
  92                              const cups_encoding_t encoding);
  93 static int conv_vbcs_to_utf8(cups_utf8_t *dest,
  94                              const char *src,
  95                              const int maxout,
  96                              const cups_encoding_t encoding);
  97
  98 static int compare_wide(const void *k1, const void *k2);
  99
 100 /*
 101  * 'cupsCharmapGet()' - Get a character set map.
 102  *
 103  * This code handles single-byte (SBCS), double-byte (DBCS), and
 104  * variable-byte (VBCS) character sets _without_ charset escapes...
 105  * This code does not handle multiple-byte character sets (MBCS)
 106  * (such as ISO-2022-JP) with charset switching via escapes...
 107  */
 108 void *                                  /* O - Charset map pointer */
 109 cupsCharmapGet(const cups_encoding_t encoding)
 110                                         /* I - Encoding */
 111 {
 112   char          *datadir;       /* CUPS_DATADIR environment variable */
 113   char          mapname[80];    /* Name of charset map */
 114   char          filename[256];  /* Filename for charset map file */
 115
 116  /*
 117   * Check for valid arguments...
 118   */
 119   if ((encoding < 0) || (encoding >= CUPS_ENCODING_VBCS_END))
 120     return (NULL);
 121
 122  /*
 123   * Get the data directory and charset map name...
 124   */
 125   if ((datadir = getenv("CUPS_DATADIR")) == NULL)
 126     datadir = CUPS_DATADIR;
 127   snprintf(mapname, sizeof(mapname), "%s.txt", cupsEncodingName(encoding));
 128   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
 129            datadir, mapname);
 130
 131  /*
 132   * Read charset map input file into cache...
 133   */
 134   if (encoding < CUPS_ENCODING_SBCS_END)
 135     return (get_sbcs_charmap(encoding, filename));
 136   else if (encoding < CUPS_ENCODING_VBCS_END)
 137     return (get_vbcs_charmap(encoding, filename));
 138   else
 139     return (NULL);
 140 }
 141
 142 /*
 143  * 'cupsCharmapFree()' - Free a character set map.
 144  *
 145  * This does not actually free; use 'cupsCharmapFlush()' for that.
 146  */
 147 void
 148 cupsCharmapFree(const cups_encoding_t encoding)
 149                                         /* I - Encoding */
 150 {
 151   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
 152   cups_vmap_t   *vmap;          /* Legacy VBCS / Unicode Charset Map */
 153
 154  /*
 155   * See if we already have this SBCS charset map loaded...
 156   */
 157   for (cmap = cmap_cache; cmap != NULL; cmap = cmap->next)
 158   {
 159     if (cmap->encoding == encoding)
 160     {
 161       if (cmap->used > 0)
 162         cmap->used --;
 163       return;
 164     }
 165   }
 166
 167  /*
 168   * See if we already have this DBCS/VBCS charset map loaded...
 169   */
 170   for (vmap = vmap_cache; vmap != NULL; vmap = vmap->next)
 171   {
 172     if (vmap->encoding == encoding)
 173     {
 174       if (vmap->used > 0)
 175         vmap->used --;
 176       return;
 177     }
 178   }
 179   return;
 180 }
 181
 182 /*
 183  * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
 184  */
 185 void
 186 cupsCharmapFlush(void)
 187 {
 188   int           i;              /* Looping variable */
 189   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
 190   cups_vmap_t   *vmap;          /* Legacy VBCS / Unicode Charset Map */
 191   cups_cmap_t   *cnext;         /* Next Legacy SBCS Charset Map */
 192   cups_vmap_t   *vnext;         /* Next Legacy VBCS Charset Map */
 193   cups_ucs2_t   *crow;          /* Pointer to UCS-2 row in 'char2uni' */
 194   cups_sbcs_t   *srow;          /* Pointer to SBCS row in 'uni2char' */
 195   cups_vbcs_t   *vrow;          /* Pointer to VBCS row in 'uni2char' */
 196
 197  /*
 198   * Loop through SBCS charset map cache, free all memory...
 199   */
 200   for (cmap = cmap_cache; cmap != NULL; cmap = cnext)
 201   {
 202     for (i = 0; i < 256; i ++)
 203     {
 204       if ((srow = cmap->uni2char[i]) != NULL)
 205         free(srow);
 206     }
 207     cnext = cmap->next;
 208     free(cmap);
 209   }
 210   cmap_cache = NULL;
 211
 212  /*
 213   * Loop through DBCS/VBCS charset map cache, free all memory...
 214   */
 215   for (vmap = vmap_cache; vmap != NULL; vmap = vnext)
 216   {
 217     for (i = 0; i < 256; i ++)
 218     {
 219       if ((crow = vmap->char2uni[i]) != NULL)
 220         free(crow);
 221     }
 222     for (i = 0; i < 256; i ++)
 223     {
 224       if ((vrow = vmap->uni2char[i]) != NULL)
 225         free(vrow);
 226     }
 227     if (vmap->wide2uni)
 228       free(vmap->wide2uni);
 229     vnext = vmap->next;
 230     free(vmap);
 231   }
 232   vmap_cache = NULL;
 233   return;
 234 }
 235
 236 /*
 237  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 238  *
 239  * This code handles single-byte (SBCS), double-byte (DBCS), and
 240  * variable-byte (VBCS) character sets _without_ charset escapes...
 241  * This code does not handle multiple-byte character sets (MBCS)
 242  * (such as ISO-2022-JP) with charset switching via escapes...
 243  */
 244 int                                     /* O - Count or -1 on error */
 245 cupsUTF8ToCharset(char *dest,           /* O - Target string */
 246     const cups_utf8_t *src,             /* I - Source string */
 247     const int maxout,                   /* I - Max output */
 248     const cups_encoding_t encoding)     /* I - Encoding */
 249 {
 250  /*
 251   * Check for valid arguments...
 252   */
 253   if ((dest == NULL)
 254   || (src == NULL)
 255   || (maxout < 1)
 256   || (maxout > CUPS_MAX_USTRING)
 257   || (encoding < 0)
 258   || (encoding == CUPS_UTF8)
 259   || (encoding >= CUPS_ENCODING_VBCS_END))
 260     return (-1);
 261
 262  /*
 263   * Convert input UTF-8 to legacy charset...
 264   */
 265   if (encoding < CUPS_ENCODING_SBCS_END)
 266     return (conv_utf8_to_sbcs(dest, src, maxout, encoding));
 267   else if (encoding < CUPS_ENCODING_VBCS_END)
 268     return (conv_utf8_to_vbcs(dest, src, maxout, encoding));
 269   else
 270     return (-1);
 271 }
 272
 273 /*
 274  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 275  *
 276  * This code handles single-byte (SBCS), double-byte (DBCS), and
 277  * variable-byte (VBCS) character sets _without_ charset escapes...
 278  * This code does not handle multiple-byte character sets (MBCS)
 279  * (such as ISO-2022-JP) with charset switching via escapes...
 280  */
 281 int                                     /* O - Count or -1 on error */
 282 cupsCharsetToUTF8(cups_utf8_t *dest,    /* O - Target string */
 283     const char *src,                    /* I - Source string */
 284     const int maxout,                   /* I - Max output */
 285     const cups_encoding_t encoding)     /* I - Encoding */
 286 {
 287  /*
 288   * Check for valid arguments...
 289   */
 290   if ((dest == NULL)
 291   || (src == NULL)
 292   || (maxout < 1)
 293   || (maxout > CUPS_MAX_USTRING)
 294   || (encoding < 0)
 295   || (encoding == CUPS_UTF8)
 296   || (encoding >= CUPS_ENCODING_VBCS_END))
 297     return (-1);
 298
 299  /*
 300   * Convert input legacy charset to UTF-8...
 301   */
 302   if (encoding < CUPS_ENCODING_SBCS_END)
 303     return (conv_sbcs_to_utf8(dest, src, maxout, encoding));
 304   else if (encoding < CUPS_ENCODING_VBCS_END)
 305     return (conv_vbcs_to_utf8(dest, src, maxout, encoding));
 306   else
 307     return (-1);
 308 }
 309
 310 /*
 311  * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
 312  *
 313  * This code does not support Unicode beyond 16-bits (Plane 0)...
 314  */
 315 int                                     /* O - Count or -1 on error */
 316 cupsUTF8ToUTF16(cups_utf16_t *dest,     /* O - Target string */
 317     const cups_utf8_t *src,             /* I - Source string */
 318     const int maxout)                   /* I - Max output */
 319 {
 320   int           worklen;        /* Internal UCS-4 string length */
 321   cups_utf32_t  work[CUPS_MAX_USTRING];
 322                                 /* Internal UCS-4 string */
 323
 324  /*
 325   * Check for valid arguments and clear output...
 326   */
 327   if ((dest == NULL)
 328   || (src == NULL)
 329   || (maxout < 1)
 330   || (maxout > CUPS_MAX_USTRING))
 331     return (-1);
 332   *dest = 0;
 333
 334  /*
 335   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 336   */
 337   worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
 338   if (worklen < 0)
 339     return (-1);
 340
 341  /*
 342   * Convert internal UCS-4 to output UTF-16...
 343   */
 344   worklen = cupsUTF32ToUTF16(dest, work, maxout);
 345   return (worklen);
 346 }
 347
 348 /*
 349  * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
 350  *
 351  * This code does not support Unicode beyond 16-bits (Plane 0)...
 352  */
 353 int                                     /* O - Count or -1 on error */
 354 cupsUTF16ToUTF8(cups_utf8_t *dest,      /* O - Target string */
 355     const cups_utf16_t *src,            /* I - Source string */
 356     const int maxout)                   /* I - Max output */
 357 {
 358   int           worklen;        /* Internal UCS-4 string length */
 359   cups_utf32_t  work[CUPS_MAX_USTRING];
 360                                 /* Internal UCS-4 string */
 361
 362  /*
 363   * Check for valid arguments and clear output...
 364   */
 365   if ((dest == NULL)
 366   || (src == NULL)
 367   || (maxout < 1)
 368   || (maxout > CUPS_MAX_USTRING))
 369     return (-1);
 370   *dest = 0;
 371
 372  /*
 373   * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
 374   */
 375   worklen = cupsUTF16ToUTF32(work, src, CUPS_MAX_USTRING);
 376   if (worklen < 0)
 377     return (-1);
 378
 379  /*
 380   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 381   */
 382   worklen = cupsUTF32ToUTF8(dest, work, maxout);
 383   return (worklen);
 384 }
 385
 386 /*
 387  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 388  *
 389  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 390  *
 391  *   UTF-32 char     UTF-8 char(s)
 392  *   --------------------------------------------------
 393  *        0 to 127 = 0xxxxxxx (US-ASCII)
 394  *     128 to 2047 = 110xxxxx 10yyyyyy
 395  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 396  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 397  *
 398  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 399  * which would convert to five- or six-octet UTF-8 sequences...
 400  *
 401  * This code does not support Unicode beyond 16-bits (Plane 0)...
 402  */
 403 int                                     /* O - Count or -1 on error */
 404 cupsUTF8ToUTF32(cups_utf32_t *dest,     /* O - Target string */
 405     const cups_utf8_t *src,             /* I - Source string */
 406     const int maxout)                   /* I - Max output */
 407 {
 408   cups_utf8_t   *first = (cups_utf8_t *) src;
 409   int           srclen;         /* Source string length */
 410   int           i;              /* Looping variable */
 411   cups_utf32_t  ch;             /* Character value */
 412   cups_utf32_t  next;           /* Next character value */
 413   cups_utf32_t  ch32;           /* UTF-32 character value */
 414
 415  /*
 416   * Check for valid arguments and clear output...
 417   */
 418   if ((dest == NULL)
 419   || (src == NULL)
 420   || (maxout < 1)
 421   || (maxout > CUPS_MAX_USTRING))
 422     return (-1);
 423   *dest = 0;
 424
 425  /*
 426   * Convert input UTF-8 to output UTF-32 (and insert BOM)...
 427   */
 428   *dest = 0xfeff;
 429   dest ++;
 430   srclen = strlen((char *) src);
 431   for (i = 1; i < (maxout - 1); src ++, dest ++)
 432   {
 433     ch = (cups_utf32_t) *src;
 434     ch &= 0xff;
 435     if (ch == 0)
 436       break;
 437     i ++;
 438
 439    /*
 440     * Convert UTF-8 character(s) to UTF-32 character...
 441     */
 442     if ((ch & 0x7f) == ch)
 443     {
 444      /*
 445       * One-octet UTF-8 <= 127 (US-ASCII)...
 446       */
 447       *dest = ch;
 448     }
 449     else if ((ch & 0xe0) == 0xc0)
 450     {
 451      /*
 452       * Two-octet UTF-8 <= 2047 (Latin-x)...
 453       */
 454       src ++;
 455       next = (cups_utf32_t) *src;
 456       next &= 0xff;
 457       if (next == 0)
 458         return (-1);
 459       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 460
 461      /*
 462       * Check for non-shortest form (invalid UTF-8)...
 463       */
 464       if (ch32 <= 127)
 465         return (-1);
 466       *dest = ch32;
 467     }
 468     else if ((ch & 0xf0) == 0xe0)
 469     {
 470      /*
 471       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 472       */
 473       src ++;
 474       next = (cups_utf32_t) *src;
 475       next &= 0xff;
 476       if (next == 0)
 477         return (-1);
 478       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 479       src ++;
 480       next = (cups_utf32_t) *src;
 481       next &= 0xff;
 482       if (next == 0)
 483         return (-1);
 484       ch32 = ((ch32 << 6) | (next & 0x3f));
 485
 486      /*
 487       * Check for non-shortest form (invalid UTF-8)...
 488       */
 489       if (ch32 <= 2047)
 490         return (-1);
 491       *dest = ch32;
 492     }
 493     else if ((ch & 0xf8) == 0xf0)
 494     {
 495      /*
 496       * Four-octet UTF-8 to Replacement Character...
 497       */
 498       if (((src - first) + 3) >= srclen)
 499         return (-1);
 500       src += 3;
 501       *dest = 0xfffd;
 502     }
 503     else if ((ch & 0xfc) == 0xf8)
 504     {
 505      /*
 506       * Five-octet UTF-8 (invalid strict UTF-32)...
 507       */
 508       return (-1);
 509     }
 510     else if ((ch & 0xfe) == 0xfc)
 511     {
 512      /*
 513       * Six-octet UTF-8 (invalid strict UTF-32)...
 514       */
 515       return (-1);
 516     }
 517     else
 518     {
 519      /*
 520       * More than six-octet (invalid UTF-8 sequence)...
 521       */
 522       return (-1);
 523     }
 524
 525    /*
 526     * Check for UTF-16 surrogate (illegal UTF-8)...
 527     */
 528     if ((*dest >= 0xd800) && (*dest <= 0xdfff))
 529       return (-1);
 530
 531    /*
 532     * Check for beyond Plane 16 (invalid UTF-8)...
 533     */
 534     if (*dest > 0x10ffff)
 535       return (-1);
 536   }
 537   *dest = 0;
 538   return (i);
 539 }
 540
 541 /*
 542  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 543  *
 544  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 545  *
 546  *   UTF-32 char     UTF-8 char(s)
 547  *   --------------------------------------------------
 548  *        0 to 127 = 0xxxxxxx (US-ASCII)
 549  *     128 to 2047 = 110xxxxx 10yyyyyy
 550  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 551  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 552  *
 553  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 554  * which would convert to five- or six-octet UTF-8 sequences...
 555  *
 556  * This code does not support Unicode beyond 16-bits (Plane 0)...
 557  */
 558 int                                     /* O - Count or -1 on error */
 559 cupsUTF32ToUTF8(cups_utf8_t *dest,      /* O - Target string */
 560     const cups_utf32_t *src,            /* I - Source string */
 561     const int maxout)                   /* I - Max output */
 562 {
 563   cups_utf32_t  *first = (cups_utf32_t *) src;
 564                                 /* First source char */
 565   cups_utf8_t   *start = dest;  /* Start of destination string */
 566   int           i;              /* Looping variable */
 567   int           swap = 0;       /* Byte-swap input to output */
 568   cups_utf32_t  ch;             /* Character value */
 569
 570  /*
 571   * Check for valid arguments and clear output...
 572   */
 573   if ((dest == NULL)
 574   || (src == NULL)
 575   || (maxout < 1))
 576     return (-1);
 577   *dest = '\0';
 578
 579  /*
 580   * Check for leading BOM in UTF-32 and inverted BOM...
 581   */
 582   if (*src == 0xfffe0000)
 583     swap = 1;
 584
 585  /*
 586   * Convert input UTF-32 to output UTF-8...
 587   */
 588   for (i = 0; i < (maxout - 1); src ++)
 589   {
 590     ch = *src;
 591     if (ch == 0)
 592       break;
 593
 594    /*
 595     * Byte swap input UTF-32, if necessary...
 596     */
 597     if (swap)
 598       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 599
 600    /*
 601     * Check for leading BOM (and delete from output)...
 602     */
 603     if ((src == first) && (ch == 0xfeff))
 604       continue;
 605
 606    /*
 607     * Check for beyond Plane 16 (invalid UTF-32)...
 608     */
 609     if (ch > 0x10ffff)
 610       return (-1);
 611
 612    /*
 613     * Convert beyond Plane 0 (BMP) to Replacement Character...
 614     */
 615     if (ch > 0xffff)
 616       ch = 0xfffd;
 617
 618    /*
 619     * Convert UTF-32 character to UTF-8 character(s)...
 620     */
 621     if (ch <= 0x7f)
 622     {
 623      /*
 624       * One-octet UTF-8 <= 127 (US-ASCII)...
 625       */
 626       *dest = (cups_utf8_t) ch;
 627       dest ++;
 628       i ++;
 629     }
 630     else if (ch <= 0x7ff)
 631     {
 632      /*
 633       * Two-octet UTF-8 <= 2047 (Latin-x)...
 634       */
 635       if (i > (maxout - 2))
 636         break;
 637       *dest = (cups_utf8_t) (0xc0 | ((ch >> 6) & 0x1f));
 638       dest ++;
 639       i ++;
 640       *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
 641       dest ++;
 642       i ++;
 643     }
 644     else
 645     {
 646      /*
 647       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 648       */
 649       if (i > (maxout - 3))
 650         break;
 651       *dest = (cups_utf8_t) (0xe0 | ((ch >> 12) & 0x0f));
 652       dest ++;
 653       i ++;
 654       *dest = (cups_utf8_t) (0x80 | ((ch >> 6) & 0x3f));
 655       dest ++;
 656       i ++;
 657       *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
 658       dest ++;
 659       i ++;
 660     }
 661   }
 662   *dest = '\0';
 663   i = (int) (dest - start);
 664   return (i);
 665 }
 666
 667 /*
 668  * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
 669  *
 670  * This code does not support Unicode beyond 16-bits (Plane 0)...
 671  */
 672 int                                     /* O - Count or -1 on error */
 673 cupsUTF16ToUTF32(cups_utf32_t *dest,    /* O - Target string */
 674     const cups_utf16_t *src,            /* I - Source string */
 675     const int maxout)                   /* I - Max output */
 676 {
 677   int           i;              /* Looping variable */
 678   int           swap = 0;       /* Byte-swap input to output */
 679   int           surrogate = 0;  /* Expecting low-half surrogate */
 680   cups_utf32_t  ch;             /* Character value */
 681
 682  /*
 683   * Check for valid arguments and clear output...
 684   */
 685   if ((dest == NULL)
 686   || (src == NULL)
 687   || (maxout < 1)
 688   || (maxout > CUPS_MAX_USTRING))
 689     return (-1);
 690   *dest = 0;
 691
 692  /*
 693   * Check for leading BOM in UTF-16 and inverted BOM...
 694   */
 695   if (*src == 0xfffe)
 696     swap = 1;
 697
 698  /*
 699   * Convert input UTF-16 to output UTF-32...
 700   */
 701   for (i = 0; i < (maxout - 1); src ++)
 702   {
 703     ch = (cups_utf32_t) (*src & 0xffff);
 704     if (ch == 0)
 705       break;
 706     i ++;
 707
 708    /*
 709     * Byte swap input UTF-16, if necessary...
 710     */
 711     if (swap)
 712       ch = (cups_utf32_t) ((ch << 8) | (ch >> 8));
 713
 714    /*
 715     * Discard expected UTF-16 low-half surrogate...
 716     */
 717     if ((ch >= 0xdc00) && (ch <= 0xdfff))
 718     {
 719       if (surrogate == 0)
 720         return (-1);
 721       surrogate = 0;
 722       continue;
 723     }
 724
 725    /*
 726     * Convert UTF-16 high-half surrogate to Replacement Character...
 727     */
 728     if ((ch >= 0xd800) && (ch <= 0xdbff))
 729     {
 730       if (surrogate == 1)
 731         return (-1);
 732       surrogate = 1;
 733       ch = 0xfffd;
 734     }
 735     *dest = ch;
 736     dest ++;
 737   }
 738   *dest = 0;
 739   return (i);
 740 }
 741
 742 /*
 743  * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
 744  *
 745  * This code does not support Unicode beyond 16-bits (Plane 0)...
 746  */
 747 int                                     /* O - Count or -1 on error */
 748 cupsUTF32ToUTF16(cups_utf16_t *dest,    /* O - Target string */
 749     const cups_utf32_t *src,            /* I - Source string */
 750     const int maxout)                   /* I - Max output */
 751 {
 752   int           i;              /* Looping variable */
 753   int           swap = 0;       /* Byte-swap input to output */
 754   cups_utf32_t  ch;             /* Character value */
 755
 756  /*
 757   * Check for valid arguments and clear output...
 758   */
 759   if ((dest == NULL)
 760   || (src == NULL)
 761   || (maxout < 1)
 762   || (maxout > CUPS_MAX_USTRING))
 763     return (-1);
 764   *dest = 0;
 765
 766  /*
 767   * Check for leading BOM in UTF-32 and inverted BOM...
 768   */
 769   if (*src == 0xfffe0000)
 770     swap = 1;
 771
 772  /*
 773   * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
 774   */
 775   for (i = 0; i < (maxout - 1); src ++, dest ++)
 776   {
 777     ch = *src;
 778     if (ch == 0)
 779       break;
 780     i ++;
 781
 782    /*
 783     * Byte swap input UTF-32, if necessary...
 784     */
 785     if (swap)
 786       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 787
 788    /*
 789     * Check for UTF-16 surrogate (illegal UTF-32)...
 790     */
 791     if ((ch >= 0xd800) && (ch <= 0xdfff))
 792       return (-1);
 793
 794    /*
 795     * Check for beyond Plane 16 (invalid UTF-32)...
 796     */
 797     if (ch > 0x10ffff)
 798       return (-1);
 799
 800    /*
 801     * Convert beyond Plane 0 (BMP) to Replacement Character...
 802     */
 803     if (ch > 0xffff)
 804       ch = 0xfffd;
 805     *dest = (cups_utf16_t) ch;
 806   }
 807   *dest = 0;
 808   return (i);
 809 }
 810
 811 /*
 812  * 'get_charmap_count()' - Count lines in a charmap file.
 813  */
 814 static int                              /* O - Count or -1 on error */
 815 get_charmap_count(const char *filename) /* I - Charmap Filename */
 816 {
 817   int           i;              /* Looping variable */
 818   FILE          *fp;            /* Map input file pointer */
 819   char          *s;             /* Line parsing pointer */
 820   char          line[256];      /* Line from input map file */
 821   cups_utf32_t  unichar;        /* Unicode character value */
 822
 823  /*
 824   * Open map input file...
 825   */
 826   if ((filename == NULL) || (*filename == '\0'))
 827     return (-1);
 828   fp = fopen(filename, "r");
 829   if (fp == NULL)
 830     return (-1);
 831
 832  /*
 833   * Count lines in map input file...
 834   */
 835   for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
 836   {
 837     s = fgets(&line[0], sizeof(line), fp);
 838     if (s == NULL)
 839       break;
 840     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
 841       continue;
 842     while ((*s != 0) && (*s != ' ') && (*s != '\t'))
 843         s ++;
 844     while ((*s == ' ') || (*s == '\t'))
 845         s ++;
 846     if (strncmp (s, "0x", 2) == 0)
 847       s += 2;
 848     if ((sscanf(s, "%lx", &unichar) != 1)
 849     || (unichar > 0xffff))
 850     {
 851       fclose(fp);
 852       return (-1);
 853     }
 854     i ++;
 855   }
 856   if (i == 0)
 857     i = -1;
 858
 859  /*
 860   * Close file and return charmap count (non-comment line count)...
 861   */
 862   fclose(fp);
 863   return (i);
 864 }
 865
 866 /*
 867  * 'get_sbcs_charmap()' - Get SBCS Charmap.
 868  */
 869 static cups_cmap_t *                    /* O - Charmap or 0 on error */
 870 get_sbcs_charmap(const cups_encoding_t encoding,
 871                                         /* I - Charmap Encoding */
 872                  const char *filename)  /* I - Charmap Filename */
 873 {
 874   int           i;              /* Loop variable */
 875   unsigned long legchar;        /* Legacy character value */
 876   cups_utf32_t  unichar;        /* Unicode character value */
 877   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
 878   FILE          *fp;            /* Charset map file pointer */
 879   char          *s;             /* Line parsing pointer */
 880   cups_ucs2_t   *crow;          /* Pointer to UCS-2 row in 'char2uni' */
 881   cups_sbcs_t   *srow;          /* Pointer to SBCS row in 'uni2char' */
 882   char          line[256];      /* Line from charset map file */
 883
 884  /*
 885   * Check for valid arguments...
 886   */
 887   if ((encoding < 0) || (filename == NULL))
 888     return (NULL);
 889
 890  /*
 891   * See if we already have this SBCS charset map loaded...
 892   */
 893   for (cmap = cmap_cache; cmap != NULL; cmap = cmap->next)
 894   {
 895     if (cmap->encoding == encoding)
 896     {
 897       cmap->used ++;
 898       return ((void *) cmap);
 899     }
 900   }
 901
 902  /*
 903   * Open SBCS charset map input file...
 904   */
 905   fp = fopen(filename, "r");
 906   if (fp == NULL)
 907     return (NULL);
 908
 909  /*
 910   * Allocate memory for SBCS charset map and add to cache...
 911   */
 912   cmap = (cups_cmap_t *) calloc(1, sizeof(cups_cmap_t));
 913   if (cmap == NULL)
 914   {
 915     fclose(fp);
 916     return (NULL);
 917   }
 918   cmap->next = cmap_cache;
 919   cmap_cache = cmap;
 920   cmap->used ++;
 921   cmap->encoding = encoding;
 922
 923  /*
 924   * Save SBCS charset map into memory for transcoding...
 925   */
 926   for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
 927   {
 928     s = fgets(&line[0], sizeof(line), fp);
 929     if (s == NULL)
 930       break;
 931     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
 932       continue;
 933     if (strncmp (s, "0x", 2) == 0)
 934       s += 2;
 935     if ((sscanf(s, "%lx", &legchar) != 1)
 936     || (legchar > 0xff))
 937     {
 938       fclose(fp);
 939       cupsCharmapFlush();
 940       return (NULL);
 941     }
 942     while ((*s != 0) && (*s != ' ') && (*s != '\t'))
 943       s ++;
 944     while ((*s == ' ') || (*s == '\t'))
 945       s ++;
 946     if (strncmp (s, "0x", 2) == 0)
 947       s += 2;
 948     if (sscanf(s, "%lx", &unichar) != 1)
 949     {
 950       fclose(fp);
 951       cupsCharmapFlush();
 952       return (NULL);
 953     }
 954     i ++;
 955
 956    /*
 957     * Convert beyond Plane 0 (BMP) to Replacement Character...
 958     */
 959     if (unichar > 0xffff)
 960       unichar = 0xfffd;
 961
 962    /*
 963     * Save legacy to Unicode mapping in direct lookup table...
 964     */
 965     crow = &cmap->char2uni[(int) legchar];
 966     *crow = (cups_ucs2_t) (unichar & 0xffff);
 967
 968    /*
 969     * Save Unicode to legacy mapping in indirect lookup table...
 970     */
 971     srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
 972     if (srow == NULL)
 973     {
 974       srow = (cups_sbcs_t *) calloc(256, sizeof(cups_sbcs_t));
 975       if (srow == NULL)
 976       {
 977         fclose(fp);
 978         cupsCharmapFlush();
 979         return (NULL);
 980       }
 981       cmap->uni2char[(int) ((unichar >> 8) & 0xff)] = srow;
 982     }
 983     srow += (int) (unichar & 0xff);
 984
 985    /*
 986     * Convert Replacement Character to visible replacement...
 987     */
 988     if (unichar == 0xfffd)
 989       legchar = (unsigned long) '?';
 990
 991    /*
 992     * First (oldest) legacy character uses Unicode mapping cell...
 993     */
 994     if (*srow == 0)
 995       *srow = (cups_sbcs_t) legchar;
 996   }
 997   fclose(fp);
 998   return (cmap);
 999 }
1000
1001 /*
1002  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1003  */
1004 static cups_vmap_t *                    /* O - Charmap or 0 on error */
1005 get_vbcs_charmap(const cups_encoding_t encoding,
1006                                         /* I - Charmap Encoding */
1007                  const char *filename)  /* I - Charmap Filename */
1008 {
1009   cups_vmap_t       *vmap;      /* Legacy VBCS / Unicode Charset Map */
1010   cups_ucs2_t       *crow;      /* Pointer to UCS-2 row in 'char2uni' */
1011   cups_vbcs_t       *vrow;      /* Pointer to VBCS row in 'uni2char' */
1012   cups_wide2uni_t   *wide2uni;  /* Pointer to row in 'wide2uni' */
1013   cups_sbcs_t       leadchar;   /* Lead char of 2-byte legacy char */
1014   unsigned long     legchar;    /* Legacy character value */
1015   cups_utf32_t      unichar;    /* Unicode character value */
1016   int               mapcount;   /* Count of lines in charmap file */
1017   FILE              *fp;        /* Charset map file pointer */
1018   char              *s;         /* Line parsing pointer */
1019   char              line[256];  /* Line from charset map file */
1020   int               i;          /* Loop variable */
1021   int               wide;       /* 32-bit legacy char */
1022
1023  /*
1024   * Check for valid arguments...
1025   */
1026   if ((encoding < 0) || (filename == NULL))
1027     return (NULL);
1028
1029  /*
1030   * See if we already have this DBCS/VBCS charset map loaded...
1031   */
1032   for (vmap = vmap_cache; vmap != NULL; vmap = vmap->next)
1033   {
1034     if (vmap->encoding == encoding)
1035     {
1036       vmap->used ++;
1037       return ((void *) vmap);
1038     }
1039   }
1040
1041  /*
1042   * Count lines in charmap file...
1043   */
1044   mapcount = get_charmap_count(filename);
1045   if (mapcount <= 0)
1046     return (NULL);
1047
1048  /*
1049   * Open VBCS charset map input file...
1050   */
1051   fp = fopen(filename, "r");
1052   if (fp == NULL)
1053     return (NULL);
1054
1055  /*
1056   * Allocate memory for DBCS/VBCS charset map and add to cache...
1057   */
1058   vmap = (cups_vmap_t *) calloc(1, sizeof(cups_vmap_t));
1059   if (vmap == NULL)
1060   {
1061     fclose(fp);
1062     return (NULL);
1063   }
1064   vmap->next = vmap_cache;
1065   vmap_cache = vmap;
1066   vmap->used ++;
1067   vmap->encoding = encoding;
1068
1069  /*
1070   * Save DBCS/VBCS charset map into memory for transcoding...
1071   */
1072   leadchar = 0;
1073   wide2uni = NULL;
1074
1075   for (i = 0, wide = 0; i < mapcount; )
1076   {
1077     s = fgets(&line[0], sizeof(line), fp);
1078     if (s == NULL)
1079       break;
1080     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1081       continue;
1082     if (strncmp (s, "0x", 2) == 0)
1083       s += 2;
1084     if ((sscanf(s, "%lx", &legchar) != 1)
1085     || ((legchar > 0xffff) && (encoding < CUPS_ENCODING_DBCS_END)))
1086     {
1087       fclose(fp);
1088       cupsCharmapFlush();
1089       return (NULL);
1090     }
1091     while ((*s != 0) && (*s != ' ') && (*s != '\t'))
1092       s ++;
1093     while ((*s == ' ') || (*s == '\t'))
1094       s ++;
1095     if (strncmp (s, "0x", 2) == 0)
1096       s += 2;
1097     if (sscanf(s, "%lx", &unichar) != 1)
1098     {
1099       fclose(fp);
1100       cupsCharmapFlush();
1101       return (NULL);
1102     }
1103     i ++;
1104
1105    /*
1106     * Convert beyond Plane 0 (BMP) to Replacement Character...
1107     */
1108     if (unichar > 0xffff)
1109       unichar = 0xfffd;
1110
1111    /*
1112     * Save lead char of 2/3/4-byte legacy char...
1113     */
1114     if ((legchar > 0xff) && (legchar <= 0xffff))
1115     {
1116       leadchar = (cups_sbcs_t) (legchar >> 8);
1117       vmap->lead2char[leadchar] = leadchar;
1118     }
1119     if ((legchar > 0xffff) && (legchar <= 0xffffff))
1120     {
1121       leadchar = (cups_sbcs_t) (legchar >> 16);
1122       vmap->lead3char[leadchar] = leadchar;
1123     }
1124     if (legchar > 0xffffff)
1125     {
1126       leadchar = (cups_sbcs_t) (legchar >> 24);
1127       vmap->lead4char[leadchar] = leadchar;
1128     }
1129
1130    /*
1131     * Save Legacy to Unicode mapping...
1132     */
1133     if (legchar <= 0xffff)
1134     {
1135      /*
1136       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1137       */
1138       crow = vmap->char2uni[(int) leadchar];
1139       if (crow == NULL)
1140       {
1141         crow = (cups_ucs2_t *) calloc(256, sizeof(cups_ucs2_t));
1142         if (crow == NULL)
1143         {
1144           fclose(fp);
1145           cupsCharmapFlush();
1146           return (NULL);
1147         }
1148         vmap->char2uni[(int) leadchar] = crow;
1149       }
1150       crow += (int) (legchar & 0xff);
1151       *crow = (cups_vbcs_t) unichar;
1152     }
1153     else
1154     {
1155      /*
1156       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1157       */
1158       if (wide == 0)
1159       {
1160         wide = 1;
1161         vmap->widecount = (mapcount - i + 1);
1162         wide2uni = (cups_wide2uni_t *)
1163           calloc(vmap->widecount, sizeof(cups_wide2uni_t));
1164         if (wide2uni == NULL)
1165         {
1166           fclose(fp);
1167           cupsCharmapFlush();
1168           return (NULL);
1169         }
1170         vmap->wide2uni = wide2uni;
1171       }
1172       wide2uni->widechar = (cups_vbcs_t) legchar;
1173       wide2uni->unichar = unichar;
1174       wide2uni ++;
1175     }
1176
1177    /*
1178     * Save Unicode to legacy mapping in indirect lookup table...
1179     */
1180     vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1181     if (vrow == NULL)
1182     {
1183       vrow = (cups_vbcs_t *) calloc(256, sizeof(cups_vbcs_t));
1184       if (vrow == NULL)
1185       {
1186         fclose(fp);
1187         cupsCharmapFlush();
1188         return (NULL);
1189       }
1190       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1191     }
1192     vrow += (int) (unichar & 0xff);
1193
1194    /*
1195     * Convert Replacement Character to visible replacement...
1196     */
1197     if (unichar == 0xfffd)
1198       legchar = (unsigned long) '?';
1199
1200    /*
1201     * First (oldest) legacy character uses Unicode mapping cell...
1202     */
1203     if (*vrow == 0)
1204       *vrow = (cups_vbcs_t) legchar;
1205   }
1206   vmap->charcount = (i - vmap->widecount);
1207   fclose(fp);
1208   return (vmap);
1209 }
1210
1211 /*
1212  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
1213  */
1214 static int                              /* O - Count or -1 on error */
1215 conv_utf8_to_sbcs(char *dest,           /* O - Target string */
1216     const cups_utf8_t *src,             /* I - Source string */
1217     const int maxout,                   /* I - Max output */
1218     const cups_encoding_t encoding)     /* I - Encoding */
1219 {
1220   char          *start = dest;  /* Start of destination string */
1221   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
1222   cups_sbcs_t   *srow;          /* Pointer to SBCS row in 'uni2char' */
1223   cups_utf32_t  unichar;        /* Character value */
1224   int           worklen;        /* Internal UCS-4 string length */
1225   cups_utf32_t  work[CUPS_MAX_USTRING];
1226                                 /* Internal UCS-4 string */
1227   int           i;              /* Looping variable */
1228
1229  /*
1230   * Check for valid arguments and clear output...
1231   */
1232   if ((dest == NULL)
1233   || (src == NULL)
1234   || (maxout < 1)
1235   || (maxout > CUPS_MAX_USTRING)
1236   || (encoding == CUPS_UTF8))
1237     return (-1);
1238   *dest = '\0';
1239
1240  /*
1241   * Find legacy charset map in cache...
1242   */
1243   cmap = (cups_cmap_t *) cupsCharmapGet(encoding);
1244   if (cmap == NULL)
1245     return (-1);
1246
1247  /*
1248   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1249   */
1250   worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1251   if (worklen < 0)
1252     return (-1);
1253
1254  /*
1255   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
1256   */
1257   for (i = 0; i < worklen;)
1258   {
1259     unichar = work[i];
1260     if (unichar == 0)
1261       break;
1262     i ++;
1263
1264    /*
1265     * Check for leading BOM (and delete from output)...
1266     */
1267     if ((i == 1) && (unichar == 0xfeff))
1268       continue;
1269
1270    /*
1271     * Convert ASCII verbatim (optimization)...
1272     */
1273     if (unichar <= 0x7f)
1274     {
1275       *dest = (char) unichar;
1276       dest ++;
1277       continue;
1278     }
1279
1280    /*
1281     * Convert unknown character to visible replacement...
1282     */
1283     srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1284     if (srow)
1285       srow += (int) (unichar & 0xff);
1286     if ((srow == NULL) || (*srow == 0))
1287       *dest = '?';
1288     else
1289       *dest = (char) (*srow);
1290     dest ++;
1291   }
1292   *dest = '\0';
1293   worklen = (int) (dest - start);
1294   cupsCharmapFree(encoding);
1295   return (worklen);
1296 }
1297
1298 /*
1299  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1300  */
1301 static int                              /* O - Count or -1 on error */
1302 conv_utf8_to_vbcs(char *dest,           /* O - Target string */
1303     const cups_utf8_t *src,             /* I - Source string */
1304     const int maxout,                   /* I - Max output */
1305     const cups_encoding_t encoding)     /* I - Encoding */
1306 {
1307   char          *start = dest;  /* Start of destination string */
1308   cups_vmap_t   *vmap;          /* Legacy DBCS / Unicode Charset Map */
1309   cups_vbcs_t   *vrow;          /* Pointer to VBCS row in 'uni2char' */
1310   cups_utf32_t  unichar;        /* Character value */
1311   cups_vbcs_t   legchar;        /* Legacy character value */
1312   int           worklen;        /* Internal UCS-4 string length */
1313   cups_utf32_t  work[CUPS_MAX_USTRING];
1314                                 /* Internal UCS-4 string */
1315   int           i;              /* Looping variable */
1316
1317  /*
1318   * Check for valid arguments and clear output...
1319   */
1320   if ((dest == NULL)
1321   || (src == NULL)
1322   || (maxout < 1)
1323   || (maxout > CUPS_MAX_USTRING)
1324   || (encoding == CUPS_UTF8))
1325     return (-1);
1326   *dest = '\0';
1327
1328  /*
1329   * Find legacy charset map in cache...
1330   */
1331   vmap = (cups_vmap_t *) cupsCharmapGet(encoding);
1332   if (vmap == NULL)
1333     return (-1);
1334
1335  /*
1336   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1337   */
1338   worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1339   if (worklen < 0)
1340     return (-1);
1341
1342  /*
1343   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1344   */
1345   for (i = 0; i < worklen;)
1346   {
1347     unichar = work[i];
1348     if (unichar == 0)
1349       break;
1350     i ++;
1351
1352    /*
1353     * Check for leading BOM (and delete from output)...
1354     */
1355     if ((i == 1) && (unichar == 0xfeff))
1356       continue;
1357
1358    /*
1359     * Convert ASCII verbatim (optimization)...
1360     */
1361     if (unichar <= 0x7f)
1362     {
1363       *dest = (char) unichar;
1364       dest ++;
1365       continue;
1366     }
1367
1368    /*
1369     * Convert unknown character to visible replacement...
1370     */
1371     vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1372     if (vrow)
1373       vrow += (int) (unichar & 0xff);
1374     if ((vrow == NULL) || (*vrow == 0))
1375       legchar = (cups_vbcs_t) '?';
1376     else
1377       legchar = (cups_vbcs_t) *vrow;
1378
1379    /*
1380     * Save n-byte legacy character...
1381     */
1382     if (legchar > 0xffffff)
1383     {
1384       *dest = (char) ((legchar >> 24) & 0xff);
1385       dest++;
1386     }
1387     if (legchar > 0xffff)
1388     {
1389       *dest = (char) ((legchar >> 16) & 0xff);
1390       dest++;
1391     }
1392     if (legchar > 0xff)
1393     {
1394       *dest = (char) ((legchar >> 8) & 0xff);
1395       dest++;
1396     }
1397     *dest = (char) (legchar & 0xff);
1398     dest ++;
1399   }
1400   *dest = '\0';
1401   worklen = (int) (dest - start);
1402   cupsCharmapFree(encoding);
1403   return (worklen);
1404 }
1405
1406 /*
1407  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
1408  */
1409 static int                              /* O - Count or -1 on error */
1410 conv_sbcs_to_utf8(cups_utf8_t *dest,    /* O - Target string */
1411     const char *src,                    /* I - Source string */
1412     const int maxout,                   /* I - Max output */
1413     const cups_encoding_t encoding)     /* I - Encoding */
1414 {
1415   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
1416   cups_ucs2_t   *crow;          /* Pointer to UCS-2 row in 'char2uni' */
1417   unsigned long legchar;        /* Legacy character value */
1418   cups_utf32_t  unichar;        /* Unicode character value */
1419   int           worklen;        /* Internal UCS-4 string length */
1420   cups_utf32_t  work[CUPS_MAX_USTRING];
1421                                 /* Internal UCS-4 string */
1422   int           i;              /* Looping variable */
1423
1424  /*
1425   * Check for valid arguments and clear output...
1426   */
1427   if ((dest == NULL)
1428   || (src == NULL)
1429   || (maxout < 1)
1430   || (maxout > CUPS_MAX_USTRING)
1431   || (encoding == CUPS_UTF8))
1432     return (-1);
1433   *dest = '\0';
1434
1435  /*
1436   * Find legacy charset map in cache...
1437   */
1438   cmap = (cups_cmap_t *) cupsCharmapGet(encoding);
1439   if (cmap == NULL)
1440     return (-1);
1441
1442  /*
1443   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1444   */
1445   work[0] = 0xfeff;
1446   for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1447   {
1448     if (*src == '\0')
1449       break;
1450     legchar = (unsigned long) *src;
1451
1452    /*
1453     * Convert ASCII verbatim (optimization)...
1454     */
1455     if (legchar <= 0x7f)
1456     {
1457       work[i] = (cups_utf32_t) legchar;
1458       i ++;
1459       continue;
1460     }
1461
1462    /*
1463     * Convert unknown character to Replacement Character...
1464     */
1465     crow = &cmap->char2uni[0];
1466     crow += (int) legchar;
1467     if (*crow == 0)
1468       unichar = 0xfffd;
1469     else
1470       unichar = (cups_utf32_t) *crow;
1471     work[i] = unichar;
1472     i ++;
1473   }
1474   work[i] = 0;
1475
1476  /*
1477   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1478   */
1479   worklen = cupsUTF32ToUTF8(dest, work, maxout);
1480   cupsCharmapFree(encoding);
1481   return (worklen);
1482 }
1483
1484
1485 /*
1486  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1487  */
1488 static int                              /* O - Count or -1 on error */
1489 conv_vbcs_to_utf8(cups_utf8_t *dest,    /* O - Target string */
1490     const char *src,                    /* I - Source string */
1491     const int maxout,                   /* I - Max output */
1492     const cups_encoding_t encoding)     /* I - Encoding */
1493 {
1494   cups_vmap_t       *vmap;      /* Legacy VBCS / Unicode Charset Map */
1495   cups_ucs2_t       *crow;      /* Pointer to UCS-2 row in 'char2uni' */
1496   cups_wide2uni_t   *wide2uni;  /* Pointer to row in 'wide2uni' */
1497   cups_sbcs_t       leadchar;   /* Lead char of n-byte legacy char */
1498   cups_vbcs_t       legchar;    /* Legacy character value */
1499   cups_utf32_t      unichar;    /* Unicode character value */
1500   int               i;          /* Looping variable */
1501   int               worklen;    /* Internal UCS-4 string length */
1502   cups_utf32_t      work[CUPS_MAX_USTRING];
1503                                 /* Internal UCS-4 string */
1504
1505  /*
1506   * Check for valid arguments and clear output...
1507   */
1508   if ((dest == NULL)
1509   || (src == NULL)
1510   || (maxout < 1)
1511   || (maxout > CUPS_MAX_USTRING)
1512   || (encoding == CUPS_UTF8))
1513     return (-1);
1514   *dest = '\0';
1515
1516  /*
1517   * Find legacy charset map in cache...
1518   */
1519   vmap = (cups_vmap_t *) cupsCharmapGet(encoding);
1520   if (vmap == NULL)
1521     return (-1);
1522
1523  /*
1524   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1525   */
1526   work[0] = 0xfeff;
1527   for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1528   {
1529     if (*src == '\0')
1530       break;
1531     legchar = (cups_vbcs_t) *src;
1532     leadchar = (cups_sbcs_t) *src;
1533
1534    /*
1535     * Convert ASCII verbatim (optimization)...
1536     */
1537     if (legchar <= 0x7f)
1538     {
1539       work[i] = (cups_utf32_t) legchar;
1540       i ++;
1541       continue;
1542     }
1543
1544    /*
1545     * Convert 2-byte legacy character...
1546     */
1547     if (vmap->lead2char[(int) leadchar] == leadchar)
1548     {
1549       src ++;
1550       if (*src == '\0')
1551         return (-1);
1552       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1553
1554      /*
1555       * Convert unknown character to Replacement Character...
1556       */
1557       crow = vmap->char2uni[(int) ((legchar >> 8) & 0xff)];
1558       if (crow)
1559         crow += (int) (legchar & 0xff);
1560       if ((crow == NULL) || (*crow == 0))
1561         unichar = 0xfffd;
1562       else
1563         unichar = (cups_utf32_t) *crow;
1564       work[i] = unichar;
1565       i ++;
1566       continue;
1567     }
1568
1569    /*
1570     * Fetch 3-byte or 4-byte legacy character...
1571     */
1572     if (vmap->lead3char[(int) leadchar] == leadchar)
1573     {
1574       src ++;
1575       if (*src == '\0')
1576         return (-1);
1577       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1578       src ++;
1579       if (*src == '\0')
1580         return (-1);
1581       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1582     }
1583     else if (vmap->lead4char[(int) leadchar] == leadchar)
1584     {
1585       src ++;
1586       if (*src == '\0')
1587         return (-1);
1588       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1589       src ++;
1590       if (*src == '\0')
1591         return (-1);
1592       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1593       src ++;
1594       if (*src == '\0')
1595         return (-1);
1596       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1597     }
1598     else
1599       return (-1);
1600
1601    /*
1602     * Find 3-byte or 4-byte legacy character...
1603     */
1604     wide2uni = vmap->wide2uni;
1605     wide2uni = (cups_wide2uni_t *) bsearch(&legchar,
1606                                            vmap->wide2uni,
1607                                            vmap->widecount,
1608                                            sizeof(cups_wide2uni_t),
1609                                            compare_wide);
1610
1611    /*
1612     * Convert unknown character to Replacement Character...
1613     */
1614     if ((wide2uni == NULL) || (wide2uni->unichar == 0))
1615       unichar = 0xfffd;
1616     else
1617       unichar = wide2uni->unichar;
1618     work[i] = unichar;
1619     i ++;
1620   }
1621   work[i] = 0;
1622
1623  /*
1624   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1625   */
1626   worklen = cupsUTF32ToUTF8(dest, work, maxout);
1627   cupsCharmapFree(encoding);
1628   return (worklen);
1629 }
1630
1631 /*
1632  * 'compare_wide()' - Compare key for wide (VBCS) match.
1633  */
1634 static int
1635 compare_wide(const void *k1,            /* I - Key char */
1636     const void *k2)                     /* I - Map char */
1637 {
1638   cups_vbcs_t       *kp = (cups_vbcs_t *) k1;
1639                                 /* Key char pointer */
1640   cups_wide2uni_t   *mp = (cups_wide2uni_t *) k2;
1641                                 /* Map char pointer */
1642   cups_vbcs_t       key;        /* Legacy key character */
1643   cups_vbcs_t       map;        /* Legacy map character */
1644   int               result;     /* Result Value */
1645
1646   key = *kp;
1647   map = mp->widechar;
1648   if (key >= map)
1649     result = (int) (key - map);
1650   else
1651     result = -1 * ((int) (map - key));
1652   return (result);
1653 }
1654
1655 /*
1656  * End of "$Id$"
1657  */