cups/transcode.c

   1 /*
   2  * "$Id$"
   3  *
   4  *   Transcoding support for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 1997-2005 by Easy Software Products.
   7  *
   8  *   These coded instructions, statements, and computer programs are
   9  *   the property of Easy Software Products and are protected by Federal
  10  *   copyright law.  Distribution and use rights are outlined in the
  11  *   file "LICENSE.txt" which should have been included with this file.
  12  *   If this file is missing or damaged please contact Easy Software
  13  *   Products at:
  14  *
  15  *       Attn: CUPS Licensing Information
  16  *       Easy Software Products
  17  *       44141 Airport View Drive, Suite 204
  18  *       Hollywood, Maryland 20636 USA
  19  *
  20  *       Voice: (301) 373-9600
  21  *       EMail: cups-info@cups.org
  22  *         WWW: http://www.cups.org
  23  *
  24  * Contents:
  25  *
  26  *   cupsCharmapGet()    - Get a character set map.
  27  *   cupsCharmapFree()   - Free a character set map.
  28  *   cupsCharmapFlush()  - Flush all character set maps out of cache.
  29  *   cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
  30  *   cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
  31  *   cupsUTF8ToUTF16()   - Convert UTF-8 to UTF-16.
  32  *   cupsUTF16ToUTF8()   - Convert UTF-16 to UTF-8.
  33  *   cupsUTF8ToUTF32()   - Convert UTF-8 to UTF-32.
  34  *   cupsUTF32ToUTF8()   - Convert UTF-32 to UTF-8.
  35  *   cupsUTF16ToUTF32()  - Convert UTF-16 to UTF-32.
  36  *   cupsUTF32ToUTF16()  - Convert UTF-32 to UTF-16.
  37  *   get_charmap_count() - Count lines in a charmap file.
  38  *   get_sbcs_charmap()  - Get SBCS Charmap.
  39  *   get_vbcs_charmap()  - Get DBCS/VBCS Charmap.
  40  *   conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
  41  *   conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
  42  *   conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
  43  *   conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
  44  *   compare_wide()      - Compare key for wide (VBCS) match.
  45  */
  46
  47 /*
  48  * Include necessary headers...
  49  */
  50
  51 #include "globals.h"
  52 #include <stdlib.h>
  53 #include <errno.h>
  54 #include <time.h>
  55
  56
  57 /*
  58  * Prototypes...
  59  */
  60
  61 static int get_charmap_count(const char *filename);
  62 static cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
  63                                      const char *filename);
  64 static cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
  65                                      const char *filename);
  66
  67 static int conv_utf8_to_sbcs(char *dest,
  68                              const cups_utf8_t *src,
  69                              const int maxout,
  70                              const cups_encoding_t encoding);
  71 static int conv_utf8_to_vbcs(char *dest,
  72                              const cups_utf8_t *src,
  73                              const int maxout,
  74                              const cups_encoding_t encoding);
  75
  76 static int conv_sbcs_to_utf8(cups_utf8_t *dest,
  77                              const char *src,
  78                              const int maxout,
  79                              const cups_encoding_t encoding);
  80 static int conv_vbcs_to_utf8(cups_utf8_t *dest,
  81                              const char *src,
  82                              const int maxout,
  83                              const cups_encoding_t encoding);
  84
  85 static int compare_wide(const void *k1, const void *k2);
  86
  87 /*
  88  * 'cupsCharmapGet()' - Get a character set map.
  89  *
  90  * This code handles single-byte (SBCS), double-byte (DBCS), and
  91  * variable-byte (VBCS) character sets _without_ charset escapes...
  92  * This code does not handle multiple-byte character sets (MBCS)
  93  * (such as ISO-2022-JP) with charset switching via escapes...
  94  */
  95 void *                                  /* O - Charset map pointer */
  96 cupsCharmapGet(const cups_encoding_t encoding)
  97                                         /* I - Encoding */
  98 {
  99   char          *datadir;       /* CUPS_DATADIR environment variable */
 100   char          mapname[80];    /* Name of charset map */
 101   char          filename[1024];  /* Filename for charset map file */
 102
 103  /*
 104   * Check for valid arguments...
 105   */
 106   if ((encoding < 0) || (encoding >= CUPS_ENCODING_VBCS_END))
 107     return (NULL);
 108
 109  /*
 110   * Get the data directory and charset map name...
 111   */
 112   if ((datadir = getenv("CUPS_DATADIR")) == NULL)
 113     datadir = CUPS_DATADIR;
 114   snprintf(mapname, sizeof(mapname), "%s.txt", cupsEncodingName(encoding));
 115   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
 116            datadir, mapname);
 117
 118  /*
 119   * Read charset map input file into cache...
 120   */
 121   if (encoding < CUPS_ENCODING_SBCS_END)
 122     return (get_sbcs_charmap(encoding, filename));
 123   else if (encoding < CUPS_ENCODING_VBCS_END)
 124     return (get_vbcs_charmap(encoding, filename));
 125   else
 126     return (NULL);
 127 }
 128
 129 /*
 130  * 'cupsCharmapFree()' - Free a character set map.
 131  *
 132  * This does not actually free; use 'cupsCharmapFlush()' for that.
 133  */
 134 void
 135 cupsCharmapFree(const cups_encoding_t encoding)
 136                                         /* I - Encoding */
 137 {
 138   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
 139   cups_vmap_t   *vmap;          /* Legacy VBCS / Unicode Charset Map */
 140   cups_globals_t *cg = _cupsGlobals();
 141                                 /* Pointer to library globals */
 142
 143  /*
 144   * See if we already have this SBCS charset map loaded...
 145   */
 146   for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
 147   {
 148     if (cmap->encoding == encoding)
 149     {
 150       if (cmap->used > 0)
 151         cmap->used --;
 152       return;
 153     }
 154   }
 155
 156  /*
 157   * See if we already have this DBCS/VBCS charset map loaded...
 158   */
 159   for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
 160   {
 161     if (vmap->encoding == encoding)
 162     {
 163       if (vmap->used > 0)
 164         vmap->used --;
 165       return;
 166     }
 167   }
 168   return;
 169 }
 170
 171 /*
 172  * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
 173  */
 174 void
 175 cupsCharmapFlush(void)
 176 {
 177   int           i;              /* Looping variable */
 178   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
 179   cups_vmap_t   *vmap;          /* Legacy VBCS / Unicode Charset Map */
 180   cups_cmap_t   *cnext;         /* Next Legacy SBCS Charset Map */
 181   cups_vmap_t   *vnext;         /* Next Legacy VBCS Charset Map */
 182   cups_ucs2_t   *crow;          /* Pointer to UCS-2 row in 'char2uni' */
 183   cups_sbcs_t   *srow;          /* Pointer to SBCS row in 'uni2char' */
 184   cups_vbcs_t   *vrow;          /* Pointer to VBCS row in 'uni2char' */
 185   cups_globals_t *cg = _cupsGlobals();
 186                                 /* Pointer to library globals */
 187
 188  /*
 189   * Loop through SBCS charset map cache, free all memory...
 190   */
 191   for (cmap = cg->cmap_cache; cmap != NULL; cmap = cnext)
 192   {
 193     for (i = 0; i < 256; i ++)
 194     {
 195       if ((srow = cmap->uni2char[i]) != NULL)
 196         free(srow);
 197     }
 198     cnext = cmap->next;
 199     free(cmap);
 200   }
 201   cg->cmap_cache = NULL;
 202
 203  /*
 204   * Loop through DBCS/VBCS charset map cache, free all memory...
 205   */
 206   for (vmap = cg->vmap_cache; vmap != NULL; vmap = vnext)
 207   {
 208     for (i = 0; i < 256; i ++)
 209     {
 210       if ((crow = vmap->char2uni[i]) != NULL)
 211         free(crow);
 212     }
 213     for (i = 0; i < 256; i ++)
 214     {
 215       if ((vrow = vmap->uni2char[i]) != NULL)
 216         free(vrow);
 217     }
 218     if (vmap->wide2uni)
 219       free(vmap->wide2uni);
 220     vnext = vmap->next;
 221     free(vmap);
 222   }
 223   cg->vmap_cache = NULL;
 224   return;
 225 }
 226
 227 /*
 228  * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
 229  *
 230  * This code handles single-byte (SBCS), double-byte (DBCS), and
 231  * variable-byte (VBCS) character sets _without_ charset escapes...
 232  * This code does not handle multiple-byte character sets (MBCS)
 233  * (such as ISO-2022-JP) with charset switching via escapes...
 234  */
 235 int                                     /* O - Count or -1 on error */
 236 cupsUTF8ToCharset(char *dest,           /* O - Target string */
 237     const cups_utf8_t *src,             /* I - Source string */
 238     const int maxout,                   /* I - Max output */
 239     const cups_encoding_t encoding)     /* I - Encoding */
 240 {
 241  /*
 242   * Check for valid arguments...
 243   */
 244   if ((dest == NULL)
 245   || (src == NULL)
 246   || (maxout < 1)
 247   || (maxout > CUPS_MAX_USTRING)
 248   || (encoding < 0)
 249   || (encoding == CUPS_UTF8)
 250   || (encoding >= CUPS_ENCODING_VBCS_END))
 251     return (-1);
 252
 253  /*
 254   * Convert input UTF-8 to legacy charset...
 255   */
 256   if (encoding < CUPS_ENCODING_SBCS_END)
 257     return (conv_utf8_to_sbcs(dest, src, maxout, encoding));
 258   else if (encoding < CUPS_ENCODING_VBCS_END)
 259     return (conv_utf8_to_vbcs(dest, src, maxout, encoding));
 260   else
 261     return (-1);
 262 }
 263
 264 /*
 265  * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
 266  *
 267  * This code handles single-byte (SBCS), double-byte (DBCS), and
 268  * variable-byte (VBCS) character sets _without_ charset escapes...
 269  * This code does not handle multiple-byte character sets (MBCS)
 270  * (such as ISO-2022-JP) with charset switching via escapes...
 271  */
 272 int                                     /* O - Count or -1 on error */
 273 cupsCharsetToUTF8(cups_utf8_t *dest,    /* O - Target string */
 274     const char *src,                    /* I - Source string */
 275     const int maxout,                   /* I - Max output */
 276     const cups_encoding_t encoding)     /* I - Encoding */
 277 {
 278  /*
 279   * Check for valid arguments...
 280   */
 281   if ((dest == NULL)
 282   || (src == NULL)
 283   || (maxout < 1)
 284   || (maxout > CUPS_MAX_USTRING)
 285   || (encoding < 0)
 286   || (encoding == CUPS_UTF8)
 287   || (encoding >= CUPS_ENCODING_VBCS_END))
 288     return (-1);
 289
 290  /*
 291   * Convert input legacy charset to UTF-8...
 292   */
 293   if (encoding < CUPS_ENCODING_SBCS_END)
 294     return (conv_sbcs_to_utf8(dest, src, maxout, encoding));
 295   else if (encoding < CUPS_ENCODING_VBCS_END)
 296     return (conv_vbcs_to_utf8(dest, src, maxout, encoding));
 297   else
 298     return (-1);
 299 }
 300
 301 /*
 302  * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
 303  *
 304  * This code does not support Unicode beyond 16-bits (Plane 0)...
 305  */
 306 int                                     /* O - Count or -1 on error */
 307 cupsUTF8ToUTF16(cups_utf16_t *dest,     /* O - Target string */
 308     const cups_utf8_t *src,             /* I - Source string */
 309     const int maxout)                   /* I - Max output */
 310 {
 311   int           worklen;        /* Internal UCS-4 string length */
 312   cups_utf32_t  work[CUPS_MAX_USTRING];
 313                                 /* Internal UCS-4 string */
 314
 315  /*
 316   * Check for valid arguments and clear output...
 317   */
 318   if ((dest == NULL)
 319   || (src == NULL)
 320   || (maxout < 1)
 321   || (maxout > CUPS_MAX_USTRING))
 322     return (-1);
 323   *dest = 0;
 324
 325  /*
 326   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 327   */
 328   worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
 329   if (worklen < 0)
 330     return (-1);
 331
 332  /*
 333   * Convert internal UCS-4 to output UTF-16...
 334   */
 335   worklen = cupsUTF32ToUTF16(dest, work, maxout);
 336   return (worklen);
 337 }
 338
 339 /*
 340  * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
 341  *
 342  * This code does not support Unicode beyond 16-bits (Plane 0)...
 343  */
 344 int                                     /* O - Count or -1 on error */
 345 cupsUTF16ToUTF8(cups_utf8_t *dest,      /* O - Target string */
 346     const cups_utf16_t *src,            /* I - Source string */
 347     const int maxout)                   /* I - Max output */
 348 {
 349   int           worklen;        /* Internal UCS-4 string length */
 350   cups_utf32_t  work[CUPS_MAX_USTRING];
 351                                 /* Internal UCS-4 string */
 352
 353  /*
 354   * Check for valid arguments and clear output...
 355   */
 356   if ((dest == NULL)
 357   || (src == NULL)
 358   || (maxout < 1)
 359   || (maxout > CUPS_MAX_USTRING))
 360     return (-1);
 361   *dest = 0;
 362
 363  /*
 364   * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
 365   */
 366   worklen = cupsUTF16ToUTF32(work, src, CUPS_MAX_USTRING);
 367   if (worklen < 0)
 368     return (-1);
 369
 370  /*
 371   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 372   */
 373   worklen = cupsUTF32ToUTF8(dest, work, maxout);
 374   return (worklen);
 375 }
 376
 377 /*
 378  * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
 379  *
 380  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 381  *
 382  *   UTF-32 char     UTF-8 char(s)
 383  *   --------------------------------------------------
 384  *        0 to 127 = 0xxxxxxx (US-ASCII)
 385  *     128 to 2047 = 110xxxxx 10yyyyyy
 386  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 387  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 388  *
 389  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 390  * which would convert to five- or six-octet UTF-8 sequences...
 391  *
 392  * This code does not support Unicode beyond 16-bits (Plane 0)...
 393  */
 394 int                                     /* O - Count or -1 on error */
 395 cupsUTF8ToUTF32(cups_utf32_t *dest,     /* O - Target string */
 396     const cups_utf8_t *src,             /* I - Source string */
 397     const int maxout)                   /* I - Max output */
 398 {
 399   cups_utf8_t   *first = (cups_utf8_t *) src;
 400   int           srclen;         /* Source string length */
 401   int           i;              /* Looping variable */
 402   cups_utf32_t  ch;             /* Character value */
 403   cups_utf32_t  next;           /* Next character value */
 404   cups_utf32_t  ch32;           /* UTF-32 character value */
 405
 406  /*
 407   * Check for valid arguments and clear output...
 408   */
 409   if ((dest == NULL)
 410   || (src == NULL)
 411   || (maxout < 1)
 412   || (maxout > CUPS_MAX_USTRING))
 413     return (-1);
 414   *dest = 0;
 415
 416  /*
 417   * Convert input UTF-8 to output UTF-32 (and insert BOM)...
 418   */
 419   *dest = 0xfeff;
 420   dest ++;
 421   srclen = strlen((char *) src);
 422   for (i = 1; i < (maxout - 1); src ++, dest ++)
 423   {
 424     ch = (cups_utf32_t) *src;
 425     ch &= 0xff;
 426     if (ch == 0)
 427       break;
 428     i ++;
 429
 430    /*
 431     * Convert UTF-8 character(s) to UTF-32 character...
 432     */
 433     if ((ch & 0x7f) == ch)
 434     {
 435      /*
 436       * One-octet UTF-8 <= 127 (US-ASCII)...
 437       */
 438       *dest = ch;
 439     }
 440     else if ((ch & 0xe0) == 0xc0)
 441     {
 442      /*
 443       * Two-octet UTF-8 <= 2047 (Latin-x)...
 444       */
 445       src ++;
 446       next = (cups_utf32_t) *src;
 447       next &= 0xff;
 448       if (next == 0)
 449         return (-1);
 450       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 451
 452      /*
 453       * Check for non-shortest form (invalid UTF-8)...
 454       */
 455       if (ch32 <= 127)
 456         return (-1);
 457       *dest = ch32;
 458     }
 459     else if ((ch & 0xf0) == 0xe0)
 460     {
 461      /*
 462       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 463       */
 464       src ++;
 465       next = (cups_utf32_t) *src;
 466       next &= 0xff;
 467       if (next == 0)
 468         return (-1);
 469       ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
 470       src ++;
 471       next = (cups_utf32_t) *src;
 472       next &= 0xff;
 473       if (next == 0)
 474         return (-1);
 475       ch32 = ((ch32 << 6) | (next & 0x3f));
 476
 477      /*
 478       * Check for non-shortest form (invalid UTF-8)...
 479       */
 480       if (ch32 <= 2047)
 481         return (-1);
 482       *dest = ch32;
 483     }
 484     else if ((ch & 0xf8) == 0xf0)
 485     {
 486      /*
 487       * Four-octet UTF-8 to Replacement Character...
 488       */
 489       if (((src - first) + 3) >= srclen)
 490         return (-1);
 491       src += 3;
 492       *dest = 0xfffd;
 493     }
 494     else if ((ch & 0xfc) == 0xf8)
 495     {
 496      /*
 497       * Five-octet UTF-8 (invalid strict UTF-32)...
 498       */
 499       return (-1);
 500     }
 501     else if ((ch & 0xfe) == 0xfc)
 502     {
 503      /*
 504       * Six-octet UTF-8 (invalid strict UTF-32)...
 505       */
 506       return (-1);
 507     }
 508     else
 509     {
 510      /*
 511       * More than six-octet (invalid UTF-8 sequence)...
 512       */
 513       return (-1);
 514     }
 515
 516    /*
 517     * Check for UTF-16 surrogate (illegal UTF-8)...
 518     */
 519     if ((*dest >= 0xd800) && (*dest <= 0xdfff))
 520       return (-1);
 521
 522    /*
 523     * Check for beyond Plane 16 (invalid UTF-8)...
 524     */
 525     if (*dest > 0x10ffff)
 526       return (-1);
 527   }
 528   *dest = 0;
 529   return (i);
 530 }
 531
 532 /*
 533  * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
 534  *
 535  * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
 536  *
 537  *   UTF-32 char     UTF-8 char(s)
 538  *   --------------------------------------------------
 539  *        0 to 127 = 0xxxxxxx (US-ASCII)
 540  *     128 to 2047 = 110xxxxx 10yyyyyy
 541  *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 542  *         > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
 543  *
 544  * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
 545  * which would convert to five- or six-octet UTF-8 sequences...
 546  *
 547  * This code does not support Unicode beyond 16-bits (Plane 0)...
 548  */
 549 int                                     /* O - Count or -1 on error */
 550 cupsUTF32ToUTF8(cups_utf8_t *dest,      /* O - Target string */
 551     const cups_utf32_t *src,            /* I - Source string */
 552     const int maxout)                   /* I - Max output */
 553 {
 554   cups_utf32_t  *first = (cups_utf32_t *) src;
 555                                 /* First source char */
 556   cups_utf8_t   *start = dest;  /* Start of destination string */
 557   int           i;              /* Looping variable */
 558   int           swap = 0;       /* Byte-swap input to output */
 559   cups_utf32_t  ch;             /* Character value */
 560
 561  /*
 562   * Check for valid arguments and clear output...
 563   */
 564   if ((dest == NULL)
 565   || (src == NULL)
 566   || (maxout < 1))
 567     return (-1);
 568   *dest = '\0';
 569
 570  /*
 571   * Check for leading BOM in UTF-32 and inverted BOM...
 572   */
 573   if (*src == 0xfffe0000)
 574     swap = 1;
 575
 576  /*
 577   * Convert input UTF-32 to output UTF-8...
 578   */
 579   for (i = 0; i < (maxout - 1); src ++)
 580   {
 581     ch = *src;
 582     if (ch == 0)
 583       break;
 584
 585    /*
 586     * Byte swap input UTF-32, if necessary...
 587     */
 588     if (swap)
 589       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 590
 591    /*
 592     * Check for leading BOM (and delete from output)...
 593     */
 594     if ((src == first) && (ch == 0xfeff))
 595       continue;
 596
 597    /*
 598     * Check for beyond Plane 16 (invalid UTF-32)...
 599     */
 600     if (ch > 0x10ffff)
 601       return (-1);
 602
 603    /*
 604     * Convert beyond Plane 0 (BMP) to Replacement Character...
 605     */
 606     if (ch > 0xffff)
 607       ch = 0xfffd;
 608
 609    /*
 610     * Convert UTF-32 character to UTF-8 character(s)...
 611     */
 612     if (ch <= 0x7f)
 613     {
 614      /*
 615       * One-octet UTF-8 <= 127 (US-ASCII)...
 616       */
 617       *dest = (cups_utf8_t) ch;
 618       dest ++;
 619       i ++;
 620     }
 621     else if (ch <= 0x7ff)
 622     {
 623      /*
 624       * Two-octet UTF-8 <= 2047 (Latin-x)...
 625       */
 626       if (i > (maxout - 2))
 627         break;
 628       *dest = (cups_utf8_t) (0xc0 | ((ch >> 6) & 0x1f));
 629       dest ++;
 630       i ++;
 631       *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
 632       dest ++;
 633       i ++;
 634     }
 635     else
 636     {
 637      /*
 638       * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
 639       */
 640       if (i > (maxout - 3))
 641         break;
 642       *dest = (cups_utf8_t) (0xe0 | ((ch >> 12) & 0x0f));
 643       dest ++;
 644       i ++;
 645       *dest = (cups_utf8_t) (0x80 | ((ch >> 6) & 0x3f));
 646       dest ++;
 647       i ++;
 648       *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
 649       dest ++;
 650       i ++;
 651     }
 652   }
 653   *dest = '\0';
 654   i = (int) (dest - start);
 655   return (i);
 656 }
 657
 658 /*
 659  * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
 660  *
 661  * This code does not support Unicode beyond 16-bits (Plane 0)...
 662  */
 663 int                                     /* O - Count or -1 on error */
 664 cupsUTF16ToUTF32(cups_utf32_t *dest,    /* O - Target string */
 665     const cups_utf16_t *src,            /* I - Source string */
 666     const int maxout)                   /* I - Max output */
 667 {
 668   int           i;              /* Looping variable */
 669   int           swap = 0;       /* Byte-swap input to output */
 670   int           surrogate = 0;  /* Expecting low-half surrogate */
 671   cups_utf32_t  ch;             /* Character value */
 672
 673  /*
 674   * Check for valid arguments and clear output...
 675   */
 676   if ((dest == NULL)
 677   || (src == NULL)
 678   || (maxout < 1)
 679   || (maxout > CUPS_MAX_USTRING))
 680     return (-1);
 681   *dest = 0;
 682
 683  /*
 684   * Check for leading BOM in UTF-16 and inverted BOM...
 685   */
 686   if (*src == 0xfffe)
 687     swap = 1;
 688
 689  /*
 690   * Convert input UTF-16 to output UTF-32...
 691   */
 692   for (i = 0; i < (maxout - 1); src ++)
 693   {
 694     ch = (cups_utf32_t) (*src & 0xffff);
 695     if (ch == 0)
 696       break;
 697     i ++;
 698
 699    /*
 700     * Byte swap input UTF-16, if necessary...
 701     */
 702     if (swap)
 703       ch = (cups_utf32_t) ((ch << 8) | (ch >> 8));
 704
 705    /*
 706     * Discard expected UTF-16 low-half surrogate...
 707     */
 708     if ((ch >= 0xdc00) && (ch <= 0xdfff))
 709     {
 710       if (surrogate == 0)
 711         return (-1);
 712       surrogate = 0;
 713       continue;
 714     }
 715
 716    /*
 717     * Convert UTF-16 high-half surrogate to Replacement Character...
 718     */
 719     if ((ch >= 0xd800) && (ch <= 0xdbff))
 720     {
 721       if (surrogate == 1)
 722         return (-1);
 723       surrogate = 1;
 724       ch = 0xfffd;
 725     }
 726     *dest = ch;
 727     dest ++;
 728   }
 729   *dest = 0;
 730   return (i);
 731 }
 732
 733 /*
 734  * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
 735  *
 736  * This code does not support Unicode beyond 16-bits (Plane 0)...
 737  */
 738 int                                     /* O - Count or -1 on error */
 739 cupsUTF32ToUTF16(cups_utf16_t *dest,    /* O - Target string */
 740     const cups_utf32_t *src,            /* I - Source string */
 741     const int maxout)                   /* I - Max output */
 742 {
 743   int           i;              /* Looping variable */
 744   int           swap = 0;       /* Byte-swap input to output */
 745   cups_utf32_t  ch;             /* Character value */
 746
 747  /*
 748   * Check for valid arguments and clear output...
 749   */
 750   if ((dest == NULL)
 751   || (src == NULL)
 752   || (maxout < 1)
 753   || (maxout > CUPS_MAX_USTRING))
 754     return (-1);
 755   *dest = 0;
 756
 757  /*
 758   * Check for leading BOM in UTF-32 and inverted BOM...
 759   */
 760   if (*src == 0xfffe0000)
 761     swap = 1;
 762
 763  /*
 764   * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
 765   */
 766   for (i = 0; i < (maxout - 1); src ++, dest ++)
 767   {
 768     ch = *src;
 769     if (ch == 0)
 770       break;
 771     i ++;
 772
 773    /*
 774     * Byte swap input UTF-32, if necessary...
 775     */
 776     if (swap)
 777       ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
 778
 779    /*
 780     * Check for UTF-16 surrogate (illegal UTF-32)...
 781     */
 782     if ((ch >= 0xd800) && (ch <= 0xdfff))
 783       return (-1);
 784
 785    /*
 786     * Check for beyond Plane 16 (invalid UTF-32)...
 787     */
 788     if (ch > 0x10ffff)
 789       return (-1);
 790
 791    /*
 792     * Convert beyond Plane 0 (BMP) to Replacement Character...
 793     */
 794     if (ch > 0xffff)
 795       ch = 0xfffd;
 796     *dest = (cups_utf16_t) ch;
 797   }
 798   *dest = 0;
 799   return (i);
 800 }
 801
 802 /*
 803  * 'get_charmap_count()' - Count lines in a charmap file.
 804  */
 805 static int                              /* O - Count or -1 on error */
 806 get_charmap_count(const char *filename) /* I - Charmap Filename */
 807 {
 808   int           i;              /* Looping variable */
 809   cups_file_t   *fp;            /* Map input file pointer */
 810   char          *s;             /* Line parsing pointer */
 811   char          line[256];      /* Line from input map file */
 812   cups_utf32_t  unichar;        /* Unicode character value */
 813
 814  /*
 815   * Open map input file...
 816   */
 817   if ((filename == NULL) || (*filename == '\0'))
 818     return (-1);
 819   fp = cupsFileOpen(filename, "r");
 820   if (fp == NULL)
 821     return (-1);
 822
 823  /*
 824   * Count lines in map input file...
 825   */
 826   for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
 827   {
 828     s = cupsFileGets(fp, line, sizeof(line));
 829     if (s == NULL)
 830       break;
 831     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
 832       continue;
 833     while ((*s != 0) && (*s != ' ') && (*s != '\t'))
 834         s ++;
 835     while ((*s == ' ') || (*s == '\t'))
 836         s ++;
 837     if (strncmp (s, "0x", 2) == 0)
 838       s += 2;
 839     if ((sscanf(s, "%lx", &unichar) != 1)
 840     || (unichar > 0xffff))
 841     {
 842       cupsFileClose(fp);
 843       return (-1);
 844     }
 845     i ++;
 846   }
 847   if (i == 0)
 848     i = -1;
 849
 850  /*
 851   * Close file and return charmap count (non-comment line count)...
 852   */
 853   cupsFileClose(fp);
 854   return (i);
 855 }
 856
 857 /*
 858  * 'get_sbcs_charmap()' - Get SBCS Charmap.
 859  */
 860 static cups_cmap_t *                    /* O - Charmap or 0 on error */
 861 get_sbcs_charmap(const cups_encoding_t encoding,
 862                                         /* I - Charmap Encoding */
 863                  const char *filename)  /* I - Charmap Filename */
 864 {
 865   int           i;              /* Loop variable */
 866   unsigned long legchar;        /* Legacy character value */
 867   cups_utf32_t  unichar;        /* Unicode character value */
 868   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
 869   cups_file_t   *fp;            /* Charset map file pointer */
 870   char          *s;             /* Line parsing pointer */
 871   cups_ucs2_t   *crow;          /* Pointer to UCS-2 row in 'char2uni' */
 872   cups_sbcs_t   *srow;          /* Pointer to SBCS row in 'uni2char' */
 873   char          line[256];      /* Line from charset map file */
 874   cups_globals_t *cg = _cupsGlobals();
 875                                 /* Pointer to library globals */
 876
 877  /*
 878   * Check for valid arguments...
 879   */
 880   if ((encoding < 0) || (filename == NULL))
 881     return (NULL);
 882
 883  /*
 884   * See if we already have this SBCS charset map loaded...
 885   */
 886   for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
 887   {
 888     if (cmap->encoding == encoding)
 889     {
 890       cmap->used ++;
 891       return ((void *) cmap);
 892     }
 893   }
 894
 895  /*
 896   * Open SBCS charset map input file...
 897   */
 898   fp = cupsFileOpen(filename, "r");
 899   if (fp == NULL)
 900     return (NULL);
 901
 902  /*
 903   * Allocate memory for SBCS charset map and add to cache...
 904   */
 905   cmap = (cups_cmap_t *) calloc(1, sizeof(cups_cmap_t));
 906   if (cmap == NULL)
 907   {
 908     cupsFileClose(fp);
 909     return (NULL);
 910   }
 911   cmap->next = cg->cmap_cache;
 912   cg->cmap_cache = cmap;
 913   cmap->used ++;
 914   cmap->encoding = encoding;
 915
 916  /*
 917   * Save SBCS charset map into memory for transcoding...
 918   */
 919   for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
 920   {
 921     s = cupsFileGets(fp, line, sizeof(line));
 922     if (s == NULL)
 923       break;
 924     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
 925       continue;
 926     if (strncmp (s, "0x", 2) == 0)
 927       s += 2;
 928     if ((sscanf(s, "%lx", &legchar) != 1)
 929     || (legchar > 0xff))
 930     {
 931       cupsFileClose(fp);
 932       cupsCharmapFlush();
 933       return (NULL);
 934     }
 935     while ((*s != 0) && (*s != ' ') && (*s != '\t'))
 936       s ++;
 937     while ((*s == ' ') || (*s == '\t'))
 938       s ++;
 939     if (strncmp (s, "0x", 2) == 0)
 940       s += 2;
 941     if (sscanf(s, "%lx", &unichar) != 1)
 942     {
 943       cupsFileClose(fp);
 944       cupsCharmapFlush();
 945       return (NULL);
 946     }
 947     i ++;
 948
 949    /*
 950     * Convert beyond Plane 0 (BMP) to Replacement Character...
 951     */
 952     if (unichar > 0xffff)
 953       unichar = 0xfffd;
 954
 955    /*
 956     * Save legacy to Unicode mapping in direct lookup table...
 957     */
 958     crow = &cmap->char2uni[(int) legchar];
 959     *crow = (cups_ucs2_t) (unichar & 0xffff);
 960
 961    /*
 962     * Save Unicode to legacy mapping in indirect lookup table...
 963     */
 964     srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
 965     if (srow == NULL)
 966     {
 967       srow = (cups_sbcs_t *) calloc(256, sizeof(cups_sbcs_t));
 968       if (srow == NULL)
 969       {
 970         cupsFileClose(fp);
 971         cupsCharmapFlush();
 972         return (NULL);
 973       }
 974       cmap->uni2char[(int) ((unichar >> 8) & 0xff)] = srow;
 975     }
 976     srow += (int) (unichar & 0xff);
 977
 978    /*
 979     * Convert Replacement Character to visible replacement...
 980     */
 981     if (unichar == 0xfffd)
 982       legchar = (unsigned long) '?';
 983
 984    /*
 985     * First (oldest) legacy character uses Unicode mapping cell...
 986     */
 987     if (*srow == 0)
 988       *srow = (cups_sbcs_t) legchar;
 989   }
 990   cupsFileClose(fp);
 991   return (cmap);
 992 }
 993
 994 /*
 995  * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
 996  */
 997 static cups_vmap_t *                    /* O - Charmap or 0 on error */
 998 get_vbcs_charmap(const cups_encoding_t encoding,
 999                                         /* I - Charmap Encoding */
1000                  const char *filename)  /* I - Charmap Filename */
1001 {
1002   cups_vmap_t       *vmap;      /* Legacy VBCS / Unicode Charset Map */
1003   cups_ucs2_t       *crow;      /* Pointer to UCS-2 row in 'char2uni' */
1004   cups_vbcs_t       *vrow;      /* Pointer to VBCS row in 'uni2char' */
1005   cups_wide2uni_t   *wide2uni;  /* Pointer to row in 'wide2uni' */
1006   cups_sbcs_t       leadchar;   /* Lead char of 2-byte legacy char */
1007   unsigned long     legchar;    /* Legacy character value */
1008   cups_utf32_t      unichar;    /* Unicode character value */
1009   int               mapcount;   /* Count of lines in charmap file */
1010   cups_file_t       *fp;        /* Charset map file pointer */
1011   char              *s;         /* Line parsing pointer */
1012   char              line[256];  /* Line from charset map file */
1013   int               i;          /* Loop variable */
1014   int               wide;       /* 32-bit legacy char */
1015   cups_globals_t    *cg = _cupsGlobals();
1016                                 /* Pointer to library globals */
1017
1018  /*
1019   * Check for valid arguments...
1020   */
1021   if ((encoding < 0) || (filename == NULL))
1022     return (NULL);
1023
1024  /*
1025   * See if we already have this DBCS/VBCS charset map loaded...
1026   */
1027   for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
1028   {
1029     if (vmap->encoding == encoding)
1030     {
1031       vmap->used ++;
1032       return ((void *) vmap);
1033     }
1034   }
1035
1036  /*
1037   * Count lines in charmap file...
1038   */
1039   mapcount = get_charmap_count(filename);
1040   if (mapcount <= 0)
1041     return (NULL);
1042
1043  /*
1044   * Open VBCS charset map input file...
1045   */
1046   fp = cupsFileOpen(filename, "r");
1047   if (fp == NULL)
1048     return (NULL);
1049
1050  /*
1051   * Allocate memory for DBCS/VBCS charset map and add to cache...
1052   */
1053   vmap = (cups_vmap_t *) calloc(1, sizeof(cups_vmap_t));
1054   if (vmap == NULL)
1055   {
1056     cupsFileClose(fp);
1057     return (NULL);
1058   }
1059   vmap->next = cg->vmap_cache;
1060   cg->vmap_cache = vmap;
1061   vmap->used ++;
1062   vmap->encoding = encoding;
1063
1064  /*
1065   * Save DBCS/VBCS charset map into memory for transcoding...
1066   */
1067   leadchar = 0;
1068   wide2uni = NULL;
1069
1070   for (i = 0, wide = 0; i < mapcount; )
1071   {
1072     s = cupsFileGets(fp, line, sizeof(line));
1073     if (s == NULL)
1074       break;
1075     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1076       continue;
1077     if (strncmp (s, "0x", 2) == 0)
1078       s += 2;
1079     if ((sscanf(s, "%lx", &legchar) != 1)
1080     || ((legchar > 0xffff) && (encoding < CUPS_ENCODING_DBCS_END)))
1081     {
1082       cupsFileClose(fp);
1083       cupsCharmapFlush();
1084       return (NULL);
1085     }
1086     while ((*s != 0) && (*s != ' ') && (*s != '\t'))
1087       s ++;
1088     while ((*s == ' ') || (*s == '\t'))
1089       s ++;
1090     if (strncmp (s, "0x", 2) == 0)
1091       s += 2;
1092     if (sscanf(s, "%lx", &unichar) != 1)
1093     {
1094       cupsFileClose(fp);
1095       cupsCharmapFlush();
1096       return (NULL);
1097     }
1098     i ++;
1099
1100    /*
1101     * Convert beyond Plane 0 (BMP) to Replacement Character...
1102     */
1103     if (unichar > 0xffff)
1104       unichar = 0xfffd;
1105
1106    /*
1107     * Save lead char of 2/3/4-byte legacy char...
1108     */
1109     if ((legchar > 0xff) && (legchar <= 0xffff))
1110     {
1111       leadchar = (cups_sbcs_t) (legchar >> 8);
1112       vmap->lead2char[leadchar] = leadchar;
1113     }
1114     if ((legchar > 0xffff) && (legchar <= 0xffffff))
1115     {
1116       leadchar = (cups_sbcs_t) (legchar >> 16);
1117       vmap->lead3char[leadchar] = leadchar;
1118     }
1119     if (legchar > 0xffffff)
1120     {
1121       leadchar = (cups_sbcs_t) (legchar >> 24);
1122       vmap->lead4char[leadchar] = leadchar;
1123     }
1124
1125    /*
1126     * Save Legacy to Unicode mapping...
1127     */
1128     if (legchar <= 0xffff)
1129     {
1130      /*
1131       * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1132       */
1133       crow = vmap->char2uni[(int) leadchar];
1134       if (crow == NULL)
1135       {
1136         crow = (cups_ucs2_t *) calloc(256, sizeof(cups_ucs2_t));
1137         if (crow == NULL)
1138         {
1139           cupsFileClose(fp);
1140           cupsCharmapFlush();
1141           return (NULL);
1142         }
1143         vmap->char2uni[(int) leadchar] = crow;
1144       }
1145       crow += (int) (legchar & 0xff);
1146       *crow = (cups_vbcs_t) unichar;
1147     }
1148     else
1149     {
1150      /*
1151       * Save VBCS 32-bit to Unicode mapping in sorted list table...
1152       */
1153       if (wide == 0)
1154       {
1155         wide = 1;
1156         vmap->widecount = (mapcount - i + 1);
1157         wide2uni = (cups_wide2uni_t *)
1158           calloc(vmap->widecount, sizeof(cups_wide2uni_t));
1159         if (wide2uni == NULL)
1160         {
1161           cupsFileClose(fp);
1162           cupsCharmapFlush();
1163           return (NULL);
1164         }
1165         vmap->wide2uni = wide2uni;
1166       }
1167       wide2uni->widechar = (cups_vbcs_t) legchar;
1168       wide2uni->unichar = unichar;
1169       wide2uni ++;
1170     }
1171
1172    /*
1173     * Save Unicode to legacy mapping in indirect lookup table...
1174     */
1175     vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1176     if (vrow == NULL)
1177     {
1178       vrow = (cups_vbcs_t *) calloc(256, sizeof(cups_vbcs_t));
1179       if (vrow == NULL)
1180       {
1181         cupsFileClose(fp);
1182         cupsCharmapFlush();
1183         return (NULL);
1184       }
1185       vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1186     }
1187     vrow += (int) (unichar & 0xff);
1188
1189    /*
1190     * Convert Replacement Character to visible replacement...
1191     */
1192     if (unichar == 0xfffd)
1193       legchar = (unsigned long) '?';
1194
1195    /*
1196     * First (oldest) legacy character uses Unicode mapping cell...
1197     */
1198     if (*vrow == 0)
1199       *vrow = (cups_vbcs_t) legchar;
1200   }
1201   vmap->charcount = (i - vmap->widecount);
1202   cupsFileClose(fp);
1203   return (vmap);
1204 }
1205
1206 /*
1207  * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
1208  */
1209 static int                              /* O - Count or -1 on error */
1210 conv_utf8_to_sbcs(char *dest,           /* O - Target string */
1211     const cups_utf8_t *src,             /* I - Source string */
1212     const int maxout,                   /* I - Max output */
1213     const cups_encoding_t encoding)     /* I - Encoding */
1214 {
1215   char          *start = dest;  /* Start of destination string */
1216   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
1217   cups_sbcs_t   *srow;          /* Pointer to SBCS row in 'uni2char' */
1218   cups_utf32_t  unichar;        /* Character value */
1219   int           worklen;        /* Internal UCS-4 string length */
1220   cups_utf32_t  work[CUPS_MAX_USTRING];
1221                                 /* Internal UCS-4 string */
1222   int           i;              /* Looping variable */
1223
1224  /*
1225   * Check for valid arguments and clear output...
1226   */
1227   if ((dest == NULL)
1228   || (src == NULL)
1229   || (maxout < 1)
1230   || (maxout > CUPS_MAX_USTRING)
1231   || (encoding == CUPS_UTF8))
1232     return (-1);
1233   *dest = '\0';
1234
1235  /*
1236   * Find legacy charset map in cache...
1237   */
1238   cmap = (cups_cmap_t *) cupsCharmapGet(encoding);
1239   if (cmap == NULL)
1240     return (-1);
1241
1242  /*
1243   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1244   */
1245   worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1246   if (worklen < 0)
1247     return (-1);
1248
1249  /*
1250   * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
1251   */
1252   for (i = 0; i < worklen;)
1253   {
1254     unichar = work[i];
1255     if (unichar == 0)
1256       break;
1257     i ++;
1258
1259    /*
1260     * Check for leading BOM (and delete from output)...
1261     */
1262     if ((i == 1) && (unichar == 0xfeff))
1263       continue;
1264
1265    /*
1266     * Convert ASCII verbatim (optimization)...
1267     */
1268     if (unichar <= 0x7f)
1269     {
1270       *dest = (char) unichar;
1271       dest ++;
1272       continue;
1273     }
1274
1275    /*
1276     * Convert unknown character to visible replacement...
1277     */
1278     srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1279     if (srow)
1280       srow += (int) (unichar & 0xff);
1281     if ((srow == NULL) || (*srow == 0))
1282       *dest = '?';
1283     else
1284       *dest = (char) (*srow);
1285     dest ++;
1286   }
1287   *dest = '\0';
1288   worklen = (int) (dest - start);
1289   cupsCharmapFree(encoding);
1290   return (worklen);
1291 }
1292
1293 /*
1294  * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1295  */
1296 static int                              /* O - Count or -1 on error */
1297 conv_utf8_to_vbcs(char *dest,           /* O - Target string */
1298     const cups_utf8_t *src,             /* I - Source string */
1299     const int maxout,                   /* I - Max output */
1300     const cups_encoding_t encoding)     /* I - Encoding */
1301 {
1302   char          *start = dest;  /* Start of destination string */
1303   cups_vmap_t   *vmap;          /* Legacy DBCS / Unicode Charset Map */
1304   cups_vbcs_t   *vrow;          /* Pointer to VBCS row in 'uni2char' */
1305   cups_utf32_t  unichar;        /* Character value */
1306   cups_vbcs_t   legchar;        /* Legacy character value */
1307   int           worklen;        /* Internal UCS-4 string length */
1308   cups_utf32_t  work[CUPS_MAX_USTRING];
1309                                 /* Internal UCS-4 string */
1310   int           i;              /* Looping variable */
1311
1312  /*
1313   * Check for valid arguments and clear output...
1314   */
1315   if ((dest == NULL)
1316   || (src == NULL)
1317   || (maxout < 1)
1318   || (maxout > CUPS_MAX_USTRING)
1319   || (encoding == CUPS_UTF8))
1320     return (-1);
1321   *dest = '\0';
1322
1323  /*
1324   * Find legacy charset map in cache...
1325   */
1326   vmap = (cups_vmap_t *) cupsCharmapGet(encoding);
1327   if (vmap == NULL)
1328     return (-1);
1329
1330  /*
1331   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1332   */
1333   worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1334   if (worklen < 0)
1335     return (-1);
1336
1337  /*
1338   * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1339   */
1340   for (i = 0; i < worklen;)
1341   {
1342     unichar = work[i];
1343     if (unichar == 0)
1344       break;
1345     i ++;
1346
1347    /*
1348     * Check for leading BOM (and delete from output)...
1349     */
1350     if ((i == 1) && (unichar == 0xfeff))
1351       continue;
1352
1353    /*
1354     * Convert ASCII verbatim (optimization)...
1355     */
1356     if (unichar <= 0x7f)
1357     {
1358       *dest = (char) unichar;
1359       dest ++;
1360       continue;
1361     }
1362
1363    /*
1364     * Convert unknown character to visible replacement...
1365     */
1366     vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1367     if (vrow)
1368       vrow += (int) (unichar & 0xff);
1369     if ((vrow == NULL) || (*vrow == 0))
1370       legchar = (cups_vbcs_t) '?';
1371     else
1372       legchar = (cups_vbcs_t) *vrow;
1373
1374    /*
1375     * Save n-byte legacy character...
1376     */
1377     if (legchar > 0xffffff)
1378     {
1379       *dest = (char) ((legchar >> 24) & 0xff);
1380       dest++;
1381     }
1382     if (legchar > 0xffff)
1383     {
1384       *dest = (char) ((legchar >> 16) & 0xff);
1385       dest++;
1386     }
1387     if (legchar > 0xff)
1388     {
1389       *dest = (char) ((legchar >> 8) & 0xff);
1390       dest++;
1391     }
1392     *dest = (char) (legchar & 0xff);
1393     dest ++;
1394   }
1395   *dest = '\0';
1396   worklen = (int) (dest - start);
1397   cupsCharmapFree(encoding);
1398   return (worklen);
1399 }
1400
1401 /*
1402  * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
1403  */
1404 static int                              /* O - Count or -1 on error */
1405 conv_sbcs_to_utf8(cups_utf8_t *dest,    /* O - Target string */
1406     const char *src,                    /* I - Source string */
1407     const int maxout,                   /* I - Max output */
1408     const cups_encoding_t encoding)     /* I - Encoding */
1409 {
1410   cups_cmap_t   *cmap;          /* Legacy SBCS / Unicode Charset Map */
1411   cups_ucs2_t   *crow;          /* Pointer to UCS-2 row in 'char2uni' */
1412   unsigned long legchar;        /* Legacy character value */
1413   cups_utf32_t  unichar;        /* Unicode character value */
1414   int           worklen;        /* Internal UCS-4 string length */
1415   cups_utf32_t  work[CUPS_MAX_USTRING];
1416                                 /* Internal UCS-4 string */
1417   int           i;              /* Looping variable */
1418
1419  /*
1420   * Check for valid arguments and clear output...
1421   */
1422   if ((dest == NULL)
1423   || (src == NULL)
1424   || (maxout < 1)
1425   || (maxout > CUPS_MAX_USTRING)
1426   || (encoding == CUPS_UTF8))
1427     return (-1);
1428   *dest = '\0';
1429
1430  /*
1431   * Find legacy charset map in cache...
1432   */
1433   cmap = (cups_cmap_t *) cupsCharmapGet(encoding);
1434   if (cmap == NULL)
1435     return (-1);
1436
1437  /*
1438   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1439   */
1440   work[0] = 0xfeff;
1441   for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1442   {
1443     if (*src == '\0')
1444       break;
1445     legchar = (unsigned long) *src;
1446
1447    /*
1448     * Convert ASCII verbatim (optimization)...
1449     */
1450     if (legchar <= 0x7f)
1451     {
1452       work[i] = (cups_utf32_t) legchar;
1453       i ++;
1454       continue;
1455     }
1456
1457    /*
1458     * Convert unknown character to Replacement Character...
1459     */
1460     crow = &cmap->char2uni[0];
1461     crow += (int) legchar;
1462     if (*crow == 0)
1463       unichar = 0xfffd;
1464     else
1465       unichar = (cups_utf32_t) *crow;
1466     work[i] = unichar;
1467     i ++;
1468   }
1469   work[i] = 0;
1470
1471  /*
1472   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1473   */
1474   worklen = cupsUTF32ToUTF8(dest, work, maxout);
1475   cupsCharmapFree(encoding);
1476   return (worklen);
1477 }
1478
1479
1480 /*
1481  * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1482  */
1483 static int                              /* O - Count or -1 on error */
1484 conv_vbcs_to_utf8(cups_utf8_t *dest,    /* O - Target string */
1485     const char *src,                    /* I - Source string */
1486     const int maxout,                   /* I - Max output */
1487     const cups_encoding_t encoding)     /* I - Encoding */
1488 {
1489   cups_vmap_t       *vmap;      /* Legacy VBCS / Unicode Charset Map */
1490   cups_ucs2_t       *crow;      /* Pointer to UCS-2 row in 'char2uni' */
1491   cups_wide2uni_t   *wide2uni;  /* Pointer to row in 'wide2uni' */
1492   cups_sbcs_t       leadchar;   /* Lead char of n-byte legacy char */
1493   cups_vbcs_t       legchar;    /* Legacy character value */
1494   cups_utf32_t      unichar;    /* Unicode character value */
1495   int               i;          /* Looping variable */
1496   int               worklen;    /* Internal UCS-4 string length */
1497   cups_utf32_t      work[CUPS_MAX_USTRING];
1498                                 /* Internal UCS-4 string */
1499
1500  /*
1501   * Check for valid arguments and clear output...
1502   */
1503   if ((dest == NULL)
1504   || (src == NULL)
1505   || (maxout < 1)
1506   || (maxout > CUPS_MAX_USTRING)
1507   || (encoding == CUPS_UTF8))
1508     return (-1);
1509   *dest = '\0';
1510
1511  /*
1512   * Find legacy charset map in cache...
1513   */
1514   vmap = (cups_vmap_t *) cupsCharmapGet(encoding);
1515   if (vmap == NULL)
1516     return (-1);
1517
1518  /*
1519   * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1520   */
1521   work[0] = 0xfeff;
1522   for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1523   {
1524     if (*src == '\0')
1525       break;
1526     legchar = (cups_vbcs_t) *src;
1527     leadchar = (cups_sbcs_t) *src;
1528
1529    /*
1530     * Convert ASCII verbatim (optimization)...
1531     */
1532     if (legchar <= 0x7f)
1533     {
1534       work[i] = (cups_utf32_t) legchar;
1535       i ++;
1536       continue;
1537     }
1538
1539    /*
1540     * Convert 2-byte legacy character...
1541     */
1542     if (vmap->lead2char[(int) leadchar] == leadchar)
1543     {
1544       src ++;
1545       if (*src == '\0')
1546         return (-1);
1547       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1548
1549      /*
1550       * Convert unknown character to Replacement Character...
1551       */
1552       crow = vmap->char2uni[(int) ((legchar >> 8) & 0xff)];
1553       if (crow)
1554         crow += (int) (legchar & 0xff);
1555       if ((crow == NULL) || (*crow == 0))
1556         unichar = 0xfffd;
1557       else
1558         unichar = (cups_utf32_t) *crow;
1559       work[i] = unichar;
1560       i ++;
1561       continue;
1562     }
1563
1564    /*
1565     * Fetch 3-byte or 4-byte legacy character...
1566     */
1567     if (vmap->lead3char[(int) leadchar] == leadchar)
1568     {
1569       src ++;
1570       if (*src == '\0')
1571         return (-1);
1572       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1573       src ++;
1574       if (*src == '\0')
1575         return (-1);
1576       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1577     }
1578     else if (vmap->lead4char[(int) leadchar] == leadchar)
1579     {
1580       src ++;
1581       if (*src == '\0')
1582         return (-1);
1583       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1584       src ++;
1585       if (*src == '\0')
1586         return (-1);
1587       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1588       src ++;
1589       if (*src == '\0')
1590         return (-1);
1591       legchar = (legchar << 8) | (cups_vbcs_t) *src;
1592     }
1593     else
1594       return (-1);
1595
1596    /*
1597     * Find 3-byte or 4-byte legacy character...
1598     */
1599     wide2uni = vmap->wide2uni;
1600     wide2uni = (cups_wide2uni_t *) bsearch(&legchar,
1601                                            vmap->wide2uni,
1602                                            vmap->widecount,
1603                                            sizeof(cups_wide2uni_t),
1604                                            compare_wide);
1605
1606    /*
1607     * Convert unknown character to Replacement Character...
1608     */
1609     if ((wide2uni == NULL) || (wide2uni->unichar == 0))
1610       unichar = 0xfffd;
1611     else
1612       unichar = wide2uni->unichar;
1613     work[i] = unichar;
1614     i ++;
1615   }
1616   work[i] = 0;
1617
1618  /*
1619   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1620   */
1621   worklen = cupsUTF32ToUTF8(dest, work, maxout);
1622   cupsCharmapFree(encoding);
1623   return (worklen);
1624 }
1625
1626 /*
1627  * 'compare_wide()' - Compare key for wide (VBCS) match.
1628  */
1629 static int
1630 compare_wide(const void *k1,            /* I - Key char */
1631     const void *k2)                     /* I - Map char */
1632 {
1633   cups_vbcs_t       *kp = (cups_vbcs_t *) k1;
1634                                 /* Key char pointer */
1635   cups_wide2uni_t   *mp = (cups_wide2uni_t *) k2;
1636                                 /* Map char pointer */
1637   cups_vbcs_t       key;        /* Legacy key character */
1638   cups_vbcs_t       map;        /* Legacy map character */
1639   int               result;     /* Result Value */
1640
1641   key = *kp;
1642   map = mp->widechar;
1643   if (key >= map)
1644     result = (int) (key - map);
1645   else
1646     result = -1 * ((int) (map - key));
1647   return (result);
1648 }
1649
1650 /*
1651  * End of "$Id$"
1652  */