/*
- * "$Id: transcode.c 4967 2006-01-24 03:42:15Z mike $"
+ * "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $"
*
* Transcoding support for the Common UNIX Printing System (CUPS).
*
*
* Contents:
*
- * cupsCharmapGet() - Get a character set map.
- * cupsCharmapFree() - Free a character set map.
- * cupsCharmapFlush() - Flush all character set maps out of cache.
* _cupsCharmapFlush() - Flush all character set maps out of cache.
- * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
+ * _cupsCharmapFree() - Free a character set map.
+ * _cupsCharmapGet() - Get a character set map.
* cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
- * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16.
- * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8.
+ * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
* cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
* cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
- * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32.
- * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16.
- * get_charmap_count() - Count lines in a charmap file.
- * get_sbcs_charmap() - Get SBCS Charmap.
- * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
+ * compare_wide() - Compare key for wide (VBCS) match.
+ * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
* conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
* conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
- * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
* conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
- * compare_wide() - Compare key for wide (VBCS) match.
+ * free_sbcs_charmap() - Free memory used by a single byte character set.
+ * free_vbcs_charmap() - Free memory used by a variable byte character set.
+ * get_charmap_count() - Count lines in a charmap file.
+ * get_sbcs_charmap() - Get SBCS Charmap.
+ * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
*/
/*
*/
#include "globals.h"
+#include "debug.h"
#include <stdlib.h>
#include <errno.h>
#include <time.h>
/*
- * Prototypes...
+ * Local functions...
*/
-static int get_charmap_count(const char *filename);
-static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
- const char *filename);
-static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
- const char *filename);
-
-static int conv_utf8_to_sbcs(char *dest,
- const cups_utf8_t *src,
- const int maxout,
- const cups_encoding_t encoding);
-static int conv_utf8_to_vbcs(char *dest,
- const cups_utf8_t *src,
- const int maxout,
- const cups_encoding_t encoding);
-
-static int conv_sbcs_to_utf8(cups_utf8_t *dest,
- const char *src,
- const int maxout,
- const cups_encoding_t encoding);
-static int conv_vbcs_to_utf8(cups_utf8_t *dest,
- const char *src,
- const int maxout,
- const cups_encoding_t encoding);
-
-static int compare_wide(const void *k1, const void *k2);
+static int compare_wide(const void *k1, const void *k2);
+static int conv_sbcs_to_utf8(cups_utf8_t *dest,
+ const cups_sbcs_t *src,
+ int maxout,
+ const cups_encoding_t encoding);
+static int conv_utf8_to_sbcs(cups_sbcs_t *dest,
+ const cups_utf8_t *src,
+ int maxout,
+ const cups_encoding_t encoding);
+static int conv_utf8_to_vbcs(cups_sbcs_t *dest,
+ const cups_utf8_t *src,
+ int maxout,
+ const cups_encoding_t encoding);
+static int conv_vbcs_to_utf8(cups_utf8_t *dest,
+ const cups_sbcs_t *src,
+ int maxout,
+ const cups_encoding_t encoding);
+static void free_sbcs_charmap(_cups_cmap_t *sbcs);
+static void free_vbcs_charmap(_cups_vmap_t *vbcs);
+static int get_charmap_count(cups_file_t *fp);
+static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
+ const char *filename);
+static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
+ const char *filename);
+
/*
- * 'cupsCharmapGet()' - Get a character set map.
- *
- * This code handles single-byte (SBCS), double-byte (DBCS), and
- * variable-byte (VBCS) character sets _without_ charset escapes...
- * This code does not handle multiple-byte character sets (MBCS)
- * (such as ISO-2022-JP) with charset switching via escapes...
+ * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
*/
-void * /* O - Charset map pointer */
-cupsCharmapGet(
- const cups_encoding_t encoding) /* I - Encoding */
+void
+_cupsCharmapFlush(_cups_globals_t *cg) /* I - Global data */
{
- char mapname[80]; /* Name of charset map */
- char filename[1024]; /* Filename for charset map file */
- _cups_globals_t *cg = _cupsGlobals(); /* Global data */
+ _cups_cmap_t *cmap, /* Legacy SBCS / Unicode Charset Map */
+ *cnext; /* Next Legacy SBCS Charset Map */
+ _cups_vmap_t *vmap, /* Legacy VBCS / Unicode Charset Map */
+ *vnext; /* Next Legacy VBCS Charset Map */
/*
- * Check for valid arguments...
+ * Loop through SBCS charset map cache, free all memory...
*/
- if ((encoding < 0) || (encoding >= CUPS_ENCODING_VBCS_END))
- return (NULL);
+ for (cmap = cg->cmap_cache; cmap; cmap = cnext)
+ {
+ cnext = cmap->next;
- /*
- * Get the data directory and charset map name...
- */
+ free_sbcs_charmap(cmap);
+ }
- snprintf(mapname, sizeof(mapname), "%s.txt", _cupsEncodingName(encoding));
- snprintf(filename, sizeof(filename), "%s/charmaps/%s",
- cg->cups_datadir, mapname);
+ cg->cmap_cache = NULL;
/*
- * Read charset map input file into cache...
+ * Loop through DBCS/VBCS charset map cache, free all memory...
*/
- if (encoding < CUPS_ENCODING_SBCS_END)
- return (get_sbcs_charmap(encoding, filename));
- else if (encoding < CUPS_ENCODING_VBCS_END)
- return (get_vbcs_charmap(encoding, filename));
- else
- return (NULL);
+ for (vmap = cg->vmap_cache; vmap; vmap = vnext)
+ {
+ vnext = vmap->next;
+
+ free_vbcs_charmap(vmap);
+
+ free(vmap);
+ }
+
+ cg->vmap_cache = NULL;
}
+
/*
- * 'cupsCharmapFree()' - Free a character set map.
+ * '_cupsCharmapFree()' - Free a character set map.
*
- * This does not actually free; use 'cupsCharmapFlush()' for that.
+ * This does not actually free; use '_cupsCharmapFlush()' for that.
*/
+
void
-cupsCharmapFree(const cups_encoding_t encoding)
- /* I - Encoding */
+_cupsCharmapFree(
+ const cups_encoding_t encoding) /* I - Encoding */
{
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
- _cups_globals_t *cg = _cupsGlobals();
- /* Pointer to library globals */
+ _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
+ _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
+ _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */
+
/*
* See if we already have this SBCS charset map loaded...
*/
- for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
+
+ for (cmap = cg->cmap_cache; cmap; cmap = cmap->next)
{
if (cmap->encoding == encoding)
{
if (cmap->used > 0)
cmap->used --;
+
return;
}
}
/*
* See if we already have this DBCS/VBCS charset map loaded...
*/
- for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
+
+ for (vmap = cg->vmap_cache; vmap; vmap = vmap->next)
{
if (vmap->encoding == encoding)
{
return;
}
}
- return;
-}
-
-
-/*
- * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
- */
-void
-cupsCharmapFlush(void)
-{
- _cupsCharmapFlush(_cupsGlobals());
}
/*
- * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
+ * '_cupsCharmapGet()' - Get a character set map.
+ *
+ * This code handles single-byte (SBCS), double-byte (DBCS), and
+ * variable-byte (VBCS) character sets _without_ charset escapes...
+ * This code does not handle multiple-byte character sets (MBCS)
+ * (such as ISO-2022-JP) with charset switching via escapes...
*/
-void
-_cupsCharmapFlush(_cups_globals_t *cg) /* I - Global data */
+void * /* O - Charset map pointer */
+_cupsCharmapGet(
+ const cups_encoding_t encoding) /* I - Encoding */
{
- int i; /* Looping variable */
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
- _cups_cmap_t *cnext; /* Next Legacy SBCS Charset Map */
- _cups_vmap_t *vnext; /* Next Legacy VBCS Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
- cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
+ char filename[1024]; /* Filename for charset map file */
+ _cups_globals_t *cg = _cupsGlobals(); /* Global data */
+
+ DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
/*
- * Loop through SBCS charset map cache, free all memory...
+ * Check for valid arguments...
*/
- for (cmap = cg->cmap_cache; cmap != NULL; cmap = cnext)
+
+ if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
{
- for (i = 0; i < 256; i ++)
- {
- if ((srow = cmap->uni2char[i]) != NULL)
- free(srow);
- }
- cnext = cmap->next;
- free(cmap);
+ DEBUG_puts(" Bad encoding, returning NULL!");
+ return (NULL);
}
- cg->cmap_cache = NULL;
/*
- * Loop through DBCS/VBCS charset map cache, free all memory...
+ * Get the data directory and charset map name...
*/
- for (vmap = cg->vmap_cache; vmap != NULL; vmap = vnext)
- {
- for (i = 0; i < 256; i ++)
- {
- if ((crow = vmap->char2uni[i]) != NULL)
- free(crow);
- }
- for (i = 0; i < 256; i ++)
- {
- if ((vrow = vmap->uni2char[i]) != NULL)
- free(vrow);
- }
- if (vmap->wide2uni)
- free(vmap->wide2uni);
- vnext = vmap->next;
- free(vmap);
- }
- cg->vmap_cache = NULL;
- return;
+
+ snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
+ cg->cups_datadir, _cupsEncodingName(encoding));
+
+ DEBUG_printf((" filename=\"%s\"\n", filename));
+
+ /*
+ * Read charset map input file into cache...
+ */
+
+ if (encoding < CUPS_ENCODING_SBCS_END)
+ return (get_sbcs_charmap(encoding, filename));
+ else if (encoding < CUPS_ENCODING_VBCS_END)
+ return (get_vbcs_charmap(encoding, filename));
+ else
+ return (NULL);
}
+
/*
- * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
+ * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
*
* This code handles single-byte (SBCS), double-byte (DBCS), and
* variable-byte (VBCS) character sets _without_ charset escapes...
* This code does not handle multiple-byte character sets (MBCS)
* (such as ISO-2022-JP) with charset switching via escapes...
*/
-int /* O - Count or -1 on error */
-cupsUTF8ToCharset(char *dest, /* O - Target string */
- const cups_utf8_t *src, /* I - Source string */
- const int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
+
+int /* O - Count or -1 on error */
+cupsCharsetToUTF8(
+ cups_utf8_t *dest, /* O - Target string */
+ const char *src, /* I - Source string */
+ const int maxout, /* I - Max output */
+ const cups_encoding_t encoding) /* I - Encoding */
{
/*
* Check for valid arguments...
*/
+ DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
+ dest, src, maxout, encoding));
+
+ if (dest)
+ *dest = '\0';
+
if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
+ {
+ DEBUG_puts(" Bad arguments, returning -1");
return (-1);
+ }
/*
* Handle identity conversions...
if (encoding == CUPS_UTF8 ||
encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
{
- strlcpy(dest, (char *)src, maxout);
- return (strlen(dest));
+ strlcpy((char *)dest, src, maxout);
+ return (strlen((char *)dest));
}
/*
- * Convert input UTF-8 to legacy charset...
+ * Convert input legacy charset to UTF-8...
*/
+
if (encoding < CUPS_ENCODING_SBCS_END)
- return (conv_utf8_to_sbcs(dest, src, maxout, encoding));
+ return (conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding));
else if (encoding < CUPS_ENCODING_VBCS_END)
- return (conv_utf8_to_vbcs(dest, src, maxout, encoding));
+ return (conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding));
else
+ {
+ /*
+ * Report through the debug macro like the argument check above;
+ * library code must not write diagnostics to stdout...
+ */
+
+ DEBUG_puts(" Bad encoding, returning -1");
return (-1);
+ }
}
+
/*
- * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
+ * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
*
* This code handles single-byte (SBCS), double-byte (DBCS), and
* variable-byte (VBCS) character sets _without_ charset escapes...
* This code does not handle multiple-byte character sets (MBCS)
* (such as ISO-2022-JP) with charset switching via escapes...
*/
-int /* O - Count or -1 on error */
-cupsCharsetToUTF8(cups_utf8_t *dest, /* O - Target string */
- const char *src, /* I - Source string */
- const int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
+
+int /* O - Count or -1 on error */
+cupsUTF8ToCharset(
+ char *dest, /* O - Target string */
+ const cups_utf8_t *src, /* I - Source string */
+ const int maxout, /* I - Max output */
+ const cups_encoding_t encoding) /* I - Encoding */
{
/*
* Check for valid arguments...
*/
if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
+ {
+ if (dest)
+ *dest = '\0';
+
return (-1);
+ }
/*
* Handle identity conversions...
if (encoding == CUPS_UTF8 ||
encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
{
- strlcpy((char *)dest, src, maxout);
- return (strlen((char *)dest));
+ strlcpy(dest, (char *)src, maxout);
+ return (strlen(dest));
}
/*
- * Convert input legacy charset to UTF-8...
+ * Convert input UTF-8 to legacy charset...
*/
+
if (encoding < CUPS_ENCODING_SBCS_END)
- return (conv_sbcs_to_utf8(dest, src, maxout, encoding));
+ return (conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding));
else if (encoding < CUPS_ENCODING_VBCS_END)
- return (conv_vbcs_to_utf8(dest, src, maxout, encoding));
+ return (conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding));
else
return (-1);
}
-/*
- * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
- *
- * This code does not support Unicode beyond 16-bits (Plane 0)...
- */
-int /* O - Count or -1 on error */
-cupsUTF8ToUTF16(cups_utf16_t *dest, /* O - Target string */
- const cups_utf8_t *src, /* I - Source string */
- const int maxout) /* I - Max output */
-{
- int worklen; /* Internal UCS-4 string length */
- cups_utf32_t work[CUPS_MAX_USTRING];
- /* Internal UCS-4 string */
-
- /*
- * Check for valid arguments and clear output...
- */
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1)
- || (maxout > CUPS_MAX_USTRING))
- return (-1);
- *dest = 0;
-
- /*
- * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
- */
- worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
- if (worklen < 0)
- return (-1);
-
- /*
- * Convert internal UCS-4 to output UTF-16...
- */
- worklen = cupsUTF32ToUTF16(dest, work, maxout);
- return (worklen);
-}
-
-/*
- * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
- *
- * This code does not support Unicode beyond 16-bits (Plane 0)...
- */
-int /* O - Count or -1 on error */
-cupsUTF16ToUTF8(cups_utf8_t *dest, /* O - Target string */
- const cups_utf16_t *src, /* I - Source string */
- const int maxout) /* I - Max output */
-{
- int worklen; /* Internal UCS-4 string length */
- cups_utf32_t work[CUPS_MAX_USTRING];
- /* Internal UCS-4 string */
-
- /*
- * Check for valid arguments and clear output...
- */
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1)
- || (maxout > CUPS_MAX_USTRING))
- return (-1);
- *dest = 0;
-
- /*
- * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
- */
- worklen = cupsUTF16ToUTF32(work, src, CUPS_MAX_USTRING);
- if (worklen < 0)
- return (-1);
-
- /*
- * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
- */
- worklen = cupsUTF32ToUTF8(dest, work, maxout);
- return (worklen);
-}
/*
* 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
*
* UTF-32 char UTF-8 char(s)
* --------------------------------------------------
- * 0 to 127 = 0xxxxxxx (US-ASCII)
+ * 0 to 127 = 0xxxxxxx (US-ASCII)
* 128 to 2047 = 110xxxxx 10yyyyyy
* 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
- * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
+ * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
*
* UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
* which would convert to five- or six-octet UTF-8 sequences...
- *
- * This code does not support Unicode beyond 16-bits (Plane 0)...
*/
-int /* O - Count or -1 on error */
-cupsUTF8ToUTF32(cups_utf32_t *dest, /* O - Target string */
- const cups_utf8_t *src, /* I - Source string */
- const int maxout) /* I - Max output */
+
+int /* O - Count (including BOM) or -1 on error */
+cupsUTF8ToUTF32(
+ cups_utf32_t *dest, /* O - Target string */
+ const cups_utf8_t *src, /* I - Source string */
+ const int maxout) /* I - Max output */
{
- cups_utf8_t *first = (cups_utf8_t *) src;
- size_t srclen; /* Source string length */
- int i; /* Looping variable */
- cups_utf32_t ch; /* Character value */
- cups_utf32_t next; /* Next character value */
- cups_utf32_t ch32; /* UTF-32 character value */
+ int i; /* Remaining output slots */
+ cups_utf8_t ch; /* Character value */
+ cups_utf8_t next; /* Next character value */
+ cups_utf32_t ch32; /* UTF-32 character value */
+
/*
* Check for valid arguments and clear output...
*/
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1)
- || (maxout > CUPS_MAX_USTRING))
+
+ if (dest)
+ *dest = 0;
+
+ if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
return (-1);
- *dest = 0;
/*
* Convert input UTF-8 to output UTF-32 (and insert BOM)...
*/
- *dest = 0xfeff;
- dest ++;
- srclen = strlen((char *) src);
- for (i = 1; i < (maxout - 1); src ++, dest ++)
+
+ *dest++ = 0xfeff;
+
+ /*
+ * Two of the "maxout" output slots are taken by the BOM and the
+ * terminating 0, so at most maxout - 2 characters may be stored;
+ * starting the countdown at maxout - 1 would write one slot past
+ * the end of a full buffer...
+ *
+ * NOTE(review): maxout == 1 still stores the BOM and the terminator
+ * (two slots), exactly as the pre-patch code did.
+ */
+
+ for (i = maxout - 2; *src && i > 0; i --)
{
- ch = (cups_utf32_t) *src;
- ch &= 0xff;
- if (ch == 0)
- break;
- i ++;
+ ch = *src++;
/*
* Convert UTF-8 character(s) to UTF-32 character...
*/
- if ((ch & 0x7f) == ch)
+
+ if (!(ch & 0x80))
{
/*
* One-octet UTF-8 <= 127 (US-ASCII)...
*/
- *dest = ch;
+
+ *dest++ = ch;
}
else if ((ch & 0xe0) == 0xc0)
{
/*
* Two-octet UTF-8 <= 2047 (Latin-x)...
*/
- src ++;
- next = (cups_utf32_t) *src;
- next &= 0xff;
- if (next == 0)
+
+ next = *src++;
+ if (!next)
return (-1);
+
ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
/*
* Check for non-shortest form (invalid UTF-8)...
*/
- if (ch32 <= 127)
+
+ if (ch32 < 0x80)
return (-1);
- *dest = ch32;
+
+ *dest++ = ch32;
}
else if ((ch & 0xf0) == 0xe0)
{
/*
* Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
*/
- src ++;
- next = (cups_utf32_t) *src;
- next &= 0xff;
- if (next == 0)
+
+ next = *src++;
+ if (!next)
return (-1);
- ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
- src ++;
- next = (cups_utf32_t) *src;
- next &= 0xff;
- if (next == 0)
+
+ ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
+
+ next = *src++;
+ if (!next)
return (-1);
- ch32 = ((ch32 << 6) | (next & 0x3f));
+
+ ch32 = (ch32 << 6) | (next & 0x3f);
/*
* Check for non-shortest form (invalid UTF-8)...
*/
- if (ch32 <= 2047)
+
+ if (ch32 < 0x800)
return (-1);
- *dest = ch32;
+
+ *dest++ = ch32;
}
else if ((ch & 0xf8) == 0xf0)
{
/*
- * Four-octet UTF-8 to Replacement Character...
+ * Four-octet UTF-8...
*/
- if (((src - first) + 3) >= srclen)
+
+ next = *src++;
+ if (!next)
return (-1);
- src += 3;
- *dest = 0xfffd;
- }
- else if ((ch & 0xfc) == 0xf8)
- {
- /*
- * Five-octet UTF-8 (invalid strict UTF-32)...
- */
- return (-1);
- }
- else if ((ch & 0xfe) == 0xfc)
- {
+
+ ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
+
+ next = *src++;
+ if (!next)
+ return (-1);
+
+ ch32 = (ch32 << 6) | (next & 0x3f);
+
+ next = *src++;
+ if (!next)
+ return (-1);
+
+ ch32 = (ch32 << 6) | (next & 0x3f);
+
/*
- * Six-octet UTF-8 (invalid strict UTF-32)...
+ * Check for non-shortest form and for values beyond Plane 16
+ * (both invalid UTF-8); the 3 payload bits of the lead octet
+ * allow values up to 0x1fffff...
*/
- return (-1);
+
+ if (ch32 < 0x10000 || ch32 > 0x10ffff)
+ return (-1);
+
+ *dest++ = ch32;
}
else
{
/*
- * More than six-octet (invalid UTF-8 sequence)...
+ * More than 4-octet (invalid UTF-8 sequence)...
*/
+
return (-1);
}
/*
* Check for UTF-16 surrogate (illegal UTF-8)...
*/
- if ((*dest >= 0xd800) && (*dest <= 0xdfff))
- return (-1);
- /*
- * Check for beyond Plane 16 (invalid UTF-8)...
- */
- if (*dest > 0x10ffff)
+ /* "dest" was already advanced, so the value just stored is dest[-1] */
+ if (dest[-1] >= 0xd800 && dest[-1] <= 0xdfff)
return (-1);
}
+
*dest = 0;
+
- return (i);
+ /*
+ * "i" counts down from maxout - 2, so maxout - 2 - i characters were
+ * stored; add one for the BOM to get the number of values written...
+ */
+
+ return (maxout - 1 - i);
}
+
/*
* 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
*
*
* UTF-32 char UTF-8 char(s)
* --------------------------------------------------
- * 0 to 127 = 0xxxxxxx (US-ASCII)
+ * 0 to 127 = 0xxxxxxx (US-ASCII)
* 128 to 2047 = 110xxxxx 10yyyyyy
* 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
- * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
+ * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
*
* UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
* which would convert to five- or six-octet UTF-8 sequences...
- *
- * This code does not support Unicode beyond 16-bits (Plane 0)...
*/
-int /* O - Count or -1 on error */
-cupsUTF32ToUTF8(cups_utf8_t *dest, /* O - Target string */
- const cups_utf32_t *src, /* I - Source string */
- const int maxout) /* I - Max output */
+
+int /* O - Count or -1 on error */
+cupsUTF32ToUTF8(
+ cups_utf8_t *dest, /* O - Target string */
+ const cups_utf32_t *src, /* I - Source string */
+ const int maxout) /* I - Max output */
{
- cups_utf32_t *first = (cups_utf32_t *) src;
- /* First source char */
- cups_utf8_t *start = dest; /* Start of destination string */
- int i; /* Looping variable */
- int swap = 0; /* Byte-swap input to output */
- cups_utf32_t ch; /* Character value */
+ cups_utf8_t *start; /* Start of destination string */
+ int i; /* Looping variable */
+ int swap; /* Byte-swap input to output */
+ cups_utf32_t ch; /* Character value */
+
/*
* Check for valid arguments and clear output...
*/
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1))
+
+ if (dest)
+ *dest = '\0';
+
+ if (!dest || !src || maxout < 1)
return (-1);
- *dest = '\0';
/*
* Check for leading BOM in UTF-32 and inverted BOM...
*/
- if (*src == 0xfffe0000)
- swap = 1;
+
+ start = dest;
+ swap = *src == 0xfffe0000;
+
+ if (*src == 0xfffe0000 || *src == 0xfeff)
+ src ++;
/*
* Convert input UTF-32 to output UTF-8...
*/
- for (i = 0; i < (maxout - 1); src ++)
+
+ for (i = maxout - 1; *src && i > 0;)
{
- ch = *src;
- if (ch == 0)
- break;
+ ch = *src++;
/*
* Byte swap input UTF-32, if necessary...
+ * (only byte-swapping 24 of 32 bits)
*/
+
if (swap)
ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
/*
- * Check for leading BOM (and delete from output)...
+ * Check for beyond Plane 16 (invalid UTF-32)...
*/
- if ((src == first) && (ch == 0xfeff))
- continue;
- /*
- * Check for beyond Plane 16 (invalid UTF-32)...
- */
if (ch > 0x10ffff)
return (-1);
- /*
- * Convert beyond Plane 0 (BMP) to Replacement Character...
- */
- if (ch > 0xffff)
- ch = 0xfffd;
-
/*
* Convert UTF-32 character to UTF-8 character(s)...
*/
- if (ch <= 0x7f)
+
+ if (ch < 0x80)
{
/*
* One-octet UTF-8 <= 127 (US-ASCII)...
*/
- *dest = (cups_utf8_t) ch;
- dest ++;
- i ++;
+
+ *dest++ = (cups_utf8_t)ch;
+ i --;
}
- else if (ch <= 0x7ff)
+ else if (ch < 0x800)
{
/*
* Two-octet UTF-8 <= 2047 (Latin-x)...
*/
- if (i > (maxout - 2))
- break;
- *dest = (cups_utf8_t) (0xc0 | ((ch >> 6) & 0x1f));
- dest ++;
- i ++;
- *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
- dest ++;
- i ++;
+
+ if (i < 2)
+ return (-1);
+
+ *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
+ *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
+ i -= 2;
}
- else
+ else if (ch < 0x10000)
{
/*
* Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
*/
- if (i > (maxout - 3))
- break;
- *dest = (cups_utf8_t) (0xe0 | ((ch >> 12) & 0x0f));
- dest ++;
- i ++;
- *dest = (cups_utf8_t) (0x80 | ((ch >> 6) & 0x3f));
- dest ++;
- i ++;
- *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
- dest ++;
- i ++;
+
+ if (i < 3)
+ return (-1);
+
+ *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
+ *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
+ *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
+ i -= 3;
+ }
+ else
+ {
+ /*
+ * Four-octet UTF-8...
+ */
+
+ if (i < 4)
+ return (-1);
+
+ *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
+ *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
+ *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
+ *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
+ i -= 4;
}
}
+
*dest = '\0';
- i = (int) (dest - start);
- return (i);
+
+ return ((int)(dest - start));
}
+
/*
- * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
- *
- * This code does not support Unicode beyond 16-bits (Plane 0)...
+ * 'compare_wide()' - Compare key for wide (VBCS) match.
+ */
+
+static int /* O - Negative, 0, or positive: key <, ==, > map */
+compare_wide(const void *k1, /* I - Key char */
+ const void *k2) /* I - Map char */
+{
+ cups_vbcs_t key; /* Legacy key character */
+ cups_vbcs_t map; /* Legacy map character */
+
+
+ key = *((cups_vbcs_t *)k1);
+ map = ((_cups_wide2uni_t *)k2)->widechar;
+
+ /*
+ * Compare the two values instead of returning their difference: the
+ * difference of two wide character values need not fit in an "int",
+ * so casting it can produce a result with the wrong sign...
+ */
+
+ if (key < map)
+ return (-1);
+ else if (key > map)
+ return (1);
+ else
+ return (0);
+}
+
+
+/*
+ * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
*/
-int /* O - Count or -1 on error */
-cupsUTF16ToUTF32(cups_utf32_t *dest, /* O - Target string */
- const cups_utf16_t *src, /* I - Source string */
- const int maxout) /* I - Max output */
+
+static int /* O - Count or -1 on error */
+conv_sbcs_to_utf8(
+ cups_utf8_t *dest, /* O - Target string */
+ const cups_sbcs_t *src, /* I - Source string */
+ int maxout, /* I - Max output */
+ const cups_encoding_t encoding) /* I - Encoding */
{
- int i; /* Looping variable */
- int swap = 0; /* Byte-swap input to output */
- int surrogate = 0; /* Expecting low-half surrogate */
- cups_utf32_t ch; /* Character value */
+ _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
+ cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
+ cups_sbcs_t legchar; /* Legacy character value */
+ cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
+ *workptr; /* Pointer into string */
+
/*
- * Check for valid arguments and clear output...
+ * Find legacy charset map in cache...
*/
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1)
- || (maxout > CUPS_MAX_USTRING))
+
+ if ((cmap = (_cups_cmap_t *)_cupsCharmapGet(encoding)) == NULL)
return (-1);
- *dest = 0;
/*
- * Check for leading BOM in UTF-16 and inverted BOM...
+ * Convert input legacy charset to internal UCS-4 (and insert BOM)...
*/
- if (*src == 0xfffe)
- swap = 1;
- /*
- * Convert input UTF-16 to output UTF-32...
- */
- for (i = 0; i < (maxout - 1); src ++)
+ work[0] = 0xfeff;
+ for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
{
- ch = (cups_utf32_t) (*src & 0xffff);
- if (ch == 0)
- break;
- i ++;
+ legchar = *src++;
/*
- * Byte swap input UTF-16, if necessary...
+ * Convert ASCII verbatim (optimization)...
*/
- if (swap)
- ch = (cups_utf32_t) ((ch << 8) | (ch >> 8));
- /*
- * Discard expected UTF-16 low-half surrogate...
- */
- if ((ch >= 0xdc00) && (ch <= 0xdfff))
+ if (legchar < 0x80)
+ *workptr++ = (cups_utf32_t)legchar;
+ else
{
- if (surrogate == 0)
- return (-1);
- surrogate = 0;
- continue;
- }
+ /*
+ * Convert unknown character to Replacement Character...
+ */
- /*
- * Convert UTF-16 high-half surrogate to Replacement Character...
- */
- if ((ch >= 0xd800) && (ch <= 0xdbff))
- {
- if (surrogate == 1)
- return (-1);
- surrogate = 1;
- ch = 0xfffd;
+ crow = cmap->char2uni + legchar;
+
+ if (!*crow)
+ *workptr++ = 0xfffd;
+ else
+ *workptr++ = (cups_utf32_t)*crow;
}
- *dest = ch;
- dest ++;
}
- *dest = 0;
- return (i);
+
+ *workptr = 0;
+
+ /*
+ * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
+ */
+
+ _cupsCharmapFree(encoding);
+
+ return (cupsUTF32ToUTF8(dest, work, maxout));
}
+
/*
- * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
- *
- * This code does not support Unicode beyond 16-bits (Plane 0)...
+ * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
*/
-int /* O - Count or -1 on error */
-cupsUTF32ToUTF16(cups_utf16_t *dest, /* O - Target string */
- const cups_utf32_t *src, /* I - Source string */
- const int maxout) /* I - Max output */
+
+static int /* O - Count or -1 on error */
+conv_utf8_to_sbcs(
+ cups_sbcs_t *dest, /* O - Target string */
+ const cups_utf8_t *src, /* I - Source string */
+ int maxout, /* I - Max output */
+ const cups_encoding_t encoding) /* I - Encoding */
{
- int i; /* Looping variable */
- int swap = 0; /* Byte-swap input to output */
- cups_utf32_t ch; /* Character value */
+ cups_sbcs_t *start; /* Start of destination string */
+ _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
+ cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
+ cups_utf32_t unichar; /* Character value */
+ cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
+ *workptr; /* Pointer into string */
+
/*
- * Check for valid arguments and clear output...
+ * Find legacy charset map in cache...
*/
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1)
- || (maxout > CUPS_MAX_USTRING))
+
+ if ((cmap = (_cups_cmap_t *) _cupsCharmapGet(encoding)) == NULL)
return (-1);
- *dest = 0;
/*
- * Check for leading BOM in UTF-32 and inverted BOM...
+ * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
*/
- if (*src == 0xfffe0000)
- swap = 1;
+
+ if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
+ return (-1);
/*
- * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
+ * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
*/
- for (i = 0; i < (maxout - 1); src ++, dest ++)
+
+ for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
{
- ch = *src;
- if (ch == 0)
+ unichar = *workptr++;
+ if (!unichar)
break;
- i ++;
-
- /*
- * Byte swap input UTF-32, if necessary...
- */
- if (swap)
- ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
/*
- * Check for UTF-16 surrogate (illegal UTF-32)...
+ * Convert ASCII verbatim (optimization)...
*/
- if ((ch >= 0xd800) && (ch <= 0xdfff))
- return (-1);
- /*
- * Check for beyond Plane 16 (invalid UTF-32)...
- */
- if (ch > 0x10ffff)
- return (-1);
+ if (unichar < 0x80)
+ {
+ *dest++ = (cups_sbcs_t)unichar;
+ continue;
+ }
/*
- * Convert beyond Plane 0 (BMP) to Replacement Character...
+ * Convert unknown character to visible replacement...
*/
- if (ch > 0xffff)
- ch = 0xfffd;
- *dest = (cups_utf16_t) ch;
- }
- *dest = 0;
- return (i);
-}
-/*
- * 'get_charmap_count()' - Count lines in a charmap file.
- */
-static int /* O - Count or -1 on error */
-get_charmap_count(const char *filename) /* I - Charmap Filename */
-{
- int i; /* Looping variable */
- cups_file_t *fp; /* Map input file pointer */
- char *s; /* Line parsing pointer */
- char line[256]; /* Line from input map file */
- cups_utf32_t unichar; /* Unicode character value */
+ srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
- /*
- * Open map input file...
- */
- if ((filename == NULL) || (*filename == '\0'))
- return (-1);
- fp = cupsFileOpen(filename, "r");
- if (fp == NULL)
- return (-1);
+ if (srow)
+ srow += (int)(unichar & 0xff);
- /*
- * Count lines in map input file...
- */
- for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
- {
- s = cupsFileGets(fp, line, sizeof(line));
- if (s == NULL)
- break;
- if ((*s == '#') || (*s == '\n') || (*s == '\0'))
- continue;
- while ((*s != 0) && (*s != ' ') && (*s != '\t'))
- s ++;
- while ((*s == ' ') || (*s == '\t'))
- s ++;
- if (strncmp (s, "0x", 2) == 0)
- s += 2;
- if ((sscanf(s, "%lx", &unichar) != 1)
- || (unichar > 0xffff))
- {
- cupsFileClose(fp);
- return (-1);
- }
- i ++;
+ if (!srow || !*srow)
+ *dest++ = '?';
+ else
+ *dest++ = *srow;
}
- if (i == 0)
- i = -1;
- /*
- * Close file and return charmap count (non-comment line count)...
- */
- cupsFileClose(fp);
- return (i);
+ *dest = '\0';
+
+ _cupsCharmapFree(encoding);
+
+ return ((int)(dest - start));
}
+
/*
- * 'get_sbcs_charmap()' - Get SBCS Charmap.
+ * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
+ *
+ * The UTF-8 input is expanded to UCS-4 and every character is then written
+ * as 1 to 4 legacy bytes, most significant byte first. ASCII is copied
+ * verbatim and characters with no entry in the map's 'uni2char' table are
+ * written as '?'. Once _cupsCharmapGet() has succeeded, the map is handed
+ * back with _cupsCharmapFree() on every return path. When -1 is returned
+ * 'dest' may hold partial output without a terminating nul.
*/
-static _cups_cmap_t * /* O - Charmap or 0 on error */
-get_sbcs_charmap(const cups_encoding_t encoding,
- /* I - Charmap Encoding */
- const char *filename) /* I - Charmap Filename */
+
+static int /* O - Count or -1 on error */
+conv_utf8_to_vbcs(
+ cups_sbcs_t *dest, /* O - Target string */
+ const cups_utf8_t *src, /* I - Source string */
+ int maxout, /* I - Max output */
+ const cups_encoding_t encoding) /* I - Encoding */
{
- int i; /* Loop variable */
- unsigned long legchar; /* Legacy character value */
- cups_utf32_t unichar; /* Unicode character value */
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- cups_file_t *fp; /* Charset map file pointer */
- char *s; /* Line parsing pointer */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
- char line[256]; /* Line from charset map file */
- _cups_globals_t *cg = _cupsGlobals();
- /* Pointer to library globals */
+ cups_sbcs_t *start; /* Start of destination string */
+ _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
+ cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
+ cups_utf32_t unichar; /* Character value */
+ cups_vbcs_t legchar; /* Legacy character value */
+ cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
+ *workptr; /* Pointer into string */
- /*
- * Check for valid arguments...
- */
- if ((encoding < 0) || (filename == NULL))
- return (NULL);
/*
- * See if we already have this SBCS charset map loaded...
+ * Find legacy charset map in cache...
*/
- for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
- {
- if (cmap->encoding == encoding)
- {
- cmap->used ++;
- return ((void *) cmap);
- }
- }
- /*
- * Open SBCS charset map input file...
- */
- fp = cupsFileOpen(filename, "r");
- if (fp == NULL)
- return (NULL);
+ if ((vmap = (_cups_vmap_t *)_cupsCharmapGet(encoding)) == NULL)
+ return (-1);
/*
- * Allocate memory for SBCS charset map and add to cache...
+ * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
*/
- cmap = (_cups_cmap_t *) calloc(1, sizeof(_cups_cmap_t));
- if (cmap == NULL)
- {
- cupsFileClose(fp);
- return (NULL);
- }
- cmap->next = cg->cmap_cache;
- cg->cmap_cache = cmap;
- cmap->used ++;
- cmap->encoding = encoding;
+
+ if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
+ {
+ _cupsCharmapFree(encoding); /* Hand the map back on error, too */
+ return (-1);
+ }
/*
- * Save SBCS charset map into memory for transcoding...
+ * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
*/
- for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
+
+ for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
{
- s = cupsFileGets(fp, line, sizeof(line));
- if (s == NULL)
+ unichar = *workptr++;
+ if (!unichar)
break;
- if ((*s == '#') || (*s == '\n') || (*s == '\0'))
- continue;
- if (strncmp (s, "0x", 2) == 0)
- s += 2;
- if ((sscanf(s, "%lx", &legchar) != 1)
- || (legchar > 0xff))
- {
- cupsFileClose(fp);
- cupsCharmapFlush();
- return (NULL);
- }
- while ((*s != 0) && (*s != ' ') && (*s != '\t'))
- s ++;
- while ((*s == ' ') || (*s == '\t'))
- s ++;
- if (strncmp (s, "0x", 2) == 0)
- s += 2;
- if (sscanf(s, "%lx", &unichar) != 1)
- {
- cupsFileClose(fp);
- cupsCharmapFlush();
- return (NULL);
- }
- i ++;
/*
- * Convert beyond Plane 0 (BMP) to Replacement Character...
+ * Convert ASCII verbatim (optimization)...
*/
- if (unichar > 0xffff)
- unichar = 0xfffd;
+
+ if (unichar < 0x80)
+ {
+ *dest++ = (cups_vbcs_t)unichar;
+ continue;
+ }
/*
- * Save legacy to Unicode mapping in direct lookup table...
+ * Convert unknown character to visible replacement...
*/
- crow = &cmap->char2uni[(int) legchar];
- *crow = (cups_ucs2_t) (unichar & 0xffff);
+
+ vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
+
+ if (vrow)
+ vrow += (int)(unichar & 0xff);
+
+ if (!vrow || !*vrow)
+ legchar = (cups_vbcs_t)'?';
+ else
+ legchar = (cups_vbcs_t)*vrow;
/*
- * Save Unicode to legacy mapping in indirect lookup table...
+ * Save n-byte legacy character; every branch needs room for its bytes
+ * plus the terminating nul, and the loop's "maxout --" accounts for
+ * one of the bytes written...
*/
- srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
- if (srow == NULL)
+
+ if (legchar > 0xffffff)
{
- srow = (cups_sbcs_t *) calloc(256, sizeof(cups_sbcs_t));
- if (srow == NULL)
- {
- cupsFileClose(fp);
- cupsCharmapFlush();
- return (NULL);
- }
- cmap->uni2char[(int) ((unichar >> 8) & 0xff)] = srow;
+ if (maxout < 5)
+ {
+ _cupsCharmapFree(encoding);
+ return (-1);
+ }
+
+ *dest++ = (cups_sbcs_t)(legchar >> 24);
+ *dest++ = (cups_sbcs_t)(legchar >> 16);
+ *dest++ = (cups_sbcs_t)(legchar >> 8);
+ *dest++ = (cups_sbcs_t)legchar;
+
+ maxout -= 3;
}
- srow += (int) (unichar & 0xff);
+ else if (legchar > 0xffff)
+ {
+ if (maxout < 4)
+ {
+ _cupsCharmapFree(encoding);
+ return (-1);
+ }
- /*
- * Convert Replacement Character to visible replacement...
- */
- if (unichar == 0xfffd)
- legchar = (unsigned long) '?';
+ *dest++ = (cups_sbcs_t)(legchar >> 16);
+ *dest++ = (cups_sbcs_t)(legchar >> 8);
+ *dest++ = (cups_sbcs_t)legchar;
- /*
- * First (oldest) legacy character uses Unicode mapping cell...
- */
- if (*srow == 0)
- *srow = (cups_sbcs_t) legchar;
+ maxout -= 2;
+ }
+ else if (legchar > 0xff)
+ {
+ if (maxout < 3)
+ {
+ _cupsCharmapFree(encoding);
+ return (-1);
+ }
+
+ *dest++ = (cups_sbcs_t)(legchar >> 8);
+ *dest++ = (cups_sbcs_t)legchar;
+
+ maxout --;
+ }
+ else
+ {
+ /*
+ * Single-byte legacy character (this is also how the '?' replacement
+ * reaches the output); the loop condition already guarantees room...
+ */
+
+ *dest++ = (cups_sbcs_t)legchar;
+ }
}
- cupsFileClose(fp);
- return (cmap);
+
+ *dest = '\0';
+
+ _cupsCharmapFree(encoding);
+
+ return ((int)(dest - start));
}
+
/*
- * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
+ * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
+ *
+ * Bytes below 0x80 are copied as ASCII. For any other byte the map's
+ * 'lead2char', 'lead3char' and 'lead4char' tables decide whether it starts
+ * a 2-, 3- or 4-byte character. 2-byte characters are looked up in
+ * 'char2uni'; 3- and 4-byte characters are searched in 'wide2uni' with
+ * bsearch(). Characters without a mapping become U+FFFD.
+ *
+ * -1 is returned when the map cannot be loaded, when the input ends in the
+ * middle of a multi-byte character, or when a non-ASCII byte is not a known
+ * lead byte. Once _cupsCharmapGet() has succeeded, the map is handed back
+ * with _cupsCharmapFree() on every return path.
*/
-static _cups_vmap_t * /* O - Charmap or 0 on error */
-get_vbcs_charmap(const cups_encoding_t encoding,
- /* I - Charmap Encoding */
- const char *filename) /* I - Charmap Filename */
+
+static int /* O - Count or -1 on error */
+conv_vbcs_to_utf8(
+ cups_utf8_t *dest, /* O - Target string */
+ const cups_sbcs_t *src, /* I - Source string */
+ int maxout, /* I - Max output */
+ const cups_encoding_t encoding) /* I - Encoding */
{
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
- _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
- cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
- unsigned long legchar; /* Legacy character value */
- cups_utf32_t unichar; /* Unicode character value */
- int mapcount; /* Count of lines in charmap file */
- cups_file_t *fp; /* Charset map file pointer */
- char *s; /* Line parsing pointer */
- char line[256]; /* Line from charset map file */
- int i; /* Loop variable */
- int wide; /* 32-bit legacy char */
- _cups_globals_t *cg = _cupsGlobals();
- /* Pointer to library globals */
+ _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
+ cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
+ _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
+ cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
+ cups_vbcs_t legchar; /* Legacy character value */
+ cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
+ *workptr; /* Pointer into string */
- /*
- * Check for valid arguments...
- */
- if ((encoding < 0) || (filename == NULL))
- return (NULL);
/*
- * See if we already have this DBCS/VBCS charset map loaded...
+ * Find legacy charset map in cache...
*/
- for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
- {
- if (vmap->encoding == encoding)
- {
- vmap->used ++;
- return ((void *) vmap);
- }
- }
- /*
- * Count lines in charmap file...
- */
- mapcount = get_charmap_count(filename);
- if (mapcount <= 0)
- return (NULL);
+ if ((vmap = (_cups_vmap_t *)_cupsCharmapGet(encoding)) == NULL)
+ return (-1);
/*
- * Open VBCS charset map input file...
+ * Convert input legacy charset to internal UCS-4 (and insert BOM)...
*/
- fp = cupsFileOpen(filename, "r");
- if (fp == NULL)
- return (NULL);
- /*
- * Allocate memory for DBCS/VBCS charset map and add to cache...
- */
- vmap = (_cups_vmap_t *) calloc(1, sizeof(_cups_vmap_t));
- if (vmap == NULL)
+ work[0] = 0xfeff;
+ for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
{
- cupsFileClose(fp);
- return (NULL);
- }
- vmap->next = cg->vmap_cache;
- cg->vmap_cache = vmap;
- vmap->used ++;
- vmap->encoding = encoding;
-
- /*
- * Save DBCS/VBCS charset map into memory for transcoding...
- */
- leadchar = 0;
- wide2uni = NULL;
-
- for (i = 0, wide = 0; i < mapcount; )
- {
- s = cupsFileGets(fp, line, sizeof(line));
- if (s == NULL)
- break;
- if ((*s == '#') || (*s == '\n') || (*s == '\0'))
- continue;
- if (strncmp (s, "0x", 2) == 0)
- s += 2;
- if ((sscanf(s, "%lx", &legchar) != 1)
- || ((legchar > 0xffff) && (encoding < CUPS_ENCODING_DBCS_END)))
- {
- cupsFileClose(fp);
- cupsCharmapFlush();
- return (NULL);
- }
- while ((*s != 0) && (*s != ' ') && (*s != '\t'))
- s ++;
- while ((*s == ' ') || (*s == '\t'))
- s ++;
- if (strncmp (s, "0x", 2) == 0)
- s += 2;
- if (sscanf(s, "%lx", &unichar) != 1)
- {
- cupsFileClose(fp);
- cupsCharmapFlush();
- return (NULL);
- }
- i ++;
+ legchar = *src++;
+ leadchar = (cups_sbcs_t)legchar;
/*
- * Convert beyond Plane 0 (BMP) to Replacement Character...
+ * Convert ASCII verbatim (optimization)...
*/
- if (unichar > 0xffff)
- unichar = 0xfffd;
- /*
- * Save lead char of 2/3/4-byte legacy char...
- */
- if ((legchar > 0xff) && (legchar <= 0xffff))
- {
- leadchar = (cups_sbcs_t) (legchar >> 8);
- vmap->lead2char[leadchar] = leadchar;
- }
- if ((legchar > 0xffff) && (legchar <= 0xffffff))
+ if (legchar < 0x80)
{
- leadchar = (cups_sbcs_t) (legchar >> 16);
- vmap->lead3char[leadchar] = leadchar;
- }
- if (legchar > 0xffffff)
- {
- leadchar = (cups_sbcs_t) (legchar >> 24);
- vmap->lead4char[leadchar] = leadchar;
+ *workptr++ = (cups_utf32_t)legchar;
+ continue;
}
/*
- * Save Legacy to Unicode mapping...
+ * Convert 2-byte legacy character...
*/
- if (legchar <= 0xffff)
- {
- /*
- * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
- */
- crow = vmap->char2uni[(int) leadchar];
- if (crow == NULL)
- {
- crow = (cups_ucs2_t *) calloc(256, sizeof(cups_ucs2_t));
- if (crow == NULL)
- {
- cupsFileClose(fp);
- cupsCharmapFlush();
- return (NULL);
- }
- vmap->char2uni[(int) leadchar] = crow;
- }
- crow += (int) (legchar & 0xff);
- *crow = (cups_ucs2_t) unichar;
- }
- else
+
+ if (vmap->lead2char[(int)leadchar] == leadchar)
{
+ if (!*src)
+ {
+ _cupsCharmapFree(encoding); /* Truncated input; hand the map back */
+ return (-1);
+ }
+
+ legchar = (legchar << 8) | *src++;
+
/*
- * Save VBCS 32-bit to Unicode mapping in sorted list table...
+ * Convert unknown character to Replacement Character...
*/
- if (wide == 0)
- {
- wide = 1;
- vmap->widecount = (mapcount - i + 1);
- wide2uni = (_cups_wide2uni_t *)
- calloc(vmap->widecount, sizeof(_cups_wide2uni_t));
- if (wide2uni == NULL)
- {
- cupsFileClose(fp);
- cupsCharmapFlush();
- return (NULL);
- }
- vmap->wide2uni = wide2uni;
- }
- wide2uni->widechar = (cups_vbcs_t) legchar;
- wide2uni->unichar = (cups_ucs2_t)unichar;
- wide2uni ++;
+
+ crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
+ if (crow)
+ crow += (int) (legchar & 0xff);
+
+ if (!crow || !*crow)
+ *workptr++ = 0xfffd;
+ else
+ *workptr++ = (cups_utf32_t)*crow;
+ continue;
}
/*
- * Save Unicode to legacy mapping in indirect lookup table...
+ * Fetch 3-byte or 4-byte legacy character...
*/
- vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
- if (vrow == NULL)
+
+ if (vmap->lead3char[(int)leadchar] == leadchar)
{
- vrow = (cups_vbcs_t *) calloc(256, sizeof(cups_vbcs_t));
- if (vrow == NULL)
- {
- cupsFileClose(fp);
- cupsCharmapFlush();
- return (NULL);
- }
- vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
+ if (!*src || !src[1])
+ {
+ _cupsCharmapFree(encoding);
+ return (-1);
+ }
+
+ legchar = (legchar << 8) | *src++;
+ legchar = (legchar << 8) | *src++;
}
- vrow += (int) (unichar & 0xff);
+ else if (vmap->lead4char[(int)leadchar] == leadchar)
+ {
+ if (!*src || !src[1] || !src[2])
+ {
+ _cupsCharmapFree(encoding);
+ return (-1);
+ }
+
+ legchar = (legchar << 8) | *src++;
+ legchar = (legchar << 8) | *src++;
+ legchar = (legchar << 8) | *src++;
+ }
+ else
+ {
+ _cupsCharmapFree(encoding); /* Not a known lead byte */
+ return (-1);
+ }
/*
- * Convert Replacement Character to visible replacement...
+ * Find 3-byte or 4-byte legacy character...
*/
- if (unichar == 0xfffd)
- legchar = (unsigned long) '?';
+
+ wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
+ vmap->wide2uni,
+ vmap->widecount,
+ sizeof(_cups_wide2uni_t),
+ compare_wide);
/*
- * First (oldest) legacy character uses Unicode mapping cell...
+ * Convert unknown character to Replacement Character...
*/
- if (*vrow == 0)
- *vrow = (cups_vbcs_t) legchar;
+
+ if (!wide2uni || !wide2uni->unichar)
+ *workptr++ = 0xfffd;
+ else
+ *workptr++ = wide2uni->unichar;
}
- vmap->charcount = (i - vmap->widecount);
- cupsFileClose(fp);
- return (vmap);
+
+ *workptr = 0;
+
+ _cupsCharmapFree(encoding);
+
+ /*
+ * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
+ */
+
+ return (cupsUTF32ToUTF8(dest, work, maxout));
}
+
/*
- * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
+ * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
+ *
+ * Frees each allocated row of the map's 'uni2char' table and then the map
+ * structure itself.
*/
-static int /* O - Count or -1 on error */
-conv_utf8_to_sbcs(char *dest, /* O - Target string */
- const cups_utf8_t *src, /* I - Source string */
- const int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
+
+static void
+free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
{
- char *start = dest; /* Start of destination string */
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
- cups_utf32_t unichar; /* Character value */
- int worklen; /* Internal UCS-4 string length */
- cups_utf32_t work[CUPS_MAX_USTRING];
- /* Internal UCS-4 string */
- int i; /* Looping variable */
+ int row; /* Current row of 'uni2char' */
- /*
- * Check for valid arguments and clear output...
- */
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1)
- || (maxout > CUPS_MAX_USTRING)
- || (encoding == CUPS_UTF8))
- return (-1);
- *dest = '\0';
- /*
- * Find legacy charset map in cache...
- */
- cmap = (_cups_cmap_t *) cupsCharmapGet(encoding);
- if (cmap == NULL)
- return (-1);
+ for (row = 0; row < 256; row ++)
+ {
+ if (!cmap->uni2char[row])
+ continue;
+
+ free(cmap->uni2char[row]);
+ }
+
+ free(cmap);
+}
+
+
+/*
+ * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
+ *
+ * Frees each allocated row of the 'char2uni' and 'uni2char' tables, the
+ * 'wide2uni' array when present, and then the map structure itself.
+ */
+
+static void
+free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
+{
+ int row; /* Current lookup table row */
+
+
+ for (row = 0; row < 256; row ++)
+ {
+ if (!vmap->char2uni[row])
+ continue;
+
+ free(vmap->char2uni[row]);
+ }
+
+ for (row = 0; row < 256; row ++)
+ {
+ if (!vmap->uni2char[row])
+ continue;
+
+ free(vmap->uni2char[row]);
+ }
+
+ if (vmap->wide2uni != NULL)
+ free(vmap->wide2uni);
+
+ free(vmap);
+}
+
+
+/*
+ * 'get_charmap_count()' - Count lines in a charmap file.
+ *
+ * Only lines whose first character is '0' are counted. Reading continues
+ * until cupsFileGets() returns NULL; the file is not rewound here.
+ */
+
+static int /* O - Count or -1 on error */
+get_charmap_count(cups_file_t *fp) /* I - File to read from */
+{
+ int total = 0; /* Number of counted lines */
+ char buffer[256]; /* Line from input map file */
- /*
- * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
- */
- worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
- if (worklen < 0)
- return (-1);
/*
- * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
+ * Tally the lines that begin with '0'...
*/
- for (i = 0; i < worklen;)
- {
- unichar = work[i];
- if (unichar == 0)
- break;
- i ++;
- /*
- * Check for leading BOM (and delete from output)...
- */
- if ((i == 1) && (unichar == 0xfeff))
- continue;
- /*
- * Convert ASCII verbatim (optimization)...
- */
- if (unichar <= 0x7f)
- {
- *dest = (char) unichar;
- dest ++;
- continue;
- }
+ while (cupsFileGets(fp, buffer, sizeof(buffer)) != NULL)
+ {
+ if (buffer[0] != '0')
+ continue;
+
+ total ++;
+ }
- /*
- * Convert unknown character to visible replacement...
- */
- srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
- if (srow)
- srow += (int) (unichar & 0xff);
- if ((srow == NULL) || (*srow == 0))
- *dest = '?';
- else
- *dest = (char) (*srow);
- dest ++;
- }
- *dest = '\0';
- worklen = (int) (dest - start);
- cupsCharmapFree(encoding);
- return (worklen);
+ /*
+ * A file with no counted lines is reported as an error...
+ */
+
+ return (total > 0 ? total : -1);
}
+
/*
- * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
+ * 'get_sbcs_charmap()' - Get SBCS Charmap.
+ *
+ * A map already in the cache for 'encoding' is returned with its use count
+ * incremented. Otherwise 'filename' is parsed: every line starting with
+ * '0' supplies a hex legacy value (at most 0xff) followed by a hex Unicode
+ * value (at most 0xffff). On a parse or allocation error the partial map
+ * is freed, the file is closed and NULL is returned.
*/
-static int /* O - Count or -1 on error */
-conv_utf8_to_vbcs(char *dest, /* O - Target string */
- const cups_utf8_t *src, /* I - Source string */
- const int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
+
+static _cups_cmap_t * /* O - Charmap or 0 on error */
+get_sbcs_charmap(
+ const cups_encoding_t encoding, /* I - Charmap Encoding */
+ const char *filename) /* I - Charmap Filename */
{
- char *start = dest; /* Start of destination string */
- _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
- cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
- cups_utf32_t unichar; /* Character value */
- cups_vbcs_t legchar; /* Legacy character value */
- int worklen; /* Internal UCS-4 string length */
- cups_utf32_t work[CUPS_MAX_USTRING];
- /* Internal UCS-4 string */
- int i; /* Looping variable */
+ unsigned long legchar; /* Legacy character value */
+ cups_utf32_t unichar; /* Unicode character value */
+ int row; /* Row index into 'uni2char' */
+ _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
+ cups_file_t *fp; /* Charset map file pointer */
+ char *next; /* Text following the legacy value */
+ cups_sbcs_t *cell; /* Cell within a 'uni2char' row */
+ char line[256]; /* Line from charset map file */
+ _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */
+
/*
- * Check for valid arguments and clear output...
+ * Reuse a map that is already cached for this encoding...
*/
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1)
- || (maxout > CUPS_MAX_USTRING)
- || (encoding == CUPS_UTF8))
- return (-1);
- *dest = '\0';
+
+ for (cmap = cg->cmap_cache; cmap; cmap = cmap->next)
+ {
+ if (cmap->encoding != encoding)
+ continue;
+
+ cmap->used ++;
+ DEBUG_printf((" returning existing cmap=%p\n", cmap));
+ return ((void *)cmap);
+ }
/*
- * Find legacy charset map in cache...
+ * Open the SBCS charset map file for reading...
*/
- vmap = (_cups_vmap_t *) cupsCharmapGet(encoding);
- if (vmap == NULL)
- return (-1);
+
+ fp = cupsFileOpen(filename, "r");
+ if (!fp)
+ return (NULL);
/*
- * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
+ * Allocate a zeroed SBCS charset map...
*/
- worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
- if (worklen < 0)
- return (-1);
+
+ cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t));
+ if (!cmap)
+ {
+ cupsFileClose(fp);
+ DEBUG_puts(" Unable to allocate memory!");
+ return (NULL);
+ }
+
+ cmap->encoding = encoding;
+ cmap->used ++;
/*
- * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
+ * Load every mapping line into the lookup tables...
*/
- for (i = 0; i < worklen;)
+
+ while (cupsFileGets(fp, line, sizeof(line)))
{
- unichar = work[i];
- if (unichar == 0)
- break;
- i ++;
+ if (line[0] != '0')
+ continue;
+
+ legchar = strtol(line, &next, 16);
+ if (legchar < 0 || legchar > 0xff)
+ goto sbcs_error;
+
+ unichar = strtol(next, NULL, 16);
+ if (unichar < 0 || unichar > 0xffff)
+ goto sbcs_error;
/*
- * Check for leading BOM (and delete from output)...
+ * Legacy to Unicode goes in the direct table, indexed by legacy value...
*/
- if ((i == 1) && (unichar == 0xfeff))
- continue;
+
+ cmap->char2uni[legchar] = (cups_ucs2_t)(unichar & 0xffff);
/*
- * Convert ASCII verbatim (optimization)...
+ * Unicode to legacy uses one row per high byte, created on first use...
*/
- if (unichar <= 0x7f)
+
+ row = (int)((unichar >> 8) & 0xff);
+ cell = cmap->uni2char[row];
+ if (!cell)
{
- *dest = (char) unichar;
- dest ++;
- continue;
+ cell = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
+ if (!cell)
+ goto sbcs_error;
+
+ cmap->uni2char[row] = cell;
}
+ cell += unichar & 0xff;
+
/*
- * Convert unknown character to visible replacement...
+ * The Replacement Character maps back to a visible '?'...
*/
- vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
- if (vrow)
- vrow += (int) (unichar & 0xff);
- if ((vrow == NULL) || (*vrow == 0))
- legchar = (cups_vbcs_t) '?';
- else
- legchar = (cups_vbcs_t) *vrow;
+
+ if (unichar == 0xfffd)
+ legchar = (unsigned long)'?';
/*
- * Save n-byte legacy character...
+ * Only the first legacy value seen for a Unicode value is kept...
*/
- if (legchar > 0xffffff)
- {
- *dest = (char) ((legchar >> 24) & 0xff);
- dest++;
- }
- if (legchar > 0xffff)
- {
- *dest = (char) ((legchar >> 16) & 0xff);
- dest++;
- }
- if (legchar > 0xff)
- {
- *dest = (char) ((legchar >> 8) & 0xff);
- dest++;
- }
- *dest = (char) (legchar & 0xff);
- dest ++;
- }
- *dest = '\0';
- worklen = (int) (dest - start);
- cupsCharmapFree(encoding);
- return (worklen);
-}
-/*
- * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
- */
-static int /* O - Count or -1 on error */
-conv_sbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
- const char *src, /* I - Source string */
- const int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- unsigned long legchar; /* Legacy character value */
- cups_utf32_t unichar; /* Unicode character value */
- int worklen; /* Internal UCS-4 string length */
- cups_utf32_t work[CUPS_MAX_USTRING];
- /* Internal UCS-4 string */
- int i; /* Looping variable */
+ if (*cell == 0)
+ *cell = (cups_sbcs_t)legchar;
+ }
+ cupsFileClose(fp);
+
/*
- * Check for valid arguments and clear output...
+ * Link the finished map at the head of the cache and return it...
*/
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1)
- || (maxout > CUPS_MAX_USTRING)
- || (encoding == CUPS_UTF8))
- return (-1);
- *dest = '\0';
+
+ cmap->next = cg->cmap_cache;
+ cg->cmap_cache = cmap;
+
+ DEBUG_printf((" returning new cmap=%p\n", cmap));
+
+ return (cmap);
/*
- * Find legacy charset map in cache...
+ * Error exit: discard the partial map and close the file...
*/
- cmap = (_cups_cmap_t *) cupsCharmapGet(encoding);
- if (cmap == NULL)
- return (-1);
+
+ sbcs_error:
+
+ free_sbcs_charmap(cmap);
+
+ cupsFileClose(fp);
+
+ DEBUG_puts(" Error, returning NULL!");
+
+ return (NULL);
+}
+
+
+/*
+ * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
+ *
+ * A map already in the cache for 'encoding' is returned with its use count
+ * incremented. Otherwise 'filename' is read twice: once to count the
+ * mapping lines (lines starting with '0') and, after a rewind, once to load
+ * them. Legacy values up to 0xffff are stored in 'char2uni' rows; wider
+ * values are appended to the 'wide2uni' array. 'uni2char' keeps the first
+ * legacy value seen for each Unicode value.
+ *
+ * The file is closed on every return path after it has been opened, and a
+ * partially built map is freed on error.
+ */
+
+static _cups_vmap_t * /* O - Charmap or 0 on error */
+get_vbcs_charmap(
+ const cups_encoding_t encoding, /* I - Charmap Encoding */
+ const char *filename) /* I - Charmap Filename */
+{
+ _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
+ cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
+ cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
+ _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
+ cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
+ unsigned long legchar; /* Legacy character value */
+ cups_utf32_t unichar; /* Unicode character value */
+ int mapcount; /* Count of lines in charmap file */
+ cups_file_t *fp; /* Charset map file pointer */
+ char *s; /* Line parsing pointer */
+ char line[256]; /* Line from charset map file */
+ int i; /* Loop variable */
+ int wide; /* 32-bit legacy char */
+ _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */
+
+
+ DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
+ encoding, filename));
/*
- * Convert input legacy charset to internal UCS-4 (and insert BOM)...
+ * See if we already have this DBCS/VBCS charset map loaded...
*/
- work[0] = 0xfeff;
- for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
- {
- if (*src == '\0')
- break;
- legchar = (unsigned long) *src;
- /*
- * Convert ASCII verbatim (optimization)...
- */
- if (legchar <= 0x7f)
+ for (vmap = cg->vmap_cache; vmap; vmap = vmap->next)
+ {
+ if (vmap->encoding == encoding)
{
- work[i] = (cups_utf32_t) legchar;
- i ++;
- continue;
+ vmap->used ++;
+ DEBUG_printf((" returning existing vmap=%p\n", vmap));
+ return ((void *)vmap);
}
-
- /*
- * Convert unknown character to Replacement Character...
- */
- crow = &cmap->char2uni[0];
- crow += (int) legchar;
- if (*crow == 0)
- unichar = 0xfffd;
- else
- unichar = (cups_utf32_t) *crow;
- work[i] = unichar;
- i ++;
}
- work[i] = 0;
/*
- * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
+ * Open VBCS charset map input file...
*/
- worklen = cupsUTF32ToUTF8(dest, work, maxout);
- cupsCharmapFree(encoding);
- return (worklen);
-}
-
-/*
- * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
- */
-static int /* O - Count or -1 on error */
-conv_vbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
- const char *src, /* I - Source string */
- const int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
- cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
- cups_vbcs_t legchar; /* Legacy character value */
- cups_utf32_t unichar; /* Unicode character value */
- int i; /* Looping variable */
- int worklen; /* Internal UCS-4 string length */
- cups_utf32_t work[CUPS_MAX_USTRING];
- /* Internal UCS-4 string */
+ if ((fp = cupsFileOpen(filename, "r")) == NULL)
+ {
+ DEBUG_printf((" Unable to open file: %s\n", strerror(errno)));
+ return (NULL);
+ }
/*
- * Check for valid arguments and clear output...
+ * Count lines in charmap file...
*/
- if ((dest == NULL)
- || (src == NULL)
- || (maxout < 1)
- || (maxout > CUPS_MAX_USTRING)
- || (encoding == CUPS_UTF8))
- return (-1);
- *dest = '\0';
+
+ if ((mapcount = get_charmap_count(fp)) <= 0)
+ {
+ cupsFileClose(fp); /* Don't leak the open map file */
+ DEBUG_puts(" Unable to get charmap count!");
+ return (NULL);
+ }
+
+ DEBUG_printf((" mapcount=%d\n", mapcount));
/*
- * Find legacy charset map in cache...
+ * Allocate memory for DBCS/VBCS charset map...
*/
- vmap = (_cups_vmap_t *) cupsCharmapGet(encoding);
- if (vmap == NULL)
- return (-1);
+
+ if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
+ {
+ cupsFileClose(fp);
+ DEBUG_puts(" Unable to allocate memory!");
+ return (NULL);
+ }
+
+ vmap->used ++;
+ vmap->encoding = encoding;
/*
- * Convert input legacy charset to internal UCS-4 (and insert BOM)...
+ * Save DBCS/VBCS charset map into memory for transcoding...
*/
- work[0] = 0xfeff;
- for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
+
+ leadchar = 0;
+ wide2uni = NULL;
+
+ cupsFileRewind(fp);
+
+ i = 0;
+ wide = 0;
+
+ while (cupsFileGets(fp, line, sizeof(line)))
{
- if (*src == '\0')
- break;
- legchar = (cups_vbcs_t) *src;
- leadchar = (cups_sbcs_t) *src;
+ if (line[0] != '0')
+ continue;
+
+ legchar = strtoul(line, &s, 16);
+ if (legchar == ULONG_MAX)
+ goto vbcs_error;
+
+ unichar = strtol(s, NULL, 16);
+ if (unichar < 0 || unichar > 0xffff)
+ goto vbcs_error;
+
+ i ++;
+
+/* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
+ legchar, (unsigned)unichar)); */
/*
- * Convert ASCII verbatim (optimization)...
+ * Save lead char of 2/3/4-byte legacy char...
*/
- if (legchar <= 0x7f)
+
+ if (legchar > 0xff && legchar <= 0xffff)
{
- work[i] = (cups_utf32_t) legchar;
- i ++;
- continue;
+ leadchar = (cups_sbcs_t)(legchar >> 8);
+ vmap->lead2char[leadchar] = leadchar;
+ }
+
+ if (legchar > 0xffff && legchar <= 0xffffff)
+ {
+ leadchar = (cups_sbcs_t)(legchar >> 16);
+ vmap->lead3char[leadchar] = leadchar;
+ }
+
+ if (legchar > 0xffffff)
+ {
+ leadchar = (cups_sbcs_t)(legchar >> 24);
+ vmap->lead4char[leadchar] = leadchar;
}
/*
- * Convert 2-byte legacy character...
+ * Save Legacy to Unicode mapping...
*/
- if (vmap->lead2char[(int) leadchar] == leadchar)
+
+ if (legchar <= 0xffff)
{
- src ++;
- if (*src == '\0')
- return (-1);
- legchar = (legchar << 8) | (cups_vbcs_t) *src;
-
/*
- * Convert unknown character to Replacement Character...
+ * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
*/
- crow = vmap->char2uni[(int) ((legchar >> 8) & 0xff)];
- if (crow)
- crow += (int) (legchar & 0xff);
- if ((crow == NULL) || (*crow == 0))
- unichar = 0xfffd;
- else
- unichar = (cups_utf32_t) *crow;
- work[i] = unichar;
- i ++;
- continue;
+
+ crow = vmap->char2uni[(int)leadchar];
+ if (!crow)
+ {
+ crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
+ if (!crow)
+ goto vbcs_error;
+
+ vmap->char2uni[(int)leadchar] = crow;
+ }
+
+ crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
+ }
+ else
+ {
+ /*
+ * Save VBCS 32-bit to Unicode mapping in sorted list table...
+ */
+
+ if (!wide)
+ {
+ wide = 1;
+ vmap->widecount = (mapcount - i + 1);
+ wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
+ sizeof(_cups_wide2uni_t));
+ if (!wide2uni)
+ goto vbcs_error;
+
+ vmap->wide2uni = wide2uni;
+ }
+
+ wide2uni->widechar = (cups_vbcs_t)legchar;
+ wide2uni->unichar = (cups_ucs2_t)unichar;
+ wide2uni ++;
}
/*
- * Fetch 3-byte or 4-byte legacy character...
+ * Save Unicode to legacy mapping in indirect lookup table...
*/
- if (vmap->lead3char[(int) leadchar] == leadchar)
- {
- src ++;
- if (*src == '\0')
- return (-1);
- legchar = (legchar << 8) | (cups_vbcs_t) *src;
- src ++;
- if (*src == '\0')
- return (-1);
- legchar = (legchar << 8) | (cups_vbcs_t) *src;
- }
- else if (vmap->lead4char[(int) leadchar] == leadchar)
+
+ vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
+ if (!vrow)
{
- src ++;
- if (*src == '\0')
- return (-1);
- legchar = (legchar << 8) | (cups_vbcs_t) *src;
- src ++;
- if (*src == '\0')
- return (-1);
- legchar = (legchar << 8) | (cups_vbcs_t) *src;
- src ++;
- if (*src == '\0')
- return (-1);
- legchar = (legchar << 8) | (cups_vbcs_t) *src;
+ vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
+ if (!vrow)
+ goto vbcs_error;
+
+ vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
}
- else
- return (-1);
+
+ vrow += (int)(unichar & 0xff);
/*
- * Find 3-byte or 4-byte legacy character...
+ * Convert Replacement Character to visible replacement...
*/
- wide2uni = vmap->wide2uni;
- wide2uni = (_cups_wide2uni_t *) bsearch(&legchar,
- vmap->wide2uni,
- vmap->widecount,
- sizeof(_cups_wide2uni_t),
- compare_wide);
+
+ if (unichar == 0xfffd)
+ legchar = (unsigned long)'?';
/*
- * Convert unknown character to Replacement Character...
+ * First (oldest) legacy character uses Unicode mapping cell...
*/
- if ((wide2uni == NULL) || (wide2uni->unichar == 0))
- unichar = 0xfffd;
- else
- unichar = wide2uni->unichar;
- work[i] = unichar;
- i ++;
+
+ if (!*vrow)
+ *vrow = (cups_vbcs_t)legchar;
}
- work[i] = 0;
+
+ vmap->charcount = (i - vmap->widecount);
+
+ cupsFileClose(fp);
/*
- * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
+ * Add it to the cache and return...
*/
- worklen = cupsUTF32ToUTF8(dest, work, maxout);
- cupsCharmapFree(encoding);
- return (worklen);
-}
-/*
- * 'compare_wide()' - Compare key for wide (VBCS) match.
- */
-static int
-compare_wide(const void *k1, /* I - Key char */
- const void *k2) /* I - Map char */
-{
- cups_vbcs_t *kp = (cups_vbcs_t *) k1;
- /* Key char pointer */
- _cups_wide2uni_t *mp = (_cups_wide2uni_t *) k2;
- /* Map char pointer */
- cups_vbcs_t key; /* Legacy key character */
- cups_vbcs_t map; /* Legacy map character */
- int result; /* Result Value */
-
- key = *kp;
- map = mp->widechar;
- if (key >= map)
- result = (int) (key - map);
- else
- result = -1 * ((int) (map - key));
- return (result);
+ vmap->next = cg->vmap_cache;
+ cg->vmap_cache = vmap;
+
+ DEBUG_printf((" returning new vmap=%p\n", vmap));
+
+ return (vmap);
+
+ /*
+ * If we get here, the file contains errors...
+ */
+
+ vbcs_error:
+
+ free_vbcs_charmap(vmap);
+
+ cupsFileClose(fp);
+
+ DEBUG_puts(" Error, returning NULL!");
+
+ return (NULL);
}
/*
- * End of "$Id: transcode.c 4967 2006-01-24 03:42:15Z mike $"
+ * End of "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $"
*/