X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=cups%2Ftranscode.c;h=4267813192d76adac64d831bd629084368da9c6d;hb=fb863569eb4f51ae62a066dc8049d3ffc0efb0bb;hp=04f4f8bddffc3e082ce3bcb65ac439aa580bd99b;hpb=e1d6a77454308ff30d6da778be9d7b570e4f00b0;p=thirdparty%2Fcups.git diff --git a/cups/transcode.c b/cups/transcode.c index 04f4f8bdd..426781319 100644 --- a/cups/transcode.c +++ b/cups/transcode.c @@ -1,86 +1,39 @@ /* - * "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $" + * Transcoding support for CUPS. * - * Transcoding support for the Common UNIX Printing System (CUPS). + * Copyright 2007-2014 by Apple Inc. + * Copyright 1997-2007 by Easy Software Products. * - * Copyright 1997-2006 by Easy Software Products. - * - * These coded instructions, statements, and computer programs are - * the property of Easy Software Products and are protected by Federal - * copyright law. Distribution and use rights are outlined in the - * file "LICENSE.txt" which should have been included with this file. - * If this file is missing or damaged please contact Easy Software - * Products at: - * - * Attn: CUPS Licensing Information - * Easy Software Products - * 44141 Airport View Drive, Suite 204 - * Hollywood, Maryland 20636 USA - * - * Voice: (301) 373-9600 - * EMail: cups-info@cups.org - * WWW: http://www.cups.org - * - * Contents: - * - * _cupsCharmapFlush() - Flush all character set maps out of cache. - * _cupsCharmapFree() - Free a character set map. - * _cupsCharmapGet() - Get a character set map. - * cupsCharsetToUTF8() - Convert legacy character set to UTF-8. - * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set. - * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32. - * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8. - * compare_wide() - Compare key for wide (VBCS) match. - * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8. - * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS. - * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS. - * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8. - * free_sbcs_charmap() - Free memory used by a single byte character set. - * free_vbcs_charmap() - Free memory used by a variable byte character set. - * get_charmap_count() - Count lines in a charmap file. - * get_sbcs_charmap() - Get SBCS Charmap. - * get_vbcs_charmap() - Get DBCS/VBCS Charmap. + * Licensed under Apache License v2.0. See the file "LICENSE" for more information. */ /* * Include necessary headers... */ -#include "globals.h" -#include "debug.h" -#include -#include +#include "cups-private.h" +#include "debug-internal.h" +#include #include +#ifdef HAVE_ICONV_H +# include +#endif /* HAVE_ICONV_H */ /* - * Local functions... + * Local globals... */ -static int compare_wide(const void *k1, const void *k2); -static int conv_sbcs_to_utf8(cups_utf8_t *dest, - const cups_sbcs_t *src, - int maxout, - const cups_encoding_t encoding); -static int conv_utf8_to_sbcs(cups_sbcs_t *dest, - const cups_utf8_t *src, - int maxout, - const cups_encoding_t encoding); -static int conv_utf8_to_vbcs(cups_sbcs_t *dest, - const cups_utf8_t *src, - int maxout, - const cups_encoding_t encoding); -static int conv_vbcs_to_utf8(cups_utf8_t *dest, - const cups_sbcs_t *src, - int maxout, - const cups_encoding_t encoding); -static void free_sbcs_charmap(_cups_cmap_t *sbcs); -static void free_vbcs_charmap(_cups_vmap_t *vbcs); -static int get_charmap_count(cups_file_t *fp); -static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding, - const char *filename); -static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding, - const char *filename); +#ifdef HAVE_ICONV_H +static _cups_mutex_t map_mutex = _CUPS_MUTEX_INITIALIZER; + /* Mutex to control access to maps */ +static iconv_t map_from_utf8 = (iconv_t)-1; + /* Convert from UTF-8 to charset */ +static iconv_t map_to_utf8 = (iconv_t)-1; + /* Convert from charset to UTF-8 */ +static cups_encoding_t map_encoding = CUPS_AUTO_ENCODING; + /* Which charset is cached */ +#endif /* HAVE_ICONV_H */ /* @@ -88,207 +41,152 @@ static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding, */ void -_cupsCharmapFlush(_cups_globals_t *cg) /* I - Global data */ +_cupsCharmapFlush(void) { - _cups_cmap_t *cmap, /* Legacy SBCS / Unicode Charset Map */ - *cnext; /* Next Legacy SBCS Charset Map */ - _cups_vmap_t *vmap, /* Legacy VBCS / Unicode Charset Map */ - *vnext; /* Next Legacy VBCS Charset Map */ - - - /* - * Loop through SBCS charset map cache, free all memory... - */ - - for (cmap = cg->cmap_cache; cmap; cmap = cnext) +#ifdef HAVE_ICONV_H + if (map_from_utf8 != (iconv_t)-1) { - cnext = cmap->next; - - free_sbcs_charmap(cmap); + iconv_close(map_from_utf8); + map_from_utf8 = (iconv_t)-1; } - cg->cmap_cache = NULL; - - /* - * Loop through DBCS/VBCS charset map cache, free all memory... - */ - - for (vmap = cg->vmap_cache; vmap; vmap = vnext) + if (map_to_utf8 != (iconv_t)-1) { - vnext = vmap->next; - - free_vbcs_charmap(vmap); - - free(vmap); + iconv_close(map_to_utf8); + map_to_utf8 = (iconv_t)-1; } - cg->vmap_cache = NULL; + map_encoding = CUPS_AUTO_ENCODING; +#endif /* HAVE_ICONV_H */ } /* - * '_cupsCharmapFree()' - Free a character set map. - * - * This does not actually free; use '_cupsCharmapFlush()' for that. + * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8. */ -void -_cupsCharmapFree( +int /* O - Count or -1 on error */ +cupsCharsetToUTF8( + cups_utf8_t *dest, /* O - Target string */ + const char *src, /* I - Source string */ + const int maxout, /* I - Max output */ const cups_encoding_t encoding) /* I - Encoding */ { - _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ - _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ - _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */ + cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */ +#ifdef HAVE_ICONV_H + size_t srclen, /* Length of source string */ + outBytesLeft; /* Bytes remaining in output buffer */ +#endif /* HAVE_ICONV_H */ /* - * See if we already have this SBCS charset map loaded... + * Check for valid arguments... */ - for (cmap = cg->cmap_cache; cmap; cmap = cmap->next) + DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)", (void *)dest, src, maxout, encoding)); + + if (!dest || !src || maxout < 1) { - if (cmap->encoding == encoding) - { - if (cmap->used > 0) - cmap->used --; + if (dest) + *dest = '\0'; - return; - } + DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1"); + return (-1); } /* - * See if we already have this DBCS/VBCS charset map loaded... + * Handle identity conversions... */ - for (vmap = cg->vmap_cache; vmap; vmap = vmap->next) + if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII || + encoding >= CUPS_ENCODING_VBCS_END) { - if (vmap->encoding == encoding) - { - if (vmap->used > 0) - vmap->used --; - return; - } + strlcpy((char *)dest, src, (size_t)maxout); + return ((int)strlen((char *)dest)); } -} + /* + * Handle ISO-8859-1 to UTF-8 directly... + */ -/* - * '_cupsCharmapGet()' - Get a character set map. - * - * This code handles single-byte (SBCS), double-byte (DBCS), and - * variable-byte (VBCS) character sets _without_ charset escapes... - * This code does not handle multiple-byte character sets (MBCS) - * (such as ISO-2022-JP) with charset switching via escapes... - */ + destptr = dest; -void * /* O - Charset map pointer */ -_cupsCharmapGet( - const cups_encoding_t encoding) /* I - Encoding */ -{ - char filename[1024]; /* Filename for charset map file */ - _cups_globals_t *cg = _cupsGlobals(); /* Global data */ + if (encoding == CUPS_ISO8859_1) + { + int ch; /* Character from string */ + cups_utf8_t *destend; /* End of UTF-8 buffer */ - DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding)); + destend = dest + maxout - 2; - /* - * Check for valid arguments... - */ + while (*src && destptr < destend) + { + ch = *src++ & 255; - if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) - { - DEBUG_puts(" Bad encoding, returning NULL!"); - return (NULL); + if (ch & 128) + { + *destptr++ = (cups_utf8_t)(0xc0 | (ch >> 6)); + *destptr++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); + } + else + *destptr++ = (cups_utf8_t)ch; + } + + *destptr = '\0'; + + return ((int)(destptr - dest)); } /* - * Get the data directory and charset map name... + * Convert input legacy charset to UTF-8... */ - snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt", - cg->cups_datadir, _cupsEncodingName(encoding)); +#ifdef HAVE_ICONV_H + _cupsMutexLock(&map_mutex); - DEBUG_printf((" filename=\"%s\"\n", filename)); + if (map_encoding != encoding) + { + char toset[1024]; /* Destination character set */ - /* - * Read charset map input file into cache... - */ + _cupsCharmapFlush(); - if (encoding < CUPS_ENCODING_SBCS_END) - return (get_sbcs_charmap(encoding, filename)); - else if (encoding < CUPS_ENCODING_VBCS_END) - return (get_vbcs_charmap(encoding, filename)); - else - return (NULL); -} + snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding)); + map_encoding = encoding; + map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8"); + map_to_utf8 = iconv_open("UTF-8", toset); + } -/* - * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8. - * - * This code handles single-byte (SBCS), double-byte (DBCS), and - * variable-byte (VBCS) character sets _without_ charset escapes... - * This code does not handle multiple-byte character sets (MBCS) - * (such as ISO-2022-JP) with charset switching via escapes... - */ + if (map_to_utf8 != (iconv_t)-1) + { + char *altdestptr = (char *)dest; /* Silence bogus GCC type-punned */ -int /* O - Count or -1 on error */ -cupsCharsetToUTF8( - cups_utf8_t *dest, /* O - Target string */ - const char *src, /* I - Source string */ - const int maxout, /* I - Max output */ - const cups_encoding_t encoding) /* I - Encoding */ -{ - /* - * Check for valid arguments... - */ + srclen = strlen(src); + outBytesLeft = (size_t)maxout - 1; - DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n", - dest, src, maxout, encoding)); + iconv(map_to_utf8, (char **)&src, &srclen, &altdestptr, &outBytesLeft); + *altdestptr = '\0'; - if (dest) - *dest = '\0'; + _cupsMutexUnlock(&map_mutex); - if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) - { - DEBUG_puts(" Bad arguments, returning -1"); - return (-1); + return ((int)(altdestptr - (char *)dest)); } - /* - * Handle identity conversions... - */ - - if (encoding == CUPS_UTF8 || - encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) - { - strlcpy((char *)dest, src, maxout); - return (strlen((char *)dest)); - } + _cupsMutexUnlock(&map_mutex); +#endif /* HAVE_ICONV_H */ /* - * Convert input legacy charset to UTF-8... + * No iconv() support, so error out... */ - if (encoding < CUPS_ENCODING_SBCS_END) - return (conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding)); - else if (encoding < CUPS_ENCODING_VBCS_END) - return (conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding)); - else - { - puts(" Bad encoding, returning -1"); - return (-1); - } + *destptr = '\0'; + + return (-1); } /* * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set. - * - * This code handles single-byte (SBCS), double-byte (DBCS), and - * variable-byte (VBCS) character sets _without_ charset escapes... - * This code does not handle multiple-byte character sets (MBCS) - * (such as ISO-2022-JP) with charset switching via escapes... */ int /* O - Count or -1 on error */ @@ -298,11 +196,18 @@ cupsUTF8ToCharset( const int maxout, /* I - Max output */ const cups_encoding_t encoding) /* I - Encoding */ { + char *destptr; /* Pointer into destination */ +#ifdef HAVE_ICONV_H + size_t srclen, /* Length of source string */ + outBytesLeft; /* Bytes remaining in output buffer */ +#endif /* HAVE_ICONV_H */ + + /* * Check for valid arguments... */ - if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) + if (!dest || !src || maxout < 1) { if (dest) *dest = '\0'; @@ -315,22 +220,97 @@ cupsUTF8ToCharset( */ if (encoding == CUPS_UTF8 || - encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END) + encoding >= CUPS_ENCODING_VBCS_END) + { + strlcpy(dest, (char *)src, (size_t)maxout); + return ((int)strlen(dest)); + } + + /* + * Handle UTF-8 to ISO-8859-1 directly... + */ + + destptr = dest; + + if (encoding == CUPS_ISO8859_1 || encoding <= CUPS_US_ASCII) { - strlcpy(dest, (char *)src, maxout); - return (strlen(dest)); + int ch, /* Character from string */ + maxch; /* Maximum character for charset */ + char *destend; /* End of ISO-8859-1 buffer */ + + maxch = encoding == CUPS_ISO8859_1 ? 256 : 128; + destend = dest + maxout - 1; + + while (*src && destptr < destend) + { + ch = *src++; + + if ((ch & 0xe0) == 0xc0) + { + ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f); + + if (ch < maxch) + *destptr++ = (char)ch; + else + *destptr++ = '?'; + } + else if ((ch & 0xf0) == 0xe0 || + (ch & 0xf8) == 0xf0) + *destptr++ = '?'; + else if (!(ch & 0x80)) + *destptr++ = (char)ch; + } + + *destptr = '\0'; + + return ((int)(destptr - dest)); } +#ifdef HAVE_ICONV_H /* * Convert input UTF-8 to legacy charset... */ - if (encoding < CUPS_ENCODING_SBCS_END) - return (conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding)); - else if (encoding < CUPS_ENCODING_VBCS_END) - return (conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding)); - else - return (-1); + _cupsMutexLock(&map_mutex); + + if (map_encoding != encoding) + { + char toset[1024]; /* Destination character set */ + + _cupsCharmapFlush(); + + snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding)); + + map_encoding = encoding; + map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8"); + map_to_utf8 = iconv_open("UTF-8", toset); + } + + if (map_from_utf8 != (iconv_t)-1) + { + char *altsrc = (char *)src; /* Silence bogus GCC type-punned */ + + srclen = strlen((char *)src); + outBytesLeft = (size_t)maxout - 1; + + iconv(map_from_utf8, &altsrc, &srclen, &destptr, &outBytesLeft); + *destptr = '\0'; + + _cupsMutexUnlock(&map_mutex); + + return ((int)(destptr - dest)); + } + + _cupsMutexUnlock(&map_mutex); +#endif /* HAVE_ICONV_H */ + + /* + * No iconv() support, so error out... + */ + + *destptr = '\0'; + + return (-1); } @@ -356,7 +336,6 @@ cupsUTF8ToUTF32( const cups_utf8_t *src, /* I - Source string */ const int maxout) /* I - Max output */ { - size_t srclen; /* Source string length */ int i; /* Looping variable */ cups_utf8_t ch; /* Character value */ cups_utf8_t next; /* Next character value */ @@ -367,19 +346,22 @@ cupsUTF8ToUTF32( * Check for valid arguments and clear output... */ + DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", (void *)dest, src, maxout)); + if (dest) *dest = 0; if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)"); + return (-1); + } /* - * Convert input UTF-8 to output UTF-32 (and insert BOM)... + * Convert input UTF-8 to output UTF-32... */ - *dest++ = 0xfeff; - srclen = strlen((char *)src); - for (i = maxout - 1; *src && i > 0; i --) { ch = *src++; @@ -395,6 +377,9 @@ cupsUTF8ToUTF32( */ *dest++ = ch; + + DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch)); + continue; } else if ((ch & 0xe0) == 0xc0) { @@ -403,19 +388,30 @@ cupsUTF8ToUTF32( */ next = *src++; - if (!next) + if ((next & 0xc0) != 0x80) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); + } - ch32 = ((ch & 0x1f) << 6) | (next & 0x3f); + ch32 = (cups_utf32_t)((ch & 0x1f) << 6) | (cups_utf32_t)(next & 0x3f); /* * Check for non-shortest form (invalid UTF-8)... */ if (ch32 < 0x80) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); + } *dest++ = ch32; + + DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X", + src[-2], src[-1], (unsigned)ch32)); } else if ((ch & 0xf0) == 0xe0) { @@ -424,25 +420,40 @@ cupsUTF8ToUTF32( */ next = *src++; - if (!next) + if ((next & 0xc0) != 0x80) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); + } - ch32 = ((ch & 0x0f) << 6) | (next & 0x3f); + ch32 = (cups_utf32_t)((ch & 0x0f) << 6) | (cups_utf32_t)(next & 0x3f); next = *src++; - if (!next) + if ((next & 0xc0) != 0x80) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); + } - ch32 = (ch32 << 6) | (next & 0x3f); + ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f); /* * Check for non-shortest form (invalid UTF-8)... */ if (ch32 < 0x800) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); + } *dest++ = ch32; + + DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X", + src[-3], src[-2], src[-1], (unsigned)ch32)); } else if ((ch & 0xf8) == 0xf0) { @@ -451,31 +462,50 @@ cupsUTF8ToUTF32( */ next = *src++; - if (!next) + if ((next & 0xc0) != 0x80) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); + } - ch32 = ((ch & 0x07) << 6) | (next & 0x3f); + ch32 = (cups_utf32_t)((ch & 0x07) << 6) | (cups_utf32_t)(next & 0x3f); next = *src++; - if (!next) + if ((next & 0xc0) != 0x80) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); + } - ch32 = (ch32 << 6) | (next & 0x3f); + ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f); next = *src++; - if (!next) + if ((next & 0xc0) != 0x80) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); + } - ch32 = (ch32 << 6) | (next & 0x3f); + ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f); /* * Check for non-shortest form (invalid UTF-8)... */ if (ch32 < 0x10000) + { + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); + } *dest++ = ch32; + + DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X", + src[-4], src[-3], src[-2], src[-1], (unsigned)ch32)); } else { @@ -483,6 +513,8 @@ cupsUTF8ToUTF32( * More than 4-octet (invalid UTF-8 sequence)... */ + DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); + return (-1); } @@ -490,13 +522,15 @@ cupsUTF8ToUTF32( * Check for UTF-16 surrogate (illegal UTF-8)... */ - if (*dest >= 0xd800 && *dest <= 0xdfff) + if (ch32 >= 0xd800 && ch32 <= 0xdfff) return (-1); } *dest = 0; - return (i); + DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i)); + + return (maxout - 1 - i); } @@ -532,11 +566,17 @@ cupsUTF32ToUTF8( * Check for valid arguments and clear output... */ + DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", (void *)dest, (void *)src, maxout)); + if (dest) *dest = '\0'; if (!dest || !src || maxout < 1) + { + DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)"); + return (-1); + } /* * Check for leading BOM in UTF-32 and inverted BOM... @@ -545,6 +585,8 @@ cupsUTF32ToUTF8( start = dest; swap = *src == 0xfffe0000; + DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap)); + if (*src == 0xfffe0000 || *src == 0xfeff) src ++; @@ -569,7 +611,11 @@ cupsUTF32ToUTF8( */ if (ch > 0x10ffff) + { + DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)"); + return (-1); + } /* * Convert UTF-32 character to UTF-8 character(s)... @@ -583,6 +629,8 @@ cupsUTF32ToUTF8( *dest++ = (cups_utf8_t)ch; i --; + + DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1])); } else if (ch < 0x800) { @@ -591,11 +639,18 @@ cupsUTF32ToUTF8( */ if (i < 2) + { + DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)"); + return (-1); + } *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f)); *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); i -= 2; + + DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch, + dest[-2], dest[-1])); } else if (ch < 0x10000) { @@ -604,12 +659,19 @@ cupsUTF32ToUTF8( */ if (i < 3) + { + DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)"); + return (-1); + } *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f)); *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f)); *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); i -= 3; + + DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch, + dest[-3], dest[-2], dest[-1])); } else { @@ -618,876 +680,26 @@ cupsUTF32ToUTF8( */ if (i < 4) + { + DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)"); + return (-1); + } *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07)); *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f)); *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f)); *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); i -= 4; - } - } - *dest = '\0'; - - return ((int)(dest - start)); -} - - -/* - * 'compare_wide()' - Compare key for wide (VBCS) match. - */ - -static int -compare_wide(const void *k1, /* I - Key char */ - const void *k2) /* I - Map char */ -{ - cups_vbcs_t key; /* Legacy key character */ - cups_vbcs_t map; /* Legacy map character */ - - - key = *((cups_vbcs_t *)k1); - map = ((_cups_wide2uni_t *)k2)->widechar; - - return ((int)(key - map)); -} - - -/* - * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8. - */ - -static int /* O - Count or -1 on error */ -conv_sbcs_to_utf8( - cups_utf8_t *dest, /* O - Target string */ - const cups_sbcs_t *src, /* I - Source string */ - int maxout, /* I - Max output */ - const cups_encoding_t encoding) /* I - Encoding */ -{ - _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ - cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ - cups_sbcs_t legchar; /* Legacy character value */ - cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */ - *workptr; /* Pointer into string */ - - - /* - * Find legacy charset map in cache... - */ - - if ((cmap = (_cups_cmap_t *)_cupsCharmapGet(encoding)) == NULL) - return (-1); - - /* - * Convert input legacy charset to internal UCS-4 (and insert BOM)... - */ - - work[0] = 0xfeff; - for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);) - { - legchar = *src++; - - /* - * Convert ASCII verbatim (optimization)... - */ - - if (legchar < 0x80) - *workptr++ = (cups_utf32_t)legchar; - else - { - /* - * Convert unknown character to Replacement Character... - */ - - crow = cmap->char2uni + legchar; - - if (!*crow) - *workptr++ = 0xfffd; - else - *workptr++ = (cups_utf32_t)*crow; + DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x", + (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1])); } } - *workptr = 0; - - /* - * Convert internal UCS-4 to output UTF-8 (and delete BOM)... - */ + *dest = '\0'; - _cupsCharmapFree(encoding); + DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start))); - return (cupsUTF32ToUTF8(dest, work, maxout)); + return ((int)(dest - start)); } - - -/* - * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS. - */ - -static int /* O - Count or -1 on error */ -conv_utf8_to_sbcs( - cups_sbcs_t *dest, /* O - Target string */ - const cups_utf8_t *src, /* I - Source string */ - int maxout, /* I - Max output */ - const cups_encoding_t encoding) /* I - Encoding */ -{ - cups_sbcs_t *start; /* Start of destination string */ - _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ - cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ - cups_utf32_t unichar; /* Character value */ - cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */ - *workptr; /* Pointer into string */ - - - /* - * Find legacy charset map in cache... - */ - - if ((cmap = (_cups_cmap_t *) _cupsCharmapGet(encoding)) == NULL) - return (-1); - - /* - * Convert input UTF-8 to internal UCS-4 (and insert BOM)... - */ - - if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0) - return (-1); - - /* - * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)... - */ - - for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --) - { - unichar = *workptr++; - if (!unichar) - break; - - /* - * Convert ASCII verbatim (optimization)... - */ - - if (unichar < 0x80) - { - *dest++ = (cups_sbcs_t)unichar; - continue; - } - - /* - * Convert unknown character to visible replacement... - */ - - srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)]; - - if (srow) - srow += (int)(unichar & 0xff); - - if (!srow || !*srow) - *dest++ = '?'; - else - *dest++ = *srow; - } - - *dest = '\0'; - - _cupsCharmapFree(encoding); - - return ((int)(dest - start)); -} - - -/* - * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS. - */ - -static int /* O - Count or -1 on error */ -conv_utf8_to_vbcs( - cups_sbcs_t *dest, /* O - Target string */ - const cups_utf8_t *src, /* I - Source string */ - int maxout, /* I - Max output */ - const cups_encoding_t encoding) /* I - Encoding */ -{ - cups_sbcs_t *start; /* Start of destination string */ - _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */ - cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ - cups_utf32_t unichar; /* Character value */ - cups_vbcs_t legchar; /* Legacy character value */ - cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */ - *workptr; /* Pointer into string */ - - - /* - * Find legacy charset map in cache... - */ - - if ((vmap = (_cups_vmap_t *)_cupsCharmapGet(encoding)) == NULL) - return (-1); - - /* - * Convert input UTF-8 to internal UCS-4 (and insert BOM)... - */ - - if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0) - return (-1); - - /* - * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)... - */ - - for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --) - { - unichar = *workptr++; - if (!unichar) - break; - - /* - * Convert ASCII verbatim (optimization)... - */ - - if (unichar < 0x80) - { - *dest++ = (cups_vbcs_t)unichar; - continue; - } - - /* - * Convert unknown character to visible replacement... - */ - - vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)]; - - if (vrow) - vrow += (int)(unichar & 0xff); - - if (!vrow || !*vrow) - legchar = (cups_vbcs_t)'?'; - else - legchar = (cups_vbcs_t)*vrow; - - /* - * Save n-byte legacy character... - */ - - if (legchar > 0xffffff) - { - if (maxout < 5) - return (-1); - - *dest++ = (cups_sbcs_t)(legchar >> 24); - *dest++ = (cups_sbcs_t)(legchar >> 16); - *dest++ = (cups_sbcs_t)(legchar >> 8); - *dest++ = (cups_sbcs_t)legchar; - - maxout -= 3; - } - else if (legchar > 0xffff) - { - if (maxout < 4) - return (-1); - - *dest++ = (cups_sbcs_t)(legchar >> 16); - *dest++ = (cups_sbcs_t)(legchar >> 8); - *dest++ = (cups_sbcs_t)legchar; - - maxout -= 2; - } - else if (legchar > 0xff) - { - *dest++ = (cups_sbcs_t)(legchar >> 8); - *dest++ = (cups_sbcs_t)legchar; - - maxout --; - } - } - - *dest = '\0'; - - _cupsCharmapFree(encoding); - - return ((int)(dest - start)); -} - - -/* - * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8. - */ - -static int /* O - Count or -1 on error */ -conv_vbcs_to_utf8( - cups_utf8_t *dest, /* O - Target string */ - const cups_sbcs_t *src, /* I - Source string */ - int maxout, /* I - Max output */ - const cups_encoding_t encoding) /* I - Encoding */ -{ - _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ - cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ - _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */ - cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */ - cups_vbcs_t legchar; /* Legacy character value */ - cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */ - *workptr; /* Pointer into string */ - - - /* - * Find legacy charset map in cache... - */ - - if ((vmap = (_cups_vmap_t *)_cupsCharmapGet(encoding)) == NULL) - return (-1); - - /* - * Convert input legacy charset to internal UCS-4 (and insert BOM)... - */ - - work[0] = 0xfeff; - for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);) - { - legchar = *src++; - leadchar = (cups_sbcs_t)legchar; - - /* - * Convert ASCII verbatim (optimization)... - */ - - if (legchar < 0x80) - { - *workptr++ = (cups_utf32_t)legchar; - continue; - } - - /* - * Convert 2-byte legacy character... - */ - - if (vmap->lead2char[(int)leadchar] == leadchar) - { - if (!*src) - return (-1); - - legchar = (legchar << 8) | *src++; - - /* - * Convert unknown character to Replacement Character... - */ - - crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)]; - if (crow) - crow += (int) (legchar & 0xff); - - if (!crow || !*crow) - *workptr++ = 0xfffd; - else - *workptr++ = (cups_utf32_t)*crow; - continue; - } - - /* - * Fetch 3-byte or 4-byte legacy character... - */ - - if (vmap->lead3char[(int)leadchar] == leadchar) - { - if (!*src || !src[1]) - return (-1); - - legchar = (legchar << 8) | *src++; - legchar = (legchar << 8) | *src++; - } - else if (vmap->lead4char[(int)leadchar] == leadchar) - { - if (!*src || !src[1] || !src[2]) - return (-1); - - legchar = (legchar << 8) | *src++; - legchar = (legchar << 8) | *src++; - legchar = (legchar << 8) | *src++; - } - else - return (-1); - - /* - * Find 3-byte or 4-byte legacy character... - */ - - wide2uni = (_cups_wide2uni_t *)bsearch(&legchar, - vmap->wide2uni, - vmap->widecount, - sizeof(_cups_wide2uni_t), - compare_wide); - - /* - * Convert unknown character to Replacement Character... - */ - - if (!wide2uni || !wide2uni->unichar) - *workptr++ = 0xfffd; - else - *workptr++ = wide2uni->unichar; - } - - *workptr = 0; - - _cupsCharmapFree(encoding); - - /* - * Convert internal UCS-4 to output UTF-8 (and delete BOM)... - */ - - return (cupsUTF32ToUTF8(dest, work, maxout)); -} - - -/* - * 'free_sbcs_charmap()' - Free memory used by a single byte character set. - */ - -static void -free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */ -{ - int i; /* Looping variable */ - - - for (i = 0; i < 256; i ++) - if (cmap->uni2char[i]) - free(cmap->uni2char[i]); - - free(cmap); -} - - -/* - * 'free_vbcs_charmap()' - Free memory used by a variable byte character set. - */ - -static void -free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */ -{ - int i; /* Looping variable */ - - - for (i = 0; i < 256; i ++) - if (vmap->char2uni[i]) - free(vmap->char2uni[i]); - - for (i = 0; i < 256; i ++) - if (vmap->uni2char[i]) - free(vmap->uni2char[i]); - - if (vmap->wide2uni) - free(vmap->wide2uni); - - free(vmap); -} - - -/* - * 'get_charmap_count()' - Count lines in a charmap file. - */ - -static int /* O - Count or -1 on error */ -get_charmap_count(cups_file_t *fp) /* I - File to read from */ -{ - int count; /* Number of lines */ - char line[256]; /* Line from input map file */ - - - /* - * Count lines in map input file... - */ - - count = 0; - - while (cupsFileGets(fp, line, sizeof(line))) - if (line[0] == '0') - count ++; - - /* - * Return the number of lines... - */ - - if (count > 0) - return (count); - else - return (-1); -} - - -/* - * 'get_sbcs_charmap()' - Get SBCS Charmap. - */ - -static _cups_cmap_t * /* O - Charmap or 0 on error */ -get_sbcs_charmap( - const cups_encoding_t encoding, /* I - Charmap Encoding */ - const char *filename) /* I - Charmap Filename */ -{ - unsigned long legchar; /* Legacy character value */ - cups_utf32_t unichar; /* Unicode character value */ - _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */ - cups_file_t *fp; /* Charset map file pointer */ - char *s; /* Line parsing pointer */ - cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ - cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */ - char line[256]; /* Line from charset map file */ - _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */ - - - /* - * See if we already have this SBCS charset map loaded... - */ - - for (cmap = cg->cmap_cache; cmap; cmap = cmap->next) - { - if (cmap->encoding == encoding) - { - cmap->used ++; - DEBUG_printf((" returning existing cmap=%p\n", cmap)); - return ((void *)cmap); - } - } - - /* - * Open SBCS charset map input file... - */ - - if ((fp = cupsFileOpen(filename, "r")) == NULL) - return (NULL); - - /* - * Allocate memory for SBCS charset map... - */ - - if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL) - { - cupsFileClose(fp); - DEBUG_puts(" Unable to allocate memory!"); - return (NULL); - } - - cmap->used ++; - cmap->encoding = encoding; - - /* - * Save SBCS charset map into memory for transcoding... - */ - - while (cupsFileGets(fp, line, sizeof(line))) - { - if (line[0] != '0') - continue; - - legchar = strtol(line, &s, 16); - if (legchar < 0 || legchar > 0xff) - goto sbcs_error; - - unichar = strtol(s, NULL, 16); - if (unichar < 0 || unichar > 0xffff) - goto sbcs_error; - - /* - * Save legacy to Unicode mapping in direct lookup table... - */ - - crow = cmap->char2uni + legchar; - *crow = (cups_ucs2_t)(unichar & 0xffff); - - /* - * Save Unicode to legacy mapping in indirect lookup table... - */ - - srow = cmap->uni2char[(unichar >> 8) & 0xff]; - if (!srow) - { - srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t)); - if (!srow) - goto sbcs_error; - - cmap->uni2char[(unichar >> 8) & 0xff] = srow; - } - - srow += unichar & 0xff; - - /* - * Convert Replacement Character to visible replacement... - */ - - if (unichar == 0xfffd) - legchar = (unsigned long)'?'; - - /* - * First (oldest) legacy character uses Unicode mapping cell... - */ - - if (!*srow) - *srow = (cups_sbcs_t)legchar; - } - - cupsFileClose(fp); - - /* - * Add it to the cache and return... - */ - - cmap->next = cg->cmap_cache; - cg->cmap_cache = cmap; - - DEBUG_printf((" returning new cmap=%p\n", cmap)); - - return (cmap); - - /* - * If we get here, there was an error in the cmap file... - */ - - sbcs_error: - - free_sbcs_charmap(cmap); - - cupsFileClose(fp); - - DEBUG_puts(" Error, returning NULL!"); - - return (NULL); -} - - -/* - * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap. - */ - -static _cups_vmap_t * /* O - Charmap or 0 on error */ -get_vbcs_charmap( - const cups_encoding_t encoding, /* I - Charmap Encoding */ - const char *filename) /* I - Charmap Filename */ -{ - _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */ - cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */ - cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */ - _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */ - cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */ - unsigned long legchar; /* Legacy character value */ - cups_utf32_t unichar; /* Unicode character value */ - int mapcount; /* Count of lines in charmap file */ - cups_file_t *fp; /* Charset map file pointer */ - char *s; /* Line parsing pointer */ - char line[256]; /* Line from charset map file */ - int i; /* Loop variable */ - int wide; /* 32-bit legacy char */ - _cups_globals_t *cg = _cupsGlobals(); /* Pointer to library globals */ - - - DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n", - encoding, filename)); - - /* - * See if we already have this DBCS/VBCS charset map loaded... - */ - - for (vmap = cg->vmap_cache; vmap; vmap = vmap->next) - { - if (vmap->encoding == encoding) - { - vmap->used ++; - DEBUG_printf((" returning existing vmap=%p\n", vmap)); - return ((void *)vmap); - } - } - - /* - * Open VBCS charset map input file... - */ - - if ((fp = cupsFileOpen(filename, "r")) == NULL) - { - DEBUG_printf((" Unable to open file: %s\n", strerror(errno))); - return (NULL); - } - - /* - * Count lines in charmap file... - */ - - if ((mapcount = get_charmap_count(fp)) <= 0) - { - DEBUG_puts(" Unable to get charmap count!"); - return (NULL); - } - - DEBUG_printf((" mapcount=%d\n", mapcount)); - - /* - * Allocate memory for DBCS/VBCS charset map... - */ - - if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL) - { - cupsFileClose(fp); - DEBUG_puts(" Unable to allocate memory!"); - return (NULL); - } - - vmap->used ++; - vmap->encoding = encoding; - - /* - * Save DBCS/VBCS charset map into memory for transcoding... - */ - - leadchar = 0; - wide2uni = NULL; - - cupsFileRewind(fp); - - i = 0; - wide = 0; - - while (cupsFileGets(fp, line, sizeof(line))) - { - if (line[0] != '0') - continue; - - legchar = strtoul(line, &s, 16); - if (legchar == ULONG_MAX) - goto vbcs_error; - - unichar = strtol(s, NULL, 16); - if (unichar < 0 || unichar > 0xffff) - goto vbcs_error; - - i ++; - -/* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i, - legchar, (unsigned)unichar)); */ - - /* - * Save lead char of 2/3/4-byte legacy char... - */ - - if (legchar > 0xff && legchar <= 0xffff) - { - leadchar = (cups_sbcs_t)(legchar >> 8); - vmap->lead2char[leadchar] = leadchar; - } - - if (legchar > 0xffff && legchar <= 0xffffff) - { - leadchar = (cups_sbcs_t)(legchar >> 16); - vmap->lead3char[leadchar] = leadchar; - } - - if (legchar > 0xffffff) - { - leadchar = (cups_sbcs_t)(legchar >> 24); - vmap->lead4char[leadchar] = leadchar; - } - - /* - * Save Legacy to Unicode mapping... - */ - - if (legchar <= 0xffff) - { - /* - * Save DBCS 16-bit to Unicode mapping in indirect lookup table... - */ - - crow = vmap->char2uni[(int)leadchar]; - if (!crow) - { - crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t)); - if (!crow) - goto vbcs_error; - - vmap->char2uni[(int)leadchar] = crow; - } - - crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar; - } - else - { - /* - * Save VBCS 32-bit to Unicode mapping in sorted list table... - */ - - if (!wide) - { - wide = 1; - vmap->widecount = (mapcount - i + 1); - wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount, - sizeof(_cups_wide2uni_t)); - if (!wide2uni) - goto vbcs_error; - - vmap->wide2uni = wide2uni; - } - - wide2uni->widechar = (cups_vbcs_t)legchar; - wide2uni->unichar = (cups_ucs2_t)unichar; - wide2uni ++; - } - - /* - * Save Unicode to legacy mapping in indirect lookup table... - */ - - vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)]; - if (!vrow) - { - vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t)); - if (!vrow) - goto vbcs_error; - - vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow; - } - - vrow += (int)(unichar & 0xff); - - /* - * Convert Replacement Character to visible replacement... - */ - - if (unichar == 0xfffd) - legchar = (unsigned long)'?'; - - /* - * First (oldest) legacy character uses Unicode mapping cell... - */ - - if (!*vrow) - *vrow = (cups_vbcs_t)legchar; - } - - vmap->charcount = (i - vmap->widecount); - - cupsFileClose(fp); - - /* - * Add it to the cache and return... - */ - - vmap->next = cg->vmap_cache; - cg->vmap_cache = vmap; - - DEBUG_printf((" returning new vmap=%p\n", vmap)); - - return (vmap); - - /* - * If we get here, the file contains errors... - */ - - vbcs_error: - - free_vbcs_charmap(vmap); - - cupsFileClose(fp); - - DEBUG_puts(" Error, returning NULL!"); - - return (NULL); -} - - -/* - * End of "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $" - */