]> git.ipfire.org Git - thirdparty/cups.git/blame - cups/transcode.c
Merge changes from CUPS 1.5svn-r9229.
[thirdparty/cups.git] / cups / transcode.c
CommitLineData
ef416fc2 1/*
75bd9771 2 * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
ef416fc2 3 *
71e16022 4 * Transcoding support for CUPS.
ef416fc2 5 *
71e16022 6 * Copyright 2007-2010 by Apple Inc.
b86bc4cf 7 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 8 *
bc44d920 9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * missing or damaged, see the license at "http://www.cups.org/".
ef416fc2 14 *
bc44d920 15 * This file is subject to the Apple OS-Developed Software exception.
ef416fc2 16 *
17 * Contents:
18 *
fa73b229 19 * _cupsCharmapFlush() - Flush all character set maps out of cache.
ef416fc2 20 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
e1d6a774 21 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
ef416fc2 22 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
23 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
ef416fc2 24 */
25
26/*
27 * Include necessary headers...
28 */
29
71e16022 30#include "cups-private.h"
e53920b9 31#include <limits.h>
ef416fc2 32#include <time.h>
cc754834
MS
33#ifdef HAVE_ICONV_H
34# include <iconv.h>
35#endif /* HAVE_ICONV_H */
ef416fc2 36
37
/*
 * Local globals (only present when iconv is available)...
 */

#ifdef HAVE_ICONV_H
/* Mutex to control access to maps */
static _cups_mutex_t   map_mutex = _CUPS_MUTEX_INITIALIZER;

/* Convert from UTF-8 to charset */
static iconv_t         map_from_utf8 = (iconv_t)-1;

/* Convert from charset to UTF-8 */
static iconv_t         map_to_utf8 = (iconv_t)-1;

/* Which charset is cached */
static cups_encoding_t map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
e1d6a774 52
ef416fc2 53
/*
 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
 *
 * Closes whichever cached iconv descriptors are open and marks the cache as
 * holding no encoding.  Without iconv support this function does nothing.
 */

void
_cupsCharmapFlush(void)
{
#ifdef HAVE_ICONV_H
  iconv_t *cached[2];                   /* Cached conversion descriptors */
  int     n;                            /* Looping variable */


  cached[0] = &map_from_utf8;
  cached[1] = &map_to_utf8;

  for (n = 0; n < 2; n ++)
  {
    if (*cached[n] == (iconv_t)-1)
      continue;                         /* Nothing open in this slot */

    iconv_close(*cached[n]);
    *cached[n] = (iconv_t)-1;
  }

  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
}
77
e1d6a774 78
ef416fc2 79/*
e1d6a774 80 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
ef416fc2 81 */
e1d6a774 82
83int /* O - Count or -1 on error */
84cupsCharsetToUTF8(
cc754834
MS
85 cups_utf8_t *dest, /* O - Target string */
86 const char *src, /* I - Source string */
87 const int maxout, /* I - Max output */
e1d6a774 88 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 89{
cc754834 90 cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */
cc754834
MS
91 size_t srclen, /* Length of source string */
92 outBytesLeft; /* Bytes remaining in output buffer */
d6ae789d 93
94
ef416fc2 95 /*
96 * Check for valid arguments...
97 */
98
f11a948a 99 DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
e1d6a774 100 dest, src, maxout, encoding));
101
cc754834 102 if (!dest || !src || maxout < 1)
e1d6a774 103 {
cc754834
MS
104 if (dest)
105 *dest = '\0';
106
f11a948a 107 DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
ef416fc2 108 return (-1);
e1d6a774 109 }
ef416fc2 110
111 /*
112 * Handle identity conversions...
113 */
114
cc754834
MS
115 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
116 encoding >= CUPS_ENCODING_VBCS_END)
ef416fc2 117 {
e1d6a774 118 strlcpy((char *)dest, src, maxout);
b86bc4cf 119 return ((int)strlen((char *)dest));
ef416fc2 120 }
121
411affcf 122 /*
123 * Handle ISO-8859-1 to UTF-8 directly...
124 */
125
cc754834
MS
126 destptr = dest;
127
411affcf 128 if (encoding == CUPS_ISO8859_1)
129 {
130 int ch; /* Character from string */
cc754834 131 cups_utf8_t *destend; /* End of UTF-8 buffer */
411affcf 132
133
411affcf 134 destend = dest + maxout - 2;
135
136 while (*src && destptr < destend)
137 {
138 ch = *src++ & 255;
139
140 if (ch & 128)
141 {
142 *destptr++ = 0xc0 | (ch >> 6);
143 *destptr++ = 0x80 | (ch & 0x3f);
144 }
145 else
146 *destptr++ = ch;
147 }
148
149 *destptr = '\0';
150
b86bc4cf 151 return ((int)(destptr - dest));
411affcf 152 }
153
ef416fc2 154 /*
e1d6a774 155 * Convert input legacy charset to UTF-8...
ef416fc2 156 */
e1d6a774 157
cc754834 158#ifdef HAVE_ICONV_H
6d2f911b 159 _cupsMutexLock(&map_mutex);
d6ae789d 160
cc754834
MS
161 if (map_encoding != encoding)
162 {
163 _cupsCharmapFlush();
164
165 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
166 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
167 map_encoding = encoding;
168 }
169
170 if (map_to_utf8 != (iconv_t)-1)
171 {
172 srclen = strlen(src);
173 outBytesLeft = maxout - 1;
4220952d
MS
174
175 iconv(map_to_utf8, (char **)&src, &srclen, (char **)&destptr,
176 &outBytesLeft);
177 *destptr = '\0';
cc754834
MS
178
179 _cupsMutexUnlock(&map_mutex);
180
181 return ((int)(destptr - dest));
182 }
d6ae789d 183
6d2f911b 184 _cupsMutexUnlock(&map_mutex);
cc754834 185#endif /* HAVE_ICONV_H */
d6ae789d 186
cc754834
MS
187 /*
188 * No iconv() support, so error out...
189 */
190
191 *destptr = '\0';
192
193 return (-1);
ef416fc2 194}
195
e1d6a774 196
ef416fc2 197/*
e1d6a774 198 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
ef416fc2 199 */
e1d6a774 200
201int /* O - Count or -1 on error */
202cupsUTF8ToCharset(
203 char *dest, /* O - Target string */
204 const cups_utf8_t *src, /* I - Source string */
205 const int maxout, /* I - Max output */
206 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 207{
cc754834 208 char *destptr; /* Pointer into destination */
cc754834
MS
209 size_t srclen, /* Length of source string */
210 outBytesLeft; /* Bytes remaining in output buffer */
d6ae789d 211
212
ef416fc2 213 /*
214 * Check for valid arguments...
215 */
216
cc754834 217 if (!dest || !src || maxout < 1)
e1d6a774 218 {
219 if (dest)
220 *dest = '\0';
221
ef416fc2 222 return (-1);
e1d6a774 223 }
ef416fc2 224
225 /*
226 * Handle identity conversions...
227 */
228
cc754834
MS
229 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
230 encoding >= CUPS_ENCODING_VBCS_END)
ef416fc2 231 {
e1d6a774 232 strlcpy(dest, (char *)src, maxout);
b86bc4cf 233 return ((int)strlen(dest));
ef416fc2 234 }
235
411affcf 236 /*
237 * Handle UTF-8 to ISO-8859-1 directly...
238 */
239
cc754834
MS
240 destptr = dest;
241
411affcf 242 if (encoding == CUPS_ISO8859_1)
243 {
244 int ch; /* Character from string */
cc754834 245 char *destend; /* End of ISO-8859-1 buffer */
411affcf 246
247
411affcf 248 destend = dest + maxout - 1;
249
250 while (*src && destptr < destend)
251 {
252 ch = *src++;
253
254 if ((ch & 0xe0) == 0xc0)
255 {
256 ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
257
258 if (ch < 256)
259 *destptr++ = ch;
260 else
261 *destptr++ = '?';
262 }
263 else if ((ch & 0xf0) == 0xe0 ||
264 (ch & 0xf8) == 0xf0)
265 *destptr++ = '?';
266 else if (!(ch & 0x80))
267 *destptr++ = ch;
268 }
269
270 *destptr = '\0';
271
b86bc4cf 272 return ((int)(destptr - dest));
411affcf 273 }
274
cc754834 275#ifdef HAVE_ICONV_H
ef416fc2 276 /*
e1d6a774 277 * Convert input UTF-8 to legacy charset...
ef416fc2 278 */
e1d6a774 279
6d2f911b 280 _cupsMutexLock(&map_mutex);
d6ae789d 281
cc754834
MS
282 if (map_encoding != encoding)
283 {
284 _cupsCharmapFlush();
285
286 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
287 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
288 map_encoding = encoding;
289 }
290
291 if (map_from_utf8 != (iconv_t)-1)
292 {
293 srclen = strlen((char *)src);
294 outBytesLeft = maxout - 1;
4220952d
MS
295
296 iconv(map_from_utf8, (char **)&src, &srclen, &destptr, &outBytesLeft);
297 *destptr = '\0';
cc754834
MS
298
299 _cupsMutexUnlock(&map_mutex);
300
301 return ((int)(destptr - dest));
302 }
d6ae789d 303
6d2f911b 304 _cupsMutexUnlock(&map_mutex);
cc754834
MS
305#endif /* HAVE_ICONV_H */
306
307 /*
308 * No iconv() support, so error out...
309 */
310
311 *destptr = '\0';
d6ae789d 312
cc754834 313 return (-1);
ef416fc2 314}
315
ef416fc2 316
317/*
318 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
319 *
320 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
321 *
322 * UTF-32 char UTF-8 char(s)
323 * --------------------------------------------------
e1d6a774 324 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 325 * 128 to 2047 = 110xxxxx 10yyyyyy
326 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 327 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 328 *
329 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
330 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 331 */
e1d6a774 332
333int /* O - Count or -1 on error */
334cupsUTF8ToUTF32(
335 cups_utf32_t *dest, /* O - Target string */
336 const cups_utf8_t *src, /* I - Source string */
337 const int maxout) /* I - Max output */
ef416fc2 338{
e1d6a774 339 int i; /* Looping variable */
340 cups_utf8_t ch; /* Character value */
341 cups_utf8_t next; /* Next character value */
342 cups_utf32_t ch32; /* UTF-32 character value */
343
ef416fc2 344
345 /*
346 * Check for valid arguments and clear output...
347 */
e1d6a774 348
e07d4801
MS
349 DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
350 src, maxout));
c9fc04c6 351
e1d6a774 352 if (dest)
353 *dest = 0;
354
355 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
c9fc04c6 356 {
e07d4801 357 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
c9fc04c6 358
ef416fc2 359 return (-1);
c9fc04c6 360 }
ef416fc2 361
362 /*
cda47a96 363 * Convert input UTF-8 to output UTF-32...
ef416fc2 364 */
e1d6a774 365
e1d6a774 366 for (i = maxout - 1; *src && i > 0; i --)
ef416fc2 367 {
e1d6a774 368 ch = *src++;
ef416fc2 369
370 /*
371 * Convert UTF-8 character(s) to UTF-32 character...
372 */
e1d6a774 373
374 if (!(ch & 0x80))
ef416fc2 375 {
376 /*
377 * One-octet UTF-8 <= 127 (US-ASCII)...
378 */
e1d6a774 379
380 *dest++ = ch;
c9fc04c6 381
e07d4801 382 DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
2abf387c 383 continue;
ef416fc2 384 }
385 else if ((ch & 0xe0) == 0xc0)
386 {
387 /*
388 * Two-octet UTF-8 <= 2047 (Latin-x)...
389 */
e1d6a774 390
391 next = *src++;
c9fc04c6
MS
392 if ((next & 0xc0) != 0x80)
393 {
e07d4801 394 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 395
ef416fc2 396 return (-1);
c9fc04c6 397 }
e1d6a774 398
ef416fc2 399 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
400
401 /*
402 * Check for non-shortest form (invalid UTF-8)...
403 */
e1d6a774 404
405 if (ch32 < 0x80)
c9fc04c6 406 {
e07d4801 407 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 408
ef416fc2 409 return (-1);
c9fc04c6 410 }
e1d6a774 411
412 *dest++ = ch32;
c9fc04c6 413
e07d4801 414 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
c9fc04c6 415 src[-2], src[-1], (unsigned)ch32));
ef416fc2 416 }
417 else if ((ch & 0xf0) == 0xe0)
418 {
419 /*
420 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
421 */
e1d6a774 422
423 next = *src++;
c9fc04c6
MS
424 if ((next & 0xc0) != 0x80)
425 {
e07d4801 426 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 427
ef416fc2 428 return (-1);
c9fc04c6 429 }
e1d6a774 430
431 ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
432
433 next = *src++;
c9fc04c6
MS
434 if ((next & 0xc0) != 0x80)
435 {
e07d4801 436 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 437
ef416fc2 438 return (-1);
c9fc04c6 439 }
e1d6a774 440
441 ch32 = (ch32 << 6) | (next & 0x3f);
ef416fc2 442
443 /*
444 * Check for non-shortest form (invalid UTF-8)...
445 */
e1d6a774 446
447 if (ch32 < 0x800)
c9fc04c6 448 {
e07d4801 449 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 450
ef416fc2 451 return (-1);
c9fc04c6 452 }
e1d6a774 453
454 *dest++ = ch32;
c9fc04c6 455
e07d4801 456 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
c9fc04c6 457 src[-3], src[-2], src[-1], (unsigned)ch32));
ef416fc2 458 }
459 else if ((ch & 0xf8) == 0xf0)
460 {
461 /*
e1d6a774 462 * Four-octet UTF-8...
ef416fc2 463 */
e1d6a774 464
465 next = *src++;
c9fc04c6
MS
466 if ((next & 0xc0) != 0x80)
467 {
e07d4801 468 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 469
ef416fc2 470 return (-1);
c9fc04c6 471 }
e1d6a774 472
473 ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
474
475 next = *src++;
c9fc04c6
MS
476 if ((next & 0xc0) != 0x80)
477 {
e07d4801 478 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 479
e1d6a774 480 return (-1);
c9fc04c6 481 }
e1d6a774 482
483 ch32 = (ch32 << 6) | (next & 0x3f);
484
485 next = *src++;
c9fc04c6
MS
486 if ((next & 0xc0) != 0x80)
487 {
e07d4801 488 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 489
e1d6a774 490 return (-1);
c9fc04c6 491 }
e1d6a774 492
493 ch32 = (ch32 << 6) | (next & 0x3f);
494
ef416fc2 495 /*
e1d6a774 496 * Check for non-shortest form (invalid UTF-8)...
ef416fc2 497 */
e1d6a774 498
499 if (ch32 < 0x10000)
c9fc04c6 500 {
e07d4801 501 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 502
e1d6a774 503 return (-1);
c9fc04c6 504 }
e1d6a774 505
506 *dest++ = ch32;
c9fc04c6 507
e07d4801 508 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
c9fc04c6 509 src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
ef416fc2 510 }
511 else
512 {
513 /*
e1d6a774 514 * More than 4-octet (invalid UTF-8 sequence)...
ef416fc2 515 */
e1d6a774 516
e07d4801 517 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 518
ef416fc2 519 return (-1);
520 }
521
522 /*
523 * Check for UTF-16 surrogate (illegal UTF-8)...
524 */
ef416fc2 525
2abf387c 526 if (ch32 >= 0xd800 && ch32 <= 0xdfff)
ef416fc2 527 return (-1);
528 }
e1d6a774 529
ef416fc2 530 *dest = 0;
e1d6a774 531
e07d4801 532 DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
c9fc04c6
MS
533
534 return (maxout - 1 - i);
ef416fc2 535}
536
e1d6a774 537
ef416fc2 538/*
539 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
540 *
541 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
542 *
543 * UTF-32 char UTF-8 char(s)
544 * --------------------------------------------------
e1d6a774 545 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 546 * 128 to 2047 = 110xxxxx 10yyyyyy
547 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 548 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 549 *
550 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
551 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 552 */
e1d6a774 553
554int /* O - Count or -1 on error */
555cupsUTF32ToUTF8(
556 cups_utf8_t *dest, /* O - Target string */
557 const cups_utf32_t *src, /* I - Source string */
558 const int maxout) /* I - Max output */
ef416fc2 559{
e1d6a774 560 cups_utf8_t *start; /* Start of destination string */
561 int i; /* Looping variable */
562 int swap; /* Byte-swap input to output */
563 cups_utf32_t ch; /* Character value */
564
ef416fc2 565
566 /*
567 * Check for valid arguments and clear output...
568 */
e1d6a774 569
e07d4801 570 DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
c9fc04c6
MS
571 maxout));
572
e1d6a774 573 if (dest)
574 *dest = '\0';
575
576 if (!dest || !src || maxout < 1)
c9fc04c6 577 {
e07d4801 578 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
c9fc04c6 579
ef416fc2 580 return (-1);
c9fc04c6 581 }
ef416fc2 582
583 /*
584 * Check for leading BOM in UTF-32 and inverted BOM...
585 */
e1d6a774 586
587 start = dest;
588 swap = *src == 0xfffe0000;
589
e07d4801 590 DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
c9fc04c6 591
e1d6a774 592 if (*src == 0xfffe0000 || *src == 0xfeff)
593 src ++;
ef416fc2 594
595 /*
596 * Convert input UTF-32 to output UTF-8...
597 */
e1d6a774 598
599 for (i = maxout - 1; *src && i > 0;)
ef416fc2 600 {
e1d6a774 601 ch = *src++;
ef416fc2 602
603 /*
604 * Byte swap input UTF-32, if necessary...
e1d6a774 605 * (only byte-swapping 24 of 32 bits)
ef416fc2 606 */
e1d6a774 607
ef416fc2 608 if (swap)
609 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
610
611 /*
e1d6a774 612 * Check for beyond Plane 16 (invalid UTF-32)...
ef416fc2 613 */
ef416fc2 614
ef416fc2 615 if (ch > 0x10ffff)
c9fc04c6 616 {
e07d4801 617 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
c9fc04c6 618
ef416fc2 619 return (-1);
c9fc04c6 620 }
ef416fc2 621
ef416fc2 622 /*
623 * Convert UTF-32 character to UTF-8 character(s)...
624 */
e1d6a774 625
626 if (ch < 0x80)
ef416fc2 627 {
628 /*
629 * One-octet UTF-8 <= 127 (US-ASCII)...
630 */
e1d6a774 631
632 *dest++ = (cups_utf8_t)ch;
633 i --;
c9fc04c6 634
e07d4801 635 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
ef416fc2 636 }
e1d6a774 637 else if (ch < 0x800)
ef416fc2 638 {
639 /*
640 * Two-octet UTF-8 <= 2047 (Latin-x)...
641 */
e1d6a774 642
643 if (i < 2)
c9fc04c6 644 {
e07d4801 645 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
c9fc04c6 646
e1d6a774 647 return (-1);
c9fc04c6 648 }
e1d6a774 649
650 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
651 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
652 i -= 2;
c9fc04c6 653
e07d4801 654 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
c9fc04c6 655 dest[-2], dest[-1]));
ef416fc2 656 }
e1d6a774 657 else if (ch < 0x10000)
ef416fc2 658 {
659 /*
660 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
661 */
e1d6a774 662
663 if (i < 3)
c9fc04c6 664 {
e07d4801 665 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
c9fc04c6 666
e1d6a774 667 return (-1);
c9fc04c6 668 }
e1d6a774 669
670 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
671 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
672 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
673 i -= 3;
c9fc04c6 674
e07d4801 675 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
c9fc04c6 676 dest[-3], dest[-2], dest[-1]));
e1d6a774 677 }
678 else
679 {
680 /*
681 * Four-octet UTF-8...
682 */
683
684 if (i < 4)
e07d4801
MS
685 {
686 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
687
e1d6a774 688 return (-1);
e07d4801 689 }
e1d6a774 690
691 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
692 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
693 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
694 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
695 i -= 4;
c9fc04c6 696
e07d4801 697 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
c9fc04c6 698 (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
ef416fc2 699 }
700 }
e1d6a774 701
ef416fc2 702 *dest = '\0';
e1d6a774 703
e07d4801 704 DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
c9fc04c6 705
e1d6a774 706 return ((int)(dest - start));
ef416fc2 707}
708
e1d6a774 709
ef416fc2 710/*
e1d6a774 711 * 'compare_wide()' - Compare key for wide (VBCS) match.
712 */
713
714static int
715compare_wide(const void *k1, /* I - Key char */
716 const void *k2) /* I - Map char */
717{
718 cups_vbcs_t key; /* Legacy key character */
719 cups_vbcs_t map; /* Legacy map character */
720
721
722 key = *((cups_vbcs_t *)k1);
723 map = ((_cups_wide2uni_t *)k2)->widechar;
724
725 return ((int)(key - map));
726}
727
728
729/*
730 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
ef416fc2 731 */
e1d6a774 732
733static int /* O - Count or -1 on error */
734conv_sbcs_to_utf8(
735 cups_utf8_t *dest, /* O - Target string */
736 const cups_sbcs_t *src, /* I - Source string */
737 int maxout, /* I - Max output */
738 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 739{
e1d6a774 740 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
741 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
742 cups_sbcs_t legchar; /* Legacy character value */
743 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
744 *workptr; /* Pointer into string */
745
ef416fc2 746
747 /*
e1d6a774 748 * Find legacy charset map in cache...
ef416fc2 749 */
e1d6a774 750
d6ae789d 751 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
ef416fc2 752 return (-1);
ef416fc2 753
754 /*
e1d6a774 755 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 756 */
ef416fc2 757
e1d6a774 758 work[0] = 0xfeff;
759 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 760 {
e1d6a774 761 legchar = *src++;
ef416fc2 762
763 /*
e1d6a774 764 * Convert ASCII verbatim (optimization)...
ef416fc2 765 */
ef416fc2 766
e1d6a774 767 if (legchar < 0x80)
768 *workptr++ = (cups_utf32_t)legchar;
769 else
ef416fc2 770 {
e1d6a774 771 /*
772 * Convert unknown character to Replacement Character...
773 */
ef416fc2 774
e1d6a774 775 crow = cmap->char2uni + legchar;
776
777 if (!*crow)
778 *workptr++ = 0xfffd;
779 else
780 *workptr++ = (cups_utf32_t)*crow;
ef416fc2 781 }
ef416fc2 782 }
e1d6a774 783
784 *workptr = 0;
785
786 /*
787 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
788 */
789
d6ae789d 790 cmap->used --;
e1d6a774 791
792 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 793}
794
e1d6a774 795
ef416fc2 796/*
e1d6a774 797 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
ef416fc2 798 */
e1d6a774 799
800static int /* O - Count or -1 on error */
801conv_utf8_to_sbcs(
802 cups_sbcs_t *dest, /* O - Target string */
803 const cups_utf8_t *src, /* I - Source string */
804 int maxout, /* I - Max output */
805 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 806{
e1d6a774 807 cups_sbcs_t *start; /* Start of destination string */
808 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
809 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
810 cups_utf32_t unichar; /* Character value */
811 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
812 *workptr; /* Pointer into string */
813
ef416fc2 814
815 /*
e1d6a774 816 * Find legacy charset map in cache...
ef416fc2 817 */
e1d6a774 818
d6ae789d 819 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
ef416fc2 820 return (-1);
ef416fc2 821
822 /*
e1d6a774 823 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 824 */
e1d6a774 825
826 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
827 return (-1);
ef416fc2 828
829 /*
e1d6a774 830 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
ef416fc2 831 */
e1d6a774 832
58dc1933 833 for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
ef416fc2 834 {
e1d6a774 835 unichar = *workptr++;
836 if (!unichar)
ef416fc2 837 break;
ef416fc2 838
839 /*
e1d6a774 840 * Convert ASCII verbatim (optimization)...
ef416fc2 841 */
ef416fc2 842
e1d6a774 843 if (unichar < 0x80)
844 {
845 *dest++ = (cups_sbcs_t)unichar;
846 continue;
847 }
ef416fc2 848
849 /*
e1d6a774 850 * Convert unknown character to visible replacement...
ef416fc2 851 */
ef416fc2 852
e1d6a774 853 srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
ef416fc2 854
e1d6a774 855 if (srow)
856 srow += (int)(unichar & 0xff);
ef416fc2 857
e1d6a774 858 if (!srow || !*srow)
859 *dest++ = '?';
860 else
861 *dest++ = *srow;
ef416fc2 862 }
ef416fc2 863
e1d6a774 864 *dest = '\0';
865
d6ae789d 866 cmap->used --;
e1d6a774 867
868 return ((int)(dest - start));
ef416fc2 869}
870
e1d6a774 871
ef416fc2 872/*
e1d6a774 873 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
ef416fc2 874 */
e1d6a774 875
876static int /* O - Count or -1 on error */
877conv_utf8_to_vbcs(
878 cups_sbcs_t *dest, /* O - Target string */
879 const cups_utf8_t *src, /* I - Source string */
880 int maxout, /* I - Max output */
881 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 882{
e1d6a774 883 cups_sbcs_t *start; /* Start of destination string */
884 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
885 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
886 cups_utf32_t unichar; /* Character value */
887 cups_vbcs_t legchar; /* Legacy character value */
888 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
889 *workptr; /* Pointer into string */
ef416fc2 890
ef416fc2 891
e07d4801
MS
892 DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
893 "encoding=%d)", dest, src, maxout, encoding));
c9fc04c6 894
ef416fc2 895 /*
e1d6a774 896 * Find legacy charset map in cache...
ef416fc2 897 */
ef416fc2 898
d6ae789d 899 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
c9fc04c6 900 {
e07d4801 901 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
c9fc04c6 902
e1d6a774 903 return (-1);
c9fc04c6 904 }
ef416fc2 905
906 /*
e1d6a774 907 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
ef416fc2 908 */
e1d6a774 909
910 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
c9fc04c6 911 {
e07d4801 912 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
c9fc04c6 913
e1d6a774 914 return (-1);
c9fc04c6 915 }
ef416fc2 916
917 /*
e1d6a774 918 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
ef416fc2 919 */
e1d6a774 920
58dc1933 921 for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
ef416fc2 922 {
e1d6a774 923 unichar = *workptr++;
ef416fc2 924
925 /*
e1d6a774 926 * Convert ASCII verbatim (optimization)...
ef416fc2 927 */
e1d6a774 928
929 if (unichar < 0x80)
930 {
b86bc4cf 931 *dest++ = (cups_sbcs_t)unichar;
c9fc04c6 932
e07d4801 933 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
c9fc04c6
MS
934 dest[-1]));
935
e1d6a774 936 continue;
937 }
ef416fc2 938
939 /*
e1d6a774 940 * Convert unknown character to visible replacement...
ef416fc2 941 */
e1d6a774 942
943 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
944
945 if (vrow)
946 vrow += (int)(unichar & 0xff);
947
948 if (!vrow || !*vrow)
949 legchar = (cups_vbcs_t)'?';
950 else
951 legchar = (cups_vbcs_t)*vrow;
ef416fc2 952
953 /*
e1d6a774 954 * Save n-byte legacy character...
ef416fc2 955 */
e1d6a774 956
957 if (legchar > 0xffffff)
ef416fc2 958 {
e1d6a774 959 if (maxout < 5)
c9fc04c6 960 {
e07d4801 961 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
c9fc04c6 962
e1d6a774 963 return (-1);
c9fc04c6 964 }
e1d6a774 965
966 *dest++ = (cups_sbcs_t)(legchar >> 24);
967 *dest++ = (cups_sbcs_t)(legchar >> 16);
968 *dest++ = (cups_sbcs_t)(legchar >> 8);
969 *dest++ = (cups_sbcs_t)legchar;
970
971 maxout -= 3;
c9fc04c6 972
e07d4801 973 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
c9fc04c6 974 (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
ef416fc2 975 }
e1d6a774 976 else if (legchar > 0xffff)
977 {
978 if (maxout < 4)
c9fc04c6 979 {
e07d4801 980 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
c9fc04c6 981
e1d6a774 982 return (-1);
c9fc04c6 983 }
ef416fc2 984
e1d6a774 985 *dest++ = (cups_sbcs_t)(legchar >> 16);
986 *dest++ = (cups_sbcs_t)(legchar >> 8);
987 *dest++ = (cups_sbcs_t)legchar;
ef416fc2 988
e1d6a774 989 maxout -= 2;
c9fc04c6 990
e07d4801 991 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
c9fc04c6 992 (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
e1d6a774 993 }
994 else if (legchar > 0xff)
995 {
996 *dest++ = (cups_sbcs_t)(legchar >> 8);
997 *dest++ = (cups_sbcs_t)legchar;
998
999 maxout --;
c9fc04c6 1000
e07d4801 1001 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
c9fc04c6
MS
1002 (unsigned)unichar, dest[-2], dest[-1]));
1003 }
1004 else
1005 {
536bc2c6 1006 *dest++ = (cups_sbcs_t)legchar;
c9fc04c6 1007
e07d4801 1008 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
c9fc04c6 1009 (unsigned)unichar, dest[-1]));
e1d6a774 1010 }
ef416fc2 1011 }
e1d6a774 1012
1013 *dest = '\0';
1014
d6ae789d 1015 vmap->used --;
e1d6a774 1016
e07d4801 1017 DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
c9fc04c6
MS
1018 (int)(dest - start)));
1019
e1d6a774 1020 return ((int)(dest - start));
ef416fc2 1021}
1022
e1d6a774 1023
ef416fc2 1024/*
e1d6a774 1025 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
ef416fc2 1026 */
e1d6a774 1027
1028static int /* O - Count or -1 on error */
1029conv_vbcs_to_utf8(
1030 cups_utf8_t *dest, /* O - Target string */
1031 const cups_sbcs_t *src, /* I - Source string */
1032 int maxout, /* I - Max output */
1033 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 1034{
e1d6a774 1035 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1036 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1037 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1038 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
1039 cups_vbcs_t legchar; /* Legacy character value */
1040 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1041 *workptr; /* Pointer into string */
ef416fc2 1042
ef416fc2 1043
1044 /*
e1d6a774 1045 * Find legacy charset map in cache...
ef416fc2 1046 */
ef416fc2 1047
e07d4801 1048 DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
c9fc04c6
MS
1049 dest, src, maxout, encoding));
1050
d6ae789d 1051 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
c9fc04c6 1052 {
e07d4801 1053 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
c9fc04c6 1054
e1d6a774 1055 return (-1);
c9fc04c6 1056 }
ef416fc2 1057
1058 /*
e1d6a774 1059 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
ef416fc2 1060 */
ef416fc2 1061
e1d6a774 1062 work[0] = 0xfeff;
1063 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
ef416fc2 1064 {
e1d6a774 1065 legchar = *src++;
1066 leadchar = (cups_sbcs_t)legchar;
ef416fc2 1067
1068 /*
e1d6a774 1069 * Convert ASCII verbatim (optimization)...
ef416fc2 1070 */
ef416fc2 1071
e1d6a774 1072 if (legchar < 0x80)
ef416fc2 1073 {
e1d6a774 1074 *workptr++ = (cups_utf32_t)legchar;
c9fc04c6 1075
e07d4801 1076 DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
c9fc04c6 1077 (unsigned)legchar));
e1d6a774 1078 continue;
ef416fc2 1079 }
1080
1081 /*
e1d6a774 1082 * Convert 2-byte legacy character...
ef416fc2 1083 */
e1d6a774 1084
1085 if (vmap->lead2char[(int)leadchar] == leadchar)
ef416fc2 1086 {
e1d6a774 1087 if (!*src)
c9fc04c6 1088 {
e07d4801 1089 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
c9fc04c6 1090
e1d6a774 1091 return (-1);
c9fc04c6 1092 }
e1d6a774 1093
1094 legchar = (legchar << 8) | *src++;
1095
ef416fc2 1096 /*
e1d6a774 1097 * Convert unknown character to Replacement Character...
ef416fc2 1098 */
e1d6a774 1099
1100 crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1101 if (crow)
1102 crow += (int) (legchar & 0xff);
1103
1104 if (!crow || !*crow)
1105 *workptr++ = 0xfffd;
1106 else
1107 *workptr++ = (cups_utf32_t)*crow;
c9fc04c6 1108
e07d4801 1109 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
c9fc04c6 1110 src[-2], src[-1], (unsigned)workptr[-1]));
e1d6a774 1111 continue;
ef416fc2 1112 }
1113
1114 /*
e1d6a774 1115 * Fetch 3-byte or 4-byte legacy character...
ef416fc2 1116 */
e1d6a774 1117
1118 if (vmap->lead3char[(int)leadchar] == leadchar)
ef416fc2 1119 {
e1d6a774 1120 if (!*src || !src[1])
c9fc04c6 1121 {
e07d4801 1122 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
c9fc04c6 1123
e1d6a774 1124 return (-1);
c9fc04c6 1125 }
e1d6a774 1126
1127 legchar = (legchar << 8) | *src++;
1128 legchar = (legchar << 8) | *src++;
ef416fc2 1129 }
e1d6a774 1130 else if (vmap->lead4char[(int)leadchar] == leadchar)
1131 {
1132 if (!*src || !src[1] || !src[2])
c9fc04c6 1133 {
e07d4801 1134 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
c9fc04c6 1135
e1d6a774 1136 return (-1);
c9fc04c6 1137 }
e1d6a774 1138
1139 legchar = (legchar << 8) | *src++;
1140 legchar = (legchar << 8) | *src++;
1141 legchar = (legchar << 8) | *src++;
1142 }
1143 else
c9fc04c6 1144 {
e07d4801 1145 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
c9fc04c6 1146
e1d6a774 1147 return (-1);
c9fc04c6 1148 }
ef416fc2 1149
1150 /*
e1d6a774 1151 * Find 3-byte or 4-byte legacy character...
ef416fc2 1152 */
e1d6a774 1153
1154 wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1155 vmap->wide2uni,
1156 vmap->widecount,
1157 sizeof(_cups_wide2uni_t),
1158 compare_wide);
ef416fc2 1159
1160 /*
e1d6a774 1161 * Convert unknown character to Replacement Character...
ef416fc2 1162 */
e1d6a774 1163
1164 if (!wide2uni || !wide2uni->unichar)
1165 *workptr++ = 0xfffd;
1166 else
1167 *workptr++ = wide2uni->unichar;
c9fc04c6
MS
1168
1169 if (vmap->lead3char[(int)leadchar] == leadchar)
e07d4801 1170 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
c9fc04c6
MS
1171 src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1172 else
e07d4801 1173 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
c9fc04c6 1174 src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
ef416fc2 1175 }
e1d6a774 1176
1177 *workptr = 0;
1178
d6ae789d 1179 vmap->used --;
e1d6a774 1180
e07d4801 1181 DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
c9fc04c6
MS
1182 (int)(workptr - work)));
1183
e1d6a774 1184 /*
1185 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1186 */
1187
1188 return (cupsUTF32ToUTF8(dest, work, maxout));
ef416fc2 1189}
1190
e1d6a774 1191
ef416fc2 1192/*
e1d6a774 1193 * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
ef416fc2 1194 */
e1d6a774 1195
1196static void
1197free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
ef416fc2 1198{
e1d6a774 1199 int i; /* Looping variable */
ef416fc2 1200
ef416fc2 1201
e1d6a774 1202 for (i = 0; i < 256; i ++)
1203 if (cmap->uni2char[i])
1204 free(cmap->uni2char[i]);
1205
1206 free(cmap);
1207}
1208
1209
1210/*
1211 * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1212 */
1213
1214static void
1215free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
1216{
1217 int i; /* Looping variable */
1218
1219
1220 for (i = 0; i < 256; i ++)
1221 if (vmap->char2uni[i])
1222 free(vmap->char2uni[i]);
1223
1224 for (i = 0; i < 256; i ++)
1225 if (vmap->uni2char[i])
1226 free(vmap->uni2char[i]);
1227
1228 if (vmap->wide2uni)
1229 free(vmap->wide2uni);
1230
1231 free(vmap);
1232}
1233
1234
d6ae789d 1235/*
1236 * 'get_charmap()' - Lookup or get a character set map (private).
1237 *
1238 * This code handles single-byte (SBCS), double-byte (DBCS), and
1239 * variable-byte (VBCS) character sets _without_ charset escapes...
1240 * This code does not handle multiple-byte character sets (MBCS)
1241 * (such as ISO-2022-JP) with charset switching via escapes...
1242 */
1243
1244
d09495fa 1245static void * /* O - Charset map pointer */
d6ae789d 1246get_charmap(
1247 const cups_encoding_t encoding) /* I - Encoding */
1248{
1249 char filename[1024]; /* Filename for charset map file */
1250 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1251
1252
e07d4801 1253 DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
c9fc04c6 1254
d6ae789d 1255 /*
1256 * Get the data directory and charset map name...
1257 */
1258
1259 snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1260 cg->cups_datadir, _cupsEncodingName(encoding));
1261
e07d4801 1262 DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
d6ae789d 1263
1264 /*
1265 * Read charset map input file into cache...
1266 */
1267
1268 if (encoding < CUPS_ENCODING_SBCS_END)
1269 return (get_sbcs_charmap(encoding, filename));
1270 else if (encoding < CUPS_ENCODING_VBCS_END)
1271 return (get_vbcs_charmap(encoding, filename));
1272 else
1273 return (NULL);
1274}
1275
1276
e1d6a774 1277/*
1278 * 'get_charmap_count()' - Count lines in a charmap file.
1279 */
1280
1281static int /* O - Count or -1 on error */
1282get_charmap_count(cups_file_t *fp) /* I - File to read from */
1283{
1284 int count; /* Number of lines */
1285 char line[256]; /* Line from input map file */
ef416fc2 1286
ef416fc2 1287
1288 /*
e1d6a774 1289 * Count lines in map input file...
ef416fc2 1290 */
ef416fc2 1291
e1d6a774 1292 count = 0;
ef416fc2 1293
e1d6a774 1294 while (cupsFileGets(fp, line, sizeof(line)))
1295 if (line[0] == '0')
1296 count ++;
ef416fc2 1297
e1d6a774 1298 /*
1299 * Return the number of lines...
1300 */
1301
1302 if (count > 0)
1303 return (count);
1304 else
1305 return (-1);
ef416fc2 1306}
1307
e1d6a774 1308
ef416fc2 1309/*
e1d6a774 1310 * 'get_sbcs_charmap()' - Get SBCS Charmap.
ef416fc2 1311 */
e1d6a774 1312
1313static _cups_cmap_t * /* O - Charmap or 0 on error */
1314get_sbcs_charmap(
1315 const cups_encoding_t encoding, /* I - Charmap Encoding */
1316 const char *filename) /* I - Charmap Filename */
ef416fc2 1317{
e1d6a774 1318 unsigned long legchar; /* Legacy character value */
1319 cups_utf32_t unichar; /* Unicode character value */
1320 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1321 cups_file_t *fp; /* Charset map file pointer */
1322 char *s; /* Line parsing pointer */
1323 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1324 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1325 char line[256]; /* Line from charset map file */
e1d6a774 1326
ef416fc2 1327
1328 /*
e1d6a774 1329 * See if we already have this SBCS charset map loaded...
ef416fc2 1330 */
e1d6a774 1331
e07d4801 1332 DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
c9fc04c6
MS
1333 filename));
1334
d6ae789d 1335 for (cmap = cmap_cache; cmap; cmap = cmap->next)
e1d6a774 1336 {
1337 if (cmap->encoding == encoding)
1338 {
1339 cmap->used ++;
e07d4801 1340 DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
d6ae789d 1341
e1d6a774 1342 return ((void *)cmap);
1343 }
1344 }
ef416fc2 1345
1346 /*
e1d6a774 1347 * Open SBCS charset map input file...
ef416fc2 1348 */
e1d6a774 1349
1350 if ((fp = cupsFileOpen(filename, "r")) == NULL)
c9fc04c6 1351 {
e07d4801 1352 DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
c9fc04c6 1353
e1d6a774 1354 return (NULL);
c9fc04c6 1355 }
ef416fc2 1356
1357 /*
e1d6a774 1358 * Allocate memory for SBCS charset map...
ef416fc2 1359 */
e1d6a774 1360
1361 if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1362 {
1363 cupsFileClose(fp);
e07d4801 1364 DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
d6ae789d 1365
e1d6a774 1366 return (NULL);
1367 }
1368
1369 cmap->used ++;
1370 cmap->encoding = encoding;
ef416fc2 1371
1372 /*
e1d6a774 1373 * Save SBCS charset map into memory for transcoding...
ef416fc2 1374 */
e1d6a774 1375
1376 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1377 {
e1d6a774 1378 if (line[0] != '0')
1379 continue;
1380
1381 legchar = strtol(line, &s, 16);
1382 if (legchar < 0 || legchar > 0xff)
1383 goto sbcs_error;
1384
1385 unichar = strtol(s, NULL, 16);
bf3816c7 1386 if (unichar < 0 || unichar > 0x10ffff)
e1d6a774 1387 goto sbcs_error;
ef416fc2 1388
1389 /*
e1d6a774 1390 * Save legacy to Unicode mapping in direct lookup table...
ef416fc2 1391 */
e1d6a774 1392
1393 crow = cmap->char2uni + legchar;
1394 *crow = (cups_ucs2_t)(unichar & 0xffff);
ef416fc2 1395
1396 /*
e1d6a774 1397 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1398 */
e1d6a774 1399
1400 srow = cmap->uni2char[(unichar >> 8) & 0xff];
1401 if (!srow)
ef416fc2 1402 {
e1d6a774 1403 srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1404 if (!srow)
1405 goto sbcs_error;
1406
1407 cmap->uni2char[(unichar >> 8) & 0xff] = srow;
ef416fc2 1408 }
1409
e1d6a774 1410 srow += unichar & 0xff;
1411
ef416fc2 1412 /*
e1d6a774 1413 * Convert Replacement Character to visible replacement...
ef416fc2 1414 */
e1d6a774 1415
1416 if (unichar == 0xfffd)
1417 legchar = (unsigned long)'?';
ef416fc2 1418
1419 /*
e1d6a774 1420 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1421 */
ef416fc2 1422
e1d6a774 1423 if (!*srow)
1424 *srow = (cups_sbcs_t)legchar;
1425 }
ef416fc2 1426
e1d6a774 1427 cupsFileClose(fp);
1428
ef416fc2 1429 /*
e1d6a774 1430 * Add it to the cache and return...
ef416fc2 1431 */
e1d6a774 1432
d6ae789d 1433 cmap->next = cmap_cache;
1434 cmap_cache = cmap;
e1d6a774 1435
e07d4801 1436 DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
e1d6a774 1437
1438 return (cmap);
ef416fc2 1439
1440 /*
e1d6a774 1441 * If we get here, there was an error in the cmap file...
ef416fc2 1442 */
e1d6a774 1443
1444 sbcs_error:
1445
1446 free_sbcs_charmap(cmap);
1447
1448 cupsFileClose(fp);
1449
e07d4801 1450 DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
e1d6a774 1451
1452 return (NULL);
1453}
1454
1455
1456/*
1457 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1458 */
1459
1460static _cups_vmap_t * /* O - Charmap or 0 on error */
1461get_vbcs_charmap(
1462 const cups_encoding_t encoding, /* I - Charmap Encoding */
1463 const char *filename) /* I - Charmap Filename */
1464{
1465 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1466 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1467 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1468 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1469 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1470 unsigned long legchar; /* Legacy character value */
1471 cups_utf32_t unichar; /* Unicode character value */
1472 int mapcount; /* Count of lines in charmap file */
1473 cups_file_t *fp; /* Charset map file pointer */
1474 char *s; /* Line parsing pointer */
1475 char line[256]; /* Line from charset map file */
1476 int i; /* Loop variable */
09a101d6 1477 int legacy; /* 32-bit legacy char */
e1d6a774 1478
1479
e07d4801 1480 DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
e1d6a774 1481 encoding, filename));
ef416fc2 1482
1483 /*
e1d6a774 1484 * See if we already have this DBCS/VBCS charset map loaded...
ef416fc2 1485 */
ef416fc2 1486
d6ae789d 1487 for (vmap = vmap_cache; vmap; vmap = vmap->next)
e1d6a774 1488 {
1489 if (vmap->encoding == encoding)
ef416fc2 1490 {
e1d6a774 1491 vmap->used ++;
e07d4801 1492 DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
d6ae789d 1493
e1d6a774 1494 return ((void *)vmap);
ef416fc2 1495 }
ef416fc2 1496 }
ef416fc2 1497
1498 /*
e1d6a774 1499 * Open VBCS charset map input file...
ef416fc2 1500 */
ef416fc2 1501
e1d6a774 1502 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1503 {
e07d4801 1504 DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
d6ae789d 1505
e1d6a774 1506 return (NULL);
1507 }
ef416fc2 1508
1509 /*
e1d6a774 1510 * Count lines in charmap file...
ef416fc2 1511 */
e1d6a774 1512
1513 if ((mapcount = get_charmap_count(fp)) <= 0)
1514 {
e07d4801 1515 DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
d6ae789d 1516
91c84a35
MS
1517 cupsFileClose(fp);
1518
e1d6a774 1519 return (NULL);
1520 }
1521
e07d4801 1522 DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
ef416fc2 1523
1524 /*
e1d6a774 1525 * Allocate memory for DBCS/VBCS charset map...
ef416fc2 1526 */
e1d6a774 1527
1528 if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1529 {
e07d4801 1530 DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
d6ae789d 1531
91c84a35
MS
1532 cupsFileClose(fp);
1533
e1d6a774 1534 return (NULL);
1535 }
1536
1537 vmap->used ++;
1538 vmap->encoding = encoding;
ef416fc2 1539
1540 /*
e1d6a774 1541 * Save DBCS/VBCS charset map into memory for transcoding...
ef416fc2 1542 */
e1d6a774 1543
e1d6a774 1544 wide2uni = NULL;
1545
1546 cupsFileRewind(fp);
1547
09a101d6 1548 i = 0;
1549 legacy = 0;
e1d6a774 1550
1551 while (cupsFileGets(fp, line, sizeof(line)))
ef416fc2 1552 {
e1d6a774 1553 if (line[0] != '0')
1554 continue;
1555
1556 legchar = strtoul(line, &s, 16);
1557 if (legchar == ULONG_MAX)
1558 goto vbcs_error;
1559
1560 unichar = strtol(s, NULL, 16);
bf3816c7 1561 if (unichar < 0 || unichar > 0x10ffff)
e1d6a774 1562 goto vbcs_error;
1563
1564 i ++;
1565
e07d4801
MS
1566 DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
1567 legchar, (unsigned)unichar));
ef416fc2 1568
1569 /*
e1d6a774 1570 * Save lead char of 2/3/4-byte legacy char...
ef416fc2 1571 */
e1d6a774 1572
c9fc04c6 1573 if (legchar > 0xffffff)
ef416fc2 1574 {
c9fc04c6
MS
1575 leadchar = (cups_sbcs_t)(legchar >> 24);
1576 vmap->lead4char[leadchar] = leadchar;
e1d6a774 1577 }
c9fc04c6 1578 else if (legchar > 0xffff)
e1d6a774 1579 {
1580 leadchar = (cups_sbcs_t)(legchar >> 16);
1581 vmap->lead3char[leadchar] = leadchar;
1582 }
bf3816c7 1583 else
e1d6a774 1584 {
c9fc04c6
MS
1585 leadchar = (cups_sbcs_t)(legchar >> 8);
1586 vmap->lead2char[leadchar] = leadchar;
ef416fc2 1587 }
1588
1589 /*
e1d6a774 1590 * Save Legacy to Unicode mapping...
ef416fc2 1591 */
e1d6a774 1592
1593 if (legchar <= 0xffff)
ef416fc2 1594 {
ef416fc2 1595 /*
e1d6a774 1596 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
ef416fc2 1597 */
e1d6a774 1598
1599 crow = vmap->char2uni[(int)leadchar];
1600 if (!crow)
1601 {
1602 crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1603 if (!crow)
1604 goto vbcs_error;
1605
1606 vmap->char2uni[(int)leadchar] = crow;
1607 }
1608
1609 crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1610 }
1611 else
1612 {
1613 /*
1614 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1615 */
1616
09a101d6 1617 if (!legacy)
e1d6a774 1618 {
09a101d6 1619 legacy = 1;
e1d6a774 1620 vmap->widecount = (mapcount - i + 1);
1621 wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
1622 sizeof(_cups_wide2uni_t));
1623 if (!wide2uni)
1624 goto vbcs_error;
1625
1626 vmap->wide2uni = wide2uni;
1627 }
1628
1629 wide2uni->widechar = (cups_vbcs_t)legchar;
1630 wide2uni->unichar = (cups_ucs2_t)unichar;
1631 wide2uni ++;
ef416fc2 1632 }
1633
1634 /*
e1d6a774 1635 * Save Unicode to legacy mapping in indirect lookup table...
ef416fc2 1636 */
e1d6a774 1637
1638 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1639 if (!vrow)
ef416fc2 1640 {
e1d6a774 1641 vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1642 if (!vrow)
1643 goto vbcs_error;
1644
1645 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
ef416fc2 1646 }
e1d6a774 1647
1648 vrow += (int)(unichar & 0xff);
ef416fc2 1649
1650 /*
e1d6a774 1651 * Convert Replacement Character to visible replacement...
ef416fc2 1652 */
e1d6a774 1653
1654 if (unichar == 0xfffd)
1655 legchar = (unsigned long)'?';
ef416fc2 1656
1657 /*
e1d6a774 1658 * First (oldest) legacy character uses Unicode mapping cell...
ef416fc2 1659 */
e1d6a774 1660
1661 if (!*vrow)
1662 *vrow = (cups_vbcs_t)legchar;
ef416fc2 1663 }
e1d6a774 1664
1665 vmap->charcount = (i - vmap->widecount);
1666
1667 cupsFileClose(fp);
ef416fc2 1668
1669 /*
e1d6a774 1670 * Add it to the cache and return...
ef416fc2 1671 */
ef416fc2 1672
c9fc04c6 1673 vmap->next = vmap_cache;
d6ae789d 1674 vmap_cache = vmap;
e1d6a774 1675
e07d4801 1676 DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
e1d6a774 1677
1678 return (vmap);
1679
1680 /*
1681 * If we get here, the file contains errors...
1682 */
1683
1684 vbcs_error:
1685
1686 free_vbcs_charmap(vmap);
1687
1688 cupsFileClose(fp);
1689
e07d4801 1690 DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
e1d6a774 1691
1692 return (NULL);
ef416fc2 1693}
1694
1695
1696/*
75bd9771 1697 * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
ef416fc2 1698 */