]> git.ipfire.org Git - thirdparty/cups.git/blame - cups/transcode.c
Fix build errors on Fedora.
[thirdparty/cups.git] / cups / transcode.c
CommitLineData
ef416fc2 1/*
f2d18633 2 * "$Id$"
ef416fc2 3 *
7e86f2f6 4 * Transcoding support for CUPS.
ef416fc2 5 *
7e86f2f6
MS
6 * Copyright 2007-2014 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
ef416fc2 8 *
7e86f2f6
MS
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this
13 * file is missing or damaged, see the license at "http://www.cups.org/".
ef416fc2 14 *
7e86f2f6 15 * This file is subject to the Apple OS-Developed Software exception.
ef416fc2 16 */
17
18/*
19 * Include necessary headers...
20 */
21
71e16022 22#include "cups-private.h"
e53920b9 23#include <limits.h>
ef416fc2 24#include <time.h>
cc754834
MS
25#ifdef HAVE_ICONV_H
26# include <iconv.h>
27#endif /* HAVE_ICONV_H */
ef416fc2 28
29
d6ae789d 30/*
31 * Local globals...
32 */
33
#ifdef HAVE_ICONV_H
/*
 * Cached iconv state for a single legacy charset.  A descriptor value of
 * (iconv_t)-1 means "not open"; map_encoding records which charset the
 * descriptors were opened for.
 */

/* Mutex to control access to maps */
static _cups_mutex_t	map_mutex = _CUPS_MUTEX_INITIALIZER;

/* Convert from UTF-8 to charset */
static iconv_t		map_from_utf8 = (iconv_t)-1;

/* Convert from charset to UTF-8 */
static iconv_t		map_to_utf8 = (iconv_t)-1;

/* Which charset is cached */
static cups_encoding_t	map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
e1d6a774 44
ef416fc2 45
/*
 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
 *
 * Closes each cached iconv descriptor that is currently open, marks it as
 * closed, and resets the cached encoding to CUPS_AUTO_ENCODING.  This
 * function does not lock map_mutex itself; the conversion functions in this
 * file call it while already holding that mutex.  When CUPS is built without
 * iconv support the function does nothing.
 */

void
_cupsCharmapFlush(void)
{
#ifdef HAVE_ICONV_H
  int		i;			/* Looping var */
  iconv_t	*maps[2];		/* Cached descriptors, in close order */


  maps[0] = &map_from_utf8;
  maps[1] = &map_to_utf8;

  for (i = 0; i < 2; i ++)
  {
    if (*maps[i] == (iconv_t)-1)
      continue;				/* Not open, nothing to release */

    iconv_close(*maps[i]);
    *maps[i] = (iconv_t)-1;
  }

  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
}
69
e1d6a774 70
ef416fc2 71/*
e1d6a774 72 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
ef416fc2 73 */
e1d6a774 74
75int /* O - Count or -1 on error */
76cupsCharsetToUTF8(
cc754834
MS
77 cups_utf8_t *dest, /* O - Target string */
78 const char *src, /* I - Source string */
79 const int maxout, /* I - Max output */
e1d6a774 80 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 81{
cc754834 82 cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */
84315f46 83#ifdef HAVE_ICONV_H
cc754834
MS
84 size_t srclen, /* Length of source string */
85 outBytesLeft; /* Bytes remaining in output buffer */
7cf5915e 86#endif /* HAVE_ICONV_H */
d6ae789d 87
88
ef416fc2 89 /*
90 * Check for valid arguments...
91 */
92
f11a948a 93 DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
e1d6a774 94 dest, src, maxout, encoding));
95
cc754834 96 if (!dest || !src || maxout < 1)
e1d6a774 97 {
cc754834
MS
98 if (dest)
99 *dest = '\0';
100
f11a948a 101 DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
ef416fc2 102 return (-1);
e1d6a774 103 }
ef416fc2 104
105 /*
106 * Handle identity conversions...
107 */
108
cc754834
MS
109 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
110 encoding >= CUPS_ENCODING_VBCS_END)
ef416fc2 111 {
07623986 112 strlcpy((char *)dest, src, (size_t)maxout);
b86bc4cf 113 return ((int)strlen((char *)dest));
ef416fc2 114 }
115
411affcf 116 /*
117 * Handle ISO-8859-1 to UTF-8 directly...
118 */
119
cc754834
MS
120 destptr = dest;
121
411affcf 122 if (encoding == CUPS_ISO8859_1)
123 {
124 int ch; /* Character from string */
cc754834 125 cups_utf8_t *destend; /* End of UTF-8 buffer */
411affcf 126
127
411affcf 128 destend = dest + maxout - 2;
129
130 while (*src && destptr < destend)
131 {
132 ch = *src++ & 255;
133
134 if (ch & 128)
135 {
7e86f2f6
MS
136 *destptr++ = (cups_utf8_t)(0xc0 | (ch >> 6));
137 *destptr++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
411affcf 138 }
139 else
7e86f2f6 140 *destptr++ = (cups_utf8_t)ch;
411affcf 141 }
142
143 *destptr = '\0';
144
b86bc4cf 145 return ((int)(destptr - dest));
411affcf 146 }
147
ef416fc2 148 /*
e1d6a774 149 * Convert input legacy charset to UTF-8...
ef416fc2 150 */
e1d6a774 151
cc754834 152#ifdef HAVE_ICONV_H
6d2f911b 153 _cupsMutexLock(&map_mutex);
d6ae789d 154
cc754834
MS
155 if (map_encoding != encoding)
156 {
157 _cupsCharmapFlush();
158
159 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
160 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
161 map_encoding = encoding;
162 }
163
164 if (map_to_utf8 != (iconv_t)-1)
165 {
f99f3698
MS
166 char *altdestptr = (char *)dest; /* Silence bogus GCC type-punned */
167
cc754834 168 srclen = strlen(src);
7e86f2f6 169 outBytesLeft = (size_t)maxout - 1;
4220952d 170
f99f3698
MS
171 iconv(map_to_utf8, (char **)&src, &srclen, &altdestptr, &outBytesLeft);
172 *altdestptr = '\0';
cc754834
MS
173
174 _cupsMutexUnlock(&map_mutex);
175
f99f3698 176 return ((int)(altdestptr - (char *)dest));
cc754834 177 }
d6ae789d 178
6d2f911b 179 _cupsMutexUnlock(&map_mutex);
cc754834 180#endif /* HAVE_ICONV_H */
d6ae789d 181
cc754834
MS
182 /*
183 * No iconv() support, so error out...
184 */
185
186 *destptr = '\0';
187
188 return (-1);
ef416fc2 189}
190
e1d6a774 191
ef416fc2 192/*
e1d6a774 193 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
ef416fc2 194 */
e1d6a774 195
196int /* O - Count or -1 on error */
197cupsUTF8ToCharset(
198 char *dest, /* O - Target string */
199 const cups_utf8_t *src, /* I - Source string */
200 const int maxout, /* I - Max output */
201 const cups_encoding_t encoding) /* I - Encoding */
ef416fc2 202{
cc754834 203 char *destptr; /* Pointer into destination */
84315f46 204#ifdef HAVE_ICONV_H
cc754834
MS
205 size_t srclen, /* Length of source string */
206 outBytesLeft; /* Bytes remaining in output buffer */
7cf5915e 207#endif /* HAVE_ICONV_H */
d6ae789d 208
209
ef416fc2 210 /*
211 * Check for valid arguments...
212 */
213
cc754834 214 if (!dest || !src || maxout < 1)
e1d6a774 215 {
216 if (dest)
217 *dest = '\0';
218
ef416fc2 219 return (-1);
e1d6a774 220 }
ef416fc2 221
222 /*
223 * Handle identity conversions...
224 */
225
22c9029b 226 if (encoding == CUPS_UTF8 ||
cc754834 227 encoding >= CUPS_ENCODING_VBCS_END)
ef416fc2 228 {
07623986 229 strlcpy(dest, (char *)src, (size_t)maxout);
b86bc4cf 230 return ((int)strlen(dest));
ef416fc2 231 }
232
411affcf 233 /*
234 * Handle UTF-8 to ISO-8859-1 directly...
235 */
236
cc754834
MS
237 destptr = dest;
238
22c9029b 239 if (encoding == CUPS_ISO8859_1 || encoding <= CUPS_US_ASCII)
411affcf 240 {
22c9029b
MS
241 int ch, /* Character from string */
242 maxch; /* Maximum character for charset */
cc754834 243 char *destend; /* End of ISO-8859-1 buffer */
411affcf 244
22c9029b 245 maxch = encoding == CUPS_ISO8859_1 ? 256 : 128;
411affcf 246 destend = dest + maxout - 1;
247
248 while (*src && destptr < destend)
249 {
250 ch = *src++;
251
252 if ((ch & 0xe0) == 0xc0)
253 {
254 ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
255
22c9029b 256 if (ch < maxch)
7e86f2f6 257 *destptr++ = (char)ch;
411affcf 258 else
259 *destptr++ = '?';
260 }
261 else if ((ch & 0xf0) == 0xe0 ||
262 (ch & 0xf8) == 0xf0)
263 *destptr++ = '?';
264 else if (!(ch & 0x80))
7e86f2f6 265 *destptr++ = (char)ch;
411affcf 266 }
267
268 *destptr = '\0';
269
b86bc4cf 270 return ((int)(destptr - dest));
411affcf 271 }
272
cc754834 273#ifdef HAVE_ICONV_H
ef416fc2 274 /*
e1d6a774 275 * Convert input UTF-8 to legacy charset...
ef416fc2 276 */
e1d6a774 277
6d2f911b 278 _cupsMutexLock(&map_mutex);
d6ae789d 279
cc754834
MS
280 if (map_encoding != encoding)
281 {
282 _cupsCharmapFlush();
283
284 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
285 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
286 map_encoding = encoding;
287 }
288
289 if (map_from_utf8 != (iconv_t)-1)
290 {
f99f3698
MS
291 char *altsrc = (char *)src; /* Silence bogus GCC type-punned */
292
cc754834 293 srclen = strlen((char *)src);
7e86f2f6 294 outBytesLeft = (size_t)maxout - 1;
4220952d 295
f99f3698 296 iconv(map_from_utf8, &altsrc, &srclen, &destptr, &outBytesLeft);
4220952d 297 *destptr = '\0';
cc754834
MS
298
299 _cupsMutexUnlock(&map_mutex);
300
301 return ((int)(destptr - dest));
302 }
d6ae789d 303
6d2f911b 304 _cupsMutexUnlock(&map_mutex);
cc754834
MS
305#endif /* HAVE_ICONV_H */
306
307 /*
308 * No iconv() support, so error out...
309 */
310
311 *destptr = '\0';
d6ae789d 312
cc754834 313 return (-1);
ef416fc2 314}
315
ef416fc2 316
317/*
318 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
319 *
320 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
321 *
322 * UTF-32 char UTF-8 char(s)
323 * --------------------------------------------------
e1d6a774 324 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 325 * 128 to 2047 = 110xxxxx 10yyyyyy
326 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 327 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 328 *
329 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
330 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 331 */
e1d6a774 332
333int /* O - Count or -1 on error */
334cupsUTF8ToUTF32(
335 cups_utf32_t *dest, /* O - Target string */
336 const cups_utf8_t *src, /* I - Source string */
337 const int maxout) /* I - Max output */
ef416fc2 338{
e1d6a774 339 int i; /* Looping variable */
340 cups_utf8_t ch; /* Character value */
341 cups_utf8_t next; /* Next character value */
342 cups_utf32_t ch32; /* UTF-32 character value */
343
ef416fc2 344
345 /*
346 * Check for valid arguments and clear output...
347 */
e1d6a774 348
e07d4801
MS
349 DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
350 src, maxout));
c9fc04c6 351
e1d6a774 352 if (dest)
353 *dest = 0;
354
355 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
c9fc04c6 356 {
e07d4801 357 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
c9fc04c6 358
ef416fc2 359 return (-1);
c9fc04c6 360 }
ef416fc2 361
362 /*
cda47a96 363 * Convert input UTF-8 to output UTF-32...
ef416fc2 364 */
e1d6a774 365
e1d6a774 366 for (i = maxout - 1; *src && i > 0; i --)
ef416fc2 367 {
e1d6a774 368 ch = *src++;
ef416fc2 369
370 /*
371 * Convert UTF-8 character(s) to UTF-32 character...
372 */
e1d6a774 373
374 if (!(ch & 0x80))
ef416fc2 375 {
376 /*
377 * One-octet UTF-8 <= 127 (US-ASCII)...
378 */
e1d6a774 379
380 *dest++ = ch;
c9fc04c6 381
e07d4801 382 DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
2abf387c 383 continue;
ef416fc2 384 }
385 else if ((ch & 0xe0) == 0xc0)
386 {
387 /*
388 * Two-octet UTF-8 <= 2047 (Latin-x)...
389 */
e1d6a774 390
391 next = *src++;
c9fc04c6
MS
392 if ((next & 0xc0) != 0x80)
393 {
e07d4801 394 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 395
ef416fc2 396 return (-1);
c9fc04c6 397 }
e1d6a774 398
7e86f2f6 399 ch32 = (cups_utf32_t)((ch & 0x1f) << 6) | (cups_utf32_t)(next & 0x3f);
ef416fc2 400
401 /*
402 * Check for non-shortest form (invalid UTF-8)...
403 */
e1d6a774 404
405 if (ch32 < 0x80)
c9fc04c6 406 {
e07d4801 407 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 408
ef416fc2 409 return (-1);
c9fc04c6 410 }
e1d6a774 411
412 *dest++ = ch32;
c9fc04c6 413
e07d4801 414 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
c9fc04c6 415 src[-2], src[-1], (unsigned)ch32));
ef416fc2 416 }
417 else if ((ch & 0xf0) == 0xe0)
418 {
419 /*
420 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
421 */
e1d6a774 422
423 next = *src++;
c9fc04c6
MS
424 if ((next & 0xc0) != 0x80)
425 {
e07d4801 426 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 427
ef416fc2 428 return (-1);
c9fc04c6 429 }
e1d6a774 430
7e86f2f6 431 ch32 = (cups_utf32_t)((ch & 0x0f) << 6) | (cups_utf32_t)(next & 0x3f);
e1d6a774 432
433 next = *src++;
c9fc04c6
MS
434 if ((next & 0xc0) != 0x80)
435 {
e07d4801 436 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 437
ef416fc2 438 return (-1);
c9fc04c6 439 }
e1d6a774 440
7e86f2f6 441 ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
ef416fc2 442
443 /*
444 * Check for non-shortest form (invalid UTF-8)...
445 */
e1d6a774 446
447 if (ch32 < 0x800)
c9fc04c6 448 {
e07d4801 449 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 450
ef416fc2 451 return (-1);
c9fc04c6 452 }
e1d6a774 453
454 *dest++ = ch32;
c9fc04c6 455
e07d4801 456 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
c9fc04c6 457 src[-3], src[-2], src[-1], (unsigned)ch32));
ef416fc2 458 }
459 else if ((ch & 0xf8) == 0xf0)
460 {
461 /*
e1d6a774 462 * Four-octet UTF-8...
ef416fc2 463 */
e1d6a774 464
465 next = *src++;
c9fc04c6
MS
466 if ((next & 0xc0) != 0x80)
467 {
e07d4801 468 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 469
ef416fc2 470 return (-1);
c9fc04c6 471 }
e1d6a774 472
7e86f2f6 473 ch32 = (cups_utf32_t)((ch & 0x07) << 6) | (cups_utf32_t)(next & 0x3f);
e1d6a774 474
475 next = *src++;
c9fc04c6
MS
476 if ((next & 0xc0) != 0x80)
477 {
e07d4801 478 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 479
e1d6a774 480 return (-1);
c9fc04c6 481 }
e1d6a774 482
7e86f2f6 483 ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
e1d6a774 484
485 next = *src++;
c9fc04c6
MS
486 if ((next & 0xc0) != 0x80)
487 {
e07d4801 488 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 489
e1d6a774 490 return (-1);
c9fc04c6 491 }
e1d6a774 492
7e86f2f6 493 ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
e1d6a774 494
ef416fc2 495 /*
e1d6a774 496 * Check for non-shortest form (invalid UTF-8)...
ef416fc2 497 */
e1d6a774 498
499 if (ch32 < 0x10000)
c9fc04c6 500 {
e07d4801 501 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 502
e1d6a774 503 return (-1);
c9fc04c6 504 }
e1d6a774 505
506 *dest++ = ch32;
c9fc04c6 507
e07d4801 508 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
c9fc04c6 509 src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
ef416fc2 510 }
511 else
512 {
513 /*
e1d6a774 514 * More than 4-octet (invalid UTF-8 sequence)...
ef416fc2 515 */
e1d6a774 516
e07d4801 517 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
c9fc04c6 518
ef416fc2 519 return (-1);
520 }
521
522 /*
523 * Check for UTF-16 surrogate (illegal UTF-8)...
524 */
ef416fc2 525
2abf387c 526 if (ch32 >= 0xd800 && ch32 <= 0xdfff)
ef416fc2 527 return (-1);
528 }
e1d6a774 529
ef416fc2 530 *dest = 0;
e1d6a774 531
e07d4801 532 DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
c9fc04c6
MS
533
534 return (maxout - 1 - i);
ef416fc2 535}
536
e1d6a774 537
ef416fc2 538/*
539 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
540 *
541 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
542 *
543 * UTF-32 char UTF-8 char(s)
544 * --------------------------------------------------
e1d6a774 545 * 0 to 127 = 0xxxxxxx (US-ASCII)
ef416fc2 546 * 128 to 2047 = 110xxxxx 10yyyyyy
547 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
e1d6a774 548 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
ef416fc2 549 *
550 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
551 * which would convert to five- or six-octet UTF-8 sequences...
ef416fc2 552 */
e1d6a774 553
554int /* O - Count or -1 on error */
555cupsUTF32ToUTF8(
556 cups_utf8_t *dest, /* O - Target string */
557 const cups_utf32_t *src, /* I - Source string */
558 const int maxout) /* I - Max output */
ef416fc2 559{
e1d6a774 560 cups_utf8_t *start; /* Start of destination string */
561 int i; /* Looping variable */
562 int swap; /* Byte-swap input to output */
563 cups_utf32_t ch; /* Character value */
564
ef416fc2 565
566 /*
567 * Check for valid arguments and clear output...
568 */
e1d6a774 569
e07d4801 570 DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
c9fc04c6
MS
571 maxout));
572
e1d6a774 573 if (dest)
574 *dest = '\0';
575
576 if (!dest || !src || maxout < 1)
c9fc04c6 577 {
e07d4801 578 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
c9fc04c6 579
ef416fc2 580 return (-1);
c9fc04c6 581 }
ef416fc2 582
583 /*
584 * Check for leading BOM in UTF-32 and inverted BOM...
585 */
e1d6a774 586
587 start = dest;
588 swap = *src == 0xfffe0000;
589
e07d4801 590 DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
c9fc04c6 591
e1d6a774 592 if (*src == 0xfffe0000 || *src == 0xfeff)
593 src ++;
ef416fc2 594
595 /*
596 * Convert input UTF-32 to output UTF-8...
597 */
e1d6a774 598
599 for (i = maxout - 1; *src && i > 0;)
ef416fc2 600 {
e1d6a774 601 ch = *src++;
ef416fc2 602
603 /*
604 * Byte swap input UTF-32, if necessary...
e1d6a774 605 * (only byte-swapping 24 of 32 bits)
ef416fc2 606 */
e1d6a774 607
ef416fc2 608 if (swap)
609 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
610
611 /*
e1d6a774 612 * Check for beyond Plane 16 (invalid UTF-32)...
ef416fc2 613 */
ef416fc2 614
ef416fc2 615 if (ch > 0x10ffff)
c9fc04c6 616 {
e07d4801 617 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
c9fc04c6 618
ef416fc2 619 return (-1);
c9fc04c6 620 }
ef416fc2 621
ef416fc2 622 /*
623 * Convert UTF-32 character to UTF-8 character(s)...
624 */
e1d6a774 625
626 if (ch < 0x80)
ef416fc2 627 {
628 /*
629 * One-octet UTF-8 <= 127 (US-ASCII)...
630 */
e1d6a774 631
632 *dest++ = (cups_utf8_t)ch;
633 i --;
c9fc04c6 634
e07d4801 635 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
ef416fc2 636 }
e1d6a774 637 else if (ch < 0x800)
ef416fc2 638 {
639 /*
640 * Two-octet UTF-8 <= 2047 (Latin-x)...
641 */
e1d6a774 642
643 if (i < 2)
c9fc04c6 644 {
e07d4801 645 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
c9fc04c6 646
e1d6a774 647 return (-1);
c9fc04c6 648 }
e1d6a774 649
650 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
651 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
652 i -= 2;
c9fc04c6 653
e07d4801 654 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
c9fc04c6 655 dest[-2], dest[-1]));
ef416fc2 656 }
e1d6a774 657 else if (ch < 0x10000)
ef416fc2 658 {
659 /*
660 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
661 */
e1d6a774 662
663 if (i < 3)
c9fc04c6 664 {
e07d4801 665 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
c9fc04c6 666
e1d6a774 667 return (-1);
c9fc04c6 668 }
e1d6a774 669
670 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
671 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
672 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
673 i -= 3;
c9fc04c6 674
e07d4801 675 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
c9fc04c6 676 dest[-3], dest[-2], dest[-1]));
e1d6a774 677 }
678 else
679 {
680 /*
681 * Four-octet UTF-8...
682 */
683
684 if (i < 4)
e07d4801
MS
685 {
686 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
687
e1d6a774 688 return (-1);
e07d4801 689 }
e1d6a774 690
691 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
692 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
693 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
694 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
695 i -= 4;
c9fc04c6 696
e07d4801 697 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
c9fc04c6 698 (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
ef416fc2 699 }
700 }
e1d6a774 701
ef416fc2 702 *dest = '\0';
e1d6a774 703
e07d4801 704 DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
c9fc04c6 705
e1d6a774 706 return ((int)(dest - start));
ef416fc2 707}
708
e1d6a774 709
ef416fc2 710/*
f2d18633 711 * End of "$Id$"
ef416fc2 712 */