]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Transcoding support for CUPS. | |
3 | * | |
4 | * Copyright © 2020-2024 by OpenPrinting. | |
5 | * Copyright 2007-2014 by Apple Inc. | |
6 | * Copyright 1997-2007 by Easy Software Products. | |
7 | * | |
8 | * Licensed under Apache License v2.0. See the file "LICENSE" for more information. | |
9 | */ | |
10 | ||
11 | /* | |
12 | * Include necessary headers... | |
13 | */ | |
14 | ||
15 | #include "cups-private.h" | |
16 | #include "debug-internal.h" | |
17 | #include <limits.h> | |
18 | #include <time.h> | |
19 | #ifdef HAVE_ICONV_H | |
20 | # include <iconv.h> | |
21 | #endif /* HAVE_ICONV_H */ | |
22 | ||
23 | ||
/*
 * Local globals...
 *
 * Cached iconv state shared by the charset conversion functions in this
 * file.  These only exist when iconv is available.
 */

#ifdef HAVE_ICONV_H
/* Mutex to control access to maps */
static cups_mutex_t     map_mutex = CUPS_MUTEX_INITIALIZER;

/* Convert from UTF-8 to charset */
static iconv_t          map_from_utf8 = (iconv_t)-1;

/* Convert from charset to UTF-8 */
static iconv_t          map_to_utf8 = (iconv_t)-1;

/* Which charset is cached */
static cups_encoding_t  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
38 | ||
39 | ||
/*
 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
 *
 * Closes whichever of the two cached iconv descriptors are open, marks them
 * as unopened, and resets the cached encoding to CUPS_AUTO_ENCODING.  Does
 * nothing when iconv support is not compiled in.  This function does not
 * take map_mutex itself; the converters in this file call it with the mutex
 * already held.
 */

void
_cupsCharmapFlush(void)
{
#ifdef HAVE_ICONV_H
  size_t        n;                      /* Looping var */
  iconv_t       *cached[2];             /* Cached descriptors, in close order */


  cached[0] = &map_from_utf8;
  cached[1] = &map_to_utf8;

  for (n = 0; n < sizeof(cached) / sizeof(cached[0]); n ++)
  {
    if (*cached[n] == (iconv_t)-1)
      continue;                         /* Never opened (or open failed) */

    iconv_close(*cached[n]);
    *cached[n] = (iconv_t)-1;
  }

  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
}
63 | ||
64 | ||
65 | /* | |
66 | * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8. | |
67 | */ | |
68 | ||
69 | int /* O - Count or -1 on error */ | |
70 | cupsCharsetToUTF8( | |
71 | cups_utf8_t *dest, /* O - Target string */ | |
72 | const char *src, /* I - Source string */ | |
73 | const int maxout, /* I - Max output */ | |
74 | const cups_encoding_t encoding) /* I - Encoding */ | |
75 | { | |
76 | cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */ | |
77 | #ifdef HAVE_ICONV_H | |
78 | size_t srclen, /* Length of source string */ | |
79 | outBytesLeft; /* Bytes remaining in output buffer */ | |
80 | #endif /* HAVE_ICONV_H */ | |
81 | ||
82 | ||
83 | /* | |
84 | * Check for valid arguments... | |
85 | */ | |
86 | ||
87 | DEBUG_printf("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)", (void *)dest, src, maxout, encoding); | |
88 | ||
89 | if (!dest || !src || maxout < 1) | |
90 | { | |
91 | if (dest) | |
92 | *dest = '\0'; | |
93 | ||
94 | DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1"); | |
95 | return (-1); | |
96 | } | |
97 | ||
98 | /* | |
99 | * Handle identity conversions... | |
100 | */ | |
101 | ||
102 | if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII || | |
103 | encoding >= CUPS_ENCODING_VBCS_END) | |
104 | { | |
105 | cupsCopyString((char *)dest, src, (size_t)maxout); | |
106 | return ((int)strlen((char *)dest)); | |
107 | } | |
108 | ||
109 | /* | |
110 | * Handle ISO-8859-1 to UTF-8 directly... | |
111 | */ | |
112 | ||
113 | destptr = dest; | |
114 | ||
115 | if (encoding == CUPS_ISO8859_1) | |
116 | { | |
117 | int ch; /* Character from string */ | |
118 | cups_utf8_t *destend; /* End of UTF-8 buffer */ | |
119 | ||
120 | ||
121 | destend = dest + maxout - 2; | |
122 | ||
123 | while (*src && destptr < destend) | |
124 | { | |
125 | ch = *src++ & 255; | |
126 | ||
127 | if (ch & 128) | |
128 | { | |
129 | *destptr++ = (cups_utf8_t)(0xc0 | (ch >> 6)); | |
130 | *destptr++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); | |
131 | } | |
132 | else | |
133 | *destptr++ = (cups_utf8_t)ch; | |
134 | } | |
135 | ||
136 | *destptr = '\0'; | |
137 | ||
138 | return ((int)(destptr - dest)); | |
139 | } | |
140 | ||
141 | /* | |
142 | * Convert input legacy charset to UTF-8... | |
143 | */ | |
144 | ||
145 | #ifdef HAVE_ICONV_H | |
146 | cupsMutexLock(&map_mutex); | |
147 | ||
148 | if (map_encoding != encoding) | |
149 | { | |
150 | char toset[1024]; /* Destination character set */ | |
151 | ||
152 | _cupsCharmapFlush(); | |
153 | ||
154 | snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding)); | |
155 | ||
156 | map_encoding = encoding; | |
157 | map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8"); | |
158 | map_to_utf8 = iconv_open("UTF-8", toset); | |
159 | } | |
160 | ||
161 | if (map_to_utf8 != (iconv_t)-1) | |
162 | { | |
163 | char *altdestptr = (char *)dest; /* Silence bogus GCC type-punned */ | |
164 | ||
165 | srclen = strlen(src); | |
166 | outBytesLeft = (size_t)maxout - 1; | |
167 | ||
168 | iconv(map_to_utf8, (char **)&src, &srclen, &altdestptr, &outBytesLeft); | |
169 | *altdestptr = '\0'; | |
170 | ||
171 | cupsMutexUnlock(&map_mutex); | |
172 | ||
173 | return ((int)(altdestptr - (char *)dest)); | |
174 | } | |
175 | ||
176 | cupsMutexUnlock(&map_mutex); | |
177 | #endif /* HAVE_ICONV_H */ | |
178 | ||
179 | /* | |
180 | * No iconv() support, so error out... | |
181 | */ | |
182 | ||
183 | *destptr = '\0'; | |
184 | ||
185 | return (-1); | |
186 | } | |
187 | ||
188 | ||
189 | /* | |
190 | * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set. | |
191 | */ | |
192 | ||
193 | int /* O - Count or -1 on error */ | |
194 | cupsUTF8ToCharset( | |
195 | char *dest, /* O - Target string */ | |
196 | const cups_utf8_t *src, /* I - Source string */ | |
197 | const int maxout, /* I - Max output */ | |
198 | const cups_encoding_t encoding) /* I - Encoding */ | |
199 | { | |
200 | char *destptr; /* Pointer into destination */ | |
201 | #ifdef HAVE_ICONV_H | |
202 | size_t srclen, /* Length of source string */ | |
203 | outBytesLeft; /* Bytes remaining in output buffer */ | |
204 | #endif /* HAVE_ICONV_H */ | |
205 | ||
206 | ||
207 | /* | |
208 | * Check for valid arguments... | |
209 | */ | |
210 | ||
211 | if (!dest || !src || maxout < 1) | |
212 | { | |
213 | if (dest) | |
214 | *dest = '\0'; | |
215 | ||
216 | return (-1); | |
217 | } | |
218 | ||
219 | /* | |
220 | * Handle identity conversions... | |
221 | */ | |
222 | ||
223 | if (encoding == CUPS_UTF8 || | |
224 | encoding >= CUPS_ENCODING_VBCS_END) | |
225 | { | |
226 | cupsCopyString(dest, (char *)src, (size_t)maxout); | |
227 | return ((int)strlen(dest)); | |
228 | } | |
229 | ||
230 | /* | |
231 | * Handle UTF-8 to ISO-8859-1 directly... | |
232 | */ | |
233 | ||
234 | destptr = dest; | |
235 | ||
236 | if (encoding == CUPS_ISO8859_1 || encoding <= CUPS_US_ASCII) | |
237 | { | |
238 | int ch, /* Character from string */ | |
239 | maxch; /* Maximum character for charset */ | |
240 | char *destend; /* End of ISO-8859-1 buffer */ | |
241 | ||
242 | maxch = encoding == CUPS_ISO8859_1 ? 256 : 128; | |
243 | destend = dest + maxout - 1; | |
244 | ||
245 | while (*src && destptr < destend) | |
246 | { | |
247 | ch = *src++; | |
248 | ||
249 | if ((ch & 0xe0) == 0xc0) | |
250 | { | |
251 | ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f); | |
252 | ||
253 | if (ch < maxch) | |
254 | *destptr++ = (char)ch; | |
255 | else | |
256 | *destptr++ = '?'; | |
257 | } | |
258 | else if ((ch & 0xf0) == 0xe0 || | |
259 | (ch & 0xf8) == 0xf0) | |
260 | *destptr++ = '?'; | |
261 | else if (!(ch & 0x80)) | |
262 | *destptr++ = (char)ch; | |
263 | } | |
264 | ||
265 | *destptr = '\0'; | |
266 | ||
267 | return ((int)(destptr - dest)); | |
268 | } | |
269 | ||
270 | #ifdef HAVE_ICONV_H | |
271 | /* | |
272 | * Convert input UTF-8 to legacy charset... | |
273 | */ | |
274 | ||
275 | cupsMutexLock(&map_mutex); | |
276 | ||
277 | if (map_encoding != encoding) | |
278 | { | |
279 | char toset[1024]; /* Destination character set */ | |
280 | ||
281 | _cupsCharmapFlush(); | |
282 | ||
283 | snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding)); | |
284 | ||
285 | map_encoding = encoding; | |
286 | map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8"); | |
287 | map_to_utf8 = iconv_open("UTF-8", toset); | |
288 | } | |
289 | ||
290 | if (map_from_utf8 != (iconv_t)-1) | |
291 | { | |
292 | char *altsrc = (char *)src; /* Silence bogus GCC type-punned */ | |
293 | ||
294 | srclen = strlen((char *)src); | |
295 | outBytesLeft = (size_t)maxout - 1; | |
296 | ||
297 | iconv(map_from_utf8, &altsrc, &srclen, &destptr, &outBytesLeft); | |
298 | *destptr = '\0'; | |
299 | ||
300 | cupsMutexUnlock(&map_mutex); | |
301 | ||
302 | return ((int)(destptr - dest)); | |
303 | } | |
304 | ||
305 | cupsMutexUnlock(&map_mutex); | |
306 | #endif /* HAVE_ICONV_H */ | |
307 | ||
308 | /* | |
309 | * No iconv() support, so error out... | |
310 | */ | |
311 | ||
312 | *destptr = '\0'; | |
313 | ||
314 | return (-1); | |
315 | } | |
316 | ||
317 | ||
318 | /* | |
319 | * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32. | |
320 | * | |
321 | * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows... | |
322 | * | |
323 | * UTF-32 char UTF-8 char(s) | |
324 | * -------------------------------------------------- | |
325 | * 0 to 127 = 0xxxxxxx (US-ASCII) | |
326 | * 128 to 2047 = 110xxxxx 10yyyyyy | |
327 | * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz | |
328 | * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx | |
329 | * | |
330 | * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4, | |
331 | * which would convert to five- or six-octet UTF-8 sequences... | |
332 | */ | |
333 | ||
334 | int /* O - Count or -1 on error */ | |
335 | cupsUTF8ToUTF32( | |
336 | cups_utf32_t *dest, /* O - Target string */ | |
337 | const cups_utf8_t *src, /* I - Source string */ | |
338 | const int maxout) /* I - Max output */ | |
339 | { | |
340 | int i; /* Looping variable */ | |
341 | cups_utf8_t ch; /* Character value */ | |
342 | cups_utf8_t next; /* Next character value */ | |
343 | cups_utf32_t ch32; /* UTF-32 character value */ | |
344 | ||
345 | ||
346 | /* | |
347 | * Check for valid arguments and clear output... | |
348 | */ | |
349 | ||
350 | DEBUG_printf("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", (void *)dest, src, maxout); | |
351 | ||
352 | if (dest) | |
353 | *dest = 0; | |
354 | ||
355 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
356 | { | |
357 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)"); | |
358 | ||
359 | return (-1); | |
360 | } | |
361 | ||
362 | /* | |
363 | * Convert input UTF-8 to output UTF-32... | |
364 | */ | |
365 | ||
366 | for (i = maxout - 1; *src && i > 0; i --) | |
367 | { | |
368 | ch = *src++; | |
369 | ||
370 | /* | |
371 | * Convert UTF-8 character(s) to UTF-32 character... | |
372 | */ | |
373 | ||
374 | if (!(ch & 0x80)) | |
375 | { | |
376 | /* | |
377 | * One-octet UTF-8 <= 127 (US-ASCII)... | |
378 | */ | |
379 | ||
380 | *dest++ = ch; | |
381 | ||
382 | DEBUG_printf("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch); | |
383 | continue; | |
384 | } | |
385 | else if ((ch & 0xe0) == 0xc0) | |
386 | { | |
387 | /* | |
388 | * Two-octet UTF-8 <= 2047 (Latin-x)... | |
389 | */ | |
390 | ||
391 | next = *src++; | |
392 | if ((next & 0xc0) != 0x80) | |
393 | { | |
394 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
395 | ||
396 | return (-1); | |
397 | } | |
398 | ||
399 | ch32 = (cups_utf32_t)((ch & 0x1f) << 6) | (cups_utf32_t)(next & 0x3f); | |
400 | ||
401 | /* | |
402 | * Check for non-shortest form (invalid UTF-8)... | |
403 | */ | |
404 | ||
405 | if (ch32 < 0x80) | |
406 | { | |
407 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
408 | ||
409 | return (-1); | |
410 | } | |
411 | ||
412 | *dest++ = ch32; | |
413 | ||
414 | DEBUG_printf("4cupsUTF8ToUTF32: %02x %02x => %08X", src[-2], src[-1], (unsigned)ch32); | |
415 | } | |
416 | else if ((ch & 0xf0) == 0xe0) | |
417 | { | |
418 | /* | |
419 | * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)... | |
420 | */ | |
421 | ||
422 | next = *src++; | |
423 | if ((next & 0xc0) != 0x80) | |
424 | { | |
425 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
426 | ||
427 | return (-1); | |
428 | } | |
429 | ||
430 | ch32 = (cups_utf32_t)((ch & 0x0f) << 6) | (cups_utf32_t)(next & 0x3f); | |
431 | ||
432 | next = *src++; | |
433 | if ((next & 0xc0) != 0x80) | |
434 | { | |
435 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
436 | ||
437 | return (-1); | |
438 | } | |
439 | ||
440 | ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f); | |
441 | ||
442 | /* | |
443 | * Check for non-shortest form (invalid UTF-8)... | |
444 | */ | |
445 | ||
446 | if (ch32 < 0x800) | |
447 | { | |
448 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
449 | ||
450 | return (-1); | |
451 | } | |
452 | ||
453 | *dest++ = ch32; | |
454 | ||
455 | DEBUG_printf("4cupsUTF8ToUTF32: %02x %02x %02x => %08X", src[-3], src[-2], src[-1], (unsigned)ch32); | |
456 | } | |
457 | else if ((ch & 0xf8) == 0xf0) | |
458 | { | |
459 | /* | |
460 | * Four-octet UTF-8... | |
461 | */ | |
462 | ||
463 | next = *src++; | |
464 | if ((next & 0xc0) != 0x80) | |
465 | { | |
466 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
467 | ||
468 | return (-1); | |
469 | } | |
470 | ||
471 | ch32 = (cups_utf32_t)((ch & 0x07) << 6) | (cups_utf32_t)(next & 0x3f); | |
472 | ||
473 | next = *src++; | |
474 | if ((next & 0xc0) != 0x80) | |
475 | { | |
476 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
477 | ||
478 | return (-1); | |
479 | } | |
480 | ||
481 | ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f); | |
482 | ||
483 | next = *src++; | |
484 | if ((next & 0xc0) != 0x80) | |
485 | { | |
486 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
487 | ||
488 | return (-1); | |
489 | } | |
490 | ||
491 | ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f); | |
492 | ||
493 | /* | |
494 | * Check for non-shortest form (invalid UTF-8)... | |
495 | */ | |
496 | ||
497 | if (ch32 < 0x10000) | |
498 | { | |
499 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
500 | ||
501 | return (-1); | |
502 | } | |
503 | ||
504 | *dest++ = ch32; | |
505 | ||
506 | DEBUG_printf("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X", src[-4], src[-3], src[-2], src[-1], (unsigned)ch32); | |
507 | } | |
508 | else | |
509 | { | |
510 | /* | |
511 | * More than 4-octet (invalid UTF-8 sequence)... | |
512 | */ | |
513 | ||
514 | DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)"); | |
515 | ||
516 | return (-1); | |
517 | } | |
518 | ||
519 | /* | |
520 | * Check for UTF-16 surrogate (illegal UTF-8)... | |
521 | */ | |
522 | ||
523 | if (ch32 >= 0xd800 && ch32 <= 0xdfff) | |
524 | return (-1); | |
525 | } | |
526 | ||
527 | *dest = 0; | |
528 | ||
529 | DEBUG_printf("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i); | |
530 | ||
531 | return (maxout - 1 - i); | |
532 | } | |
533 | ||
534 | ||
535 | /* | |
536 | * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8. | |
537 | * | |
538 | * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows... | |
539 | * | |
540 | * UTF-32 char UTF-8 char(s) | |
541 | * -------------------------------------------------- | |
542 | * 0 to 127 = 0xxxxxxx (US-ASCII) | |
543 | * 128 to 2047 = 110xxxxx 10yyyyyy | |
544 | * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz | |
545 | * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx | |
546 | * | |
547 | * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4, | |
548 | * which would convert to five- or six-octet UTF-8 sequences... | |
549 | */ | |
550 | ||
551 | int /* O - Count or -1 on error */ | |
552 | cupsUTF32ToUTF8( | |
553 | cups_utf8_t *dest, /* O - Target string */ | |
554 | const cups_utf32_t *src, /* I - Source string */ | |
555 | const int maxout) /* I - Max output */ | |
556 | { | |
557 | cups_utf8_t *start; /* Start of destination string */ | |
558 | int i; /* Looping variable */ | |
559 | int swap; /* Byte-swap input to output */ | |
560 | cups_utf32_t ch; /* Character value */ | |
561 | ||
562 | ||
563 | /* | |
564 | * Check for valid arguments and clear output... | |
565 | */ | |
566 | ||
567 | DEBUG_printf("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", (void *)dest, (void *)src, maxout); | |
568 | ||
569 | if (dest) | |
570 | *dest = '\0'; | |
571 | ||
572 | if (!dest || !src || maxout < 1) | |
573 | { | |
574 | DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)"); | |
575 | ||
576 | return (-1); | |
577 | } | |
578 | ||
579 | /* | |
580 | * Check for leading BOM in UTF-32 and inverted BOM... | |
581 | */ | |
582 | ||
583 | start = dest; | |
584 | swap = *src == 0xfffe0000; | |
585 | ||
586 | DEBUG_printf("4cupsUTF32ToUTF8: swap=%d", swap); | |
587 | ||
588 | if (*src == 0xfffe0000 || *src == 0xfeff) | |
589 | src ++; | |
590 | ||
591 | /* | |
592 | * Convert input UTF-32 to output UTF-8... | |
593 | */ | |
594 | ||
595 | for (i = maxout - 1; *src && i > 0;) | |
596 | { | |
597 | ch = *src++; | |
598 | ||
599 | /* | |
600 | * Byte swap input UTF-32, if necessary... | |
601 | * (only byte-swapping 24 of 32 bits) | |
602 | */ | |
603 | ||
604 | if (swap) | |
605 | ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000)); | |
606 | ||
607 | /* | |
608 | * Check for beyond Plane 16 (invalid UTF-32)... | |
609 | */ | |
610 | ||
611 | if (ch > 0x10ffff) | |
612 | { | |
613 | DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)"); | |
614 | ||
615 | return (-1); | |
616 | } | |
617 | ||
618 | /* | |
619 | * Convert UTF-32 character to UTF-8 character(s)... | |
620 | */ | |
621 | ||
622 | if (ch < 0x80) | |
623 | { | |
624 | /* | |
625 | * One-octet UTF-8 <= 127 (US-ASCII)... | |
626 | */ | |
627 | ||
628 | *dest++ = (cups_utf8_t)ch; | |
629 | i --; | |
630 | ||
631 | DEBUG_printf("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]); | |
632 | } | |
633 | else if (ch < 0x800) | |
634 | { | |
635 | /* | |
636 | * Two-octet UTF-8 <= 2047 (Latin-x)... | |
637 | */ | |
638 | ||
639 | if (i < 2) | |
640 | { | |
641 | DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)"); | |
642 | ||
643 | return (-1); | |
644 | } | |
645 | ||
646 | *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f)); | |
647 | *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); | |
648 | i -= 2; | |
649 | ||
650 | DEBUG_printf("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch, dest[-2], dest[-1]); | |
651 | } | |
652 | else if (ch < 0x10000) | |
653 | { | |
654 | /* | |
655 | * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)... | |
656 | */ | |
657 | ||
658 | if (i < 3) | |
659 | { | |
660 | DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)"); | |
661 | ||
662 | return (-1); | |
663 | } | |
664 | ||
665 | *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f)); | |
666 | *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f)); | |
667 | *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); | |
668 | i -= 3; | |
669 | ||
670 | DEBUG_printf("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch, dest[-3], dest[-2], dest[-1]); | |
671 | } | |
672 | else | |
673 | { | |
674 | /* | |
675 | * Four-octet UTF-8... | |
676 | */ | |
677 | ||
678 | if (i < 4) | |
679 | { | |
680 | DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)"); | |
681 | ||
682 | return (-1); | |
683 | } | |
684 | ||
685 | *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07)); | |
686 | *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f)); | |
687 | *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f)); | |
688 | *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f)); | |
689 | i -= 4; | |
690 | ||
691 | DEBUG_printf("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x", (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]); | |
692 | } | |
693 | } | |
694 | ||
695 | *dest = '\0'; | |
696 | ||
697 | DEBUG_printf("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)); | |
698 | ||
699 | return ((int)(dest - start)); | |
700 | } |