]> git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
Changes to eliminate warnings from new Clang.
[thirdparty/cups.git] / cups / transcode.c
1 /*
2 * "$Id$"
3 *
4 * Transcoding support for CUPS.
5 *
6 * Copyright 2007-2014 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * missing or damaged, see the license at "http://www.cups.org/".
14 *
15 * This file is subject to the Apple OS-Developed Software exception.
16 */
17
18 /*
19 * Include necessary headers...
20 */
21
22 #include "cups-private.h"
23 #include <limits.h>
24 #include <time.h>
25 #ifdef HAVE_ICONV_H
26 # include <iconv.h>
27 #endif /* HAVE_ICONV_H */
28
29
/*
 * Local globals: the cached iconv conversion state.  Only compiled when
 * iconv is available.
 */

#ifdef HAVE_ICONV_H
/* Mutex to control access to maps */
static _cups_mutex_t	map_mutex = _CUPS_MUTEX_INITIALIZER;

/* Convert from UTF-8 to charset */
static iconv_t		map_from_utf8 = (iconv_t)-1;

/* Convert from charset to UTF-8 */
static iconv_t		map_to_utf8 = (iconv_t)-1;

/* Which charset is cached */
static cups_encoding_t	map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
44
45
/*
 * '_cupsCharmapFlush()' - Drop the cached iconv conversion descriptors.
 *
 * Each descriptor that is currently open is closed and reset to
 * (iconv_t)-1, and the cached encoding is reset to CUPS_AUTO_ENCODING.
 * When HAVE_ICONV_H is not defined the function body is empty.
 *
 * This function does not take map_mutex itself; the conversion functions
 * in this file call it while already holding the mutex.
 */

void
_cupsCharmapFlush(void)
{
#ifdef HAVE_ICONV_H
  const iconv_t	unopened = (iconv_t)-1;	/* Value meaning "no descriptor" */


  if (unopened != map_from_utf8)
  {
    iconv_close(map_from_utf8);
    map_from_utf8 = unopened;
  }

  if (unopened != map_to_utf8)
  {
    iconv_close(map_to_utf8);
    map_to_utf8 = unopened;
  }

 /*
  * Nothing is cached any more...
  */

  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
}
69
70
71 /*
72 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
73 */
74
75 int /* O - Count or -1 on error */
76 cupsCharsetToUTF8(
77 cups_utf8_t *dest, /* O - Target string */
78 const char *src, /* I - Source string */
79 const int maxout, /* I - Max output */
80 const cups_encoding_t encoding) /* I - Encoding */
81 {
82 cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */
83 #ifdef HAVE_ICONV_H
84 size_t srclen, /* Length of source string */
85 outBytesLeft; /* Bytes remaining in output buffer */
86 #endif /* HAVE_ICONV_H */
87
88
89 /*
90 * Check for valid arguments...
91 */
92
93 DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)", (void *)dest, src, maxout, encoding));
94
95 if (!dest || !src || maxout < 1)
96 {
97 if (dest)
98 *dest = '\0';
99
100 DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
101 return (-1);
102 }
103
104 /*
105 * Handle identity conversions...
106 */
107
108 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
109 encoding >= CUPS_ENCODING_VBCS_END)
110 {
111 strlcpy((char *)dest, src, (size_t)maxout);
112 return ((int)strlen((char *)dest));
113 }
114
115 /*
116 * Handle ISO-8859-1 to UTF-8 directly...
117 */
118
119 destptr = dest;
120
121 if (encoding == CUPS_ISO8859_1)
122 {
123 int ch; /* Character from string */
124 cups_utf8_t *destend; /* End of UTF-8 buffer */
125
126
127 destend = dest + maxout - 2;
128
129 while (*src && destptr < destend)
130 {
131 ch = *src++ & 255;
132
133 if (ch & 128)
134 {
135 *destptr++ = (cups_utf8_t)(0xc0 | (ch >> 6));
136 *destptr++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
137 }
138 else
139 *destptr++ = (cups_utf8_t)ch;
140 }
141
142 *destptr = '\0';
143
144 return ((int)(destptr - dest));
145 }
146
147 /*
148 * Convert input legacy charset to UTF-8...
149 */
150
151 #ifdef HAVE_ICONV_H
152 _cupsMutexLock(&map_mutex);
153
154 if (map_encoding != encoding)
155 {
156 char toset[1024]; /* Destination character set */
157
158 _cupsCharmapFlush();
159
160 snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding));
161
162 map_encoding = encoding;
163 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
164 map_to_utf8 = iconv_open("UTF-8", toset);
165 }
166
167 if (map_to_utf8 != (iconv_t)-1)
168 {
169 char *altdestptr = (char *)dest; /* Silence bogus GCC type-punned */
170
171 srclen = strlen(src);
172 outBytesLeft = (size_t)maxout - 1;
173
174 iconv(map_to_utf8, (char **)&src, &srclen, &altdestptr, &outBytesLeft);
175 *altdestptr = '\0';
176
177 _cupsMutexUnlock(&map_mutex);
178
179 return ((int)(altdestptr - (char *)dest));
180 }
181
182 _cupsMutexUnlock(&map_mutex);
183 #endif /* HAVE_ICONV_H */
184
185 /*
186 * No iconv() support, so error out...
187 */
188
189 *destptr = '\0';
190
191 return (-1);
192 }
193
194
195 /*
196 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
197 */
198
199 int /* O - Count or -1 on error */
200 cupsUTF8ToCharset(
201 char *dest, /* O - Target string */
202 const cups_utf8_t *src, /* I - Source string */
203 const int maxout, /* I - Max output */
204 const cups_encoding_t encoding) /* I - Encoding */
205 {
206 char *destptr; /* Pointer into destination */
207 #ifdef HAVE_ICONV_H
208 size_t srclen, /* Length of source string */
209 outBytesLeft; /* Bytes remaining in output buffer */
210 #endif /* HAVE_ICONV_H */
211
212
213 /*
214 * Check for valid arguments...
215 */
216
217 if (!dest || !src || maxout < 1)
218 {
219 if (dest)
220 *dest = '\0';
221
222 return (-1);
223 }
224
225 /*
226 * Handle identity conversions...
227 */
228
229 if (encoding == CUPS_UTF8 ||
230 encoding >= CUPS_ENCODING_VBCS_END)
231 {
232 strlcpy(dest, (char *)src, (size_t)maxout);
233 return ((int)strlen(dest));
234 }
235
236 /*
237 * Handle UTF-8 to ISO-8859-1 directly...
238 */
239
240 destptr = dest;
241
242 if (encoding == CUPS_ISO8859_1 || encoding <= CUPS_US_ASCII)
243 {
244 int ch, /* Character from string */
245 maxch; /* Maximum character for charset */
246 char *destend; /* End of ISO-8859-1 buffer */
247
248 maxch = encoding == CUPS_ISO8859_1 ? 256 : 128;
249 destend = dest + maxout - 1;
250
251 while (*src && destptr < destend)
252 {
253 ch = *src++;
254
255 if ((ch & 0xe0) == 0xc0)
256 {
257 ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
258
259 if (ch < maxch)
260 *destptr++ = (char)ch;
261 else
262 *destptr++ = '?';
263 }
264 else if ((ch & 0xf0) == 0xe0 ||
265 (ch & 0xf8) == 0xf0)
266 *destptr++ = '?';
267 else if (!(ch & 0x80))
268 *destptr++ = (char)ch;
269 }
270
271 *destptr = '\0';
272
273 return ((int)(destptr - dest));
274 }
275
276 #ifdef HAVE_ICONV_H
277 /*
278 * Convert input UTF-8 to legacy charset...
279 */
280
281 _cupsMutexLock(&map_mutex);
282
283 if (map_encoding != encoding)
284 {
285 char toset[1024]; /* Destination character set */
286
287 _cupsCharmapFlush();
288
289 snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding));
290
291 map_encoding = encoding;
292 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
293 map_to_utf8 = iconv_open("UTF-8", toset);
294 }
295
296 if (map_from_utf8 != (iconv_t)-1)
297 {
298 char *altsrc = (char *)src; /* Silence bogus GCC type-punned */
299
300 srclen = strlen((char *)src);
301 outBytesLeft = (size_t)maxout - 1;
302
303 iconv(map_from_utf8, &altsrc, &srclen, &destptr, &outBytesLeft);
304 *destptr = '\0';
305
306 _cupsMutexUnlock(&map_mutex);
307
308 return ((int)(destptr - dest));
309 }
310
311 _cupsMutexUnlock(&map_mutex);
312 #endif /* HAVE_ICONV_H */
313
314 /*
315 * No iconv() support, so error out...
316 */
317
318 *destptr = '\0';
319
320 return (-1);
321 }
322
323
324 /*
325 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
326 *
327 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
328 *
329 * UTF-32 char UTF-8 char(s)
330 * --------------------------------------------------
331 * 0 to 127 = 0xxxxxxx (US-ASCII)
332 * 128 to 2047 = 110xxxxx 10yyyyyy
333 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
334 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
335 *
336 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
337 * which would convert to five- or six-octet UTF-8 sequences...
338 */
339
340 int /* O - Count or -1 on error */
341 cupsUTF8ToUTF32(
342 cups_utf32_t *dest, /* O - Target string */
343 const cups_utf8_t *src, /* I - Source string */
344 const int maxout) /* I - Max output */
345 {
346 int i; /* Looping variable */
347 cups_utf8_t ch; /* Character value */
348 cups_utf8_t next; /* Next character value */
349 cups_utf32_t ch32; /* UTF-32 character value */
350
351
352 /*
353 * Check for valid arguments and clear output...
354 */
355
356 DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", (void *)dest, src, maxout));
357
358 if (dest)
359 *dest = 0;
360
361 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
362 {
363 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
364
365 return (-1);
366 }
367
368 /*
369 * Convert input UTF-8 to output UTF-32...
370 */
371
372 for (i = maxout - 1; *src && i > 0; i --)
373 {
374 ch = *src++;
375
376 /*
377 * Convert UTF-8 character(s) to UTF-32 character...
378 */
379
380 if (!(ch & 0x80))
381 {
382 /*
383 * One-octet UTF-8 <= 127 (US-ASCII)...
384 */
385
386 *dest++ = ch;
387
388 DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
389 continue;
390 }
391 else if ((ch & 0xe0) == 0xc0)
392 {
393 /*
394 * Two-octet UTF-8 <= 2047 (Latin-x)...
395 */
396
397 next = *src++;
398 if ((next & 0xc0) != 0x80)
399 {
400 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
401
402 return (-1);
403 }
404
405 ch32 = (cups_utf32_t)((ch & 0x1f) << 6) | (cups_utf32_t)(next & 0x3f);
406
407 /*
408 * Check for non-shortest form (invalid UTF-8)...
409 */
410
411 if (ch32 < 0x80)
412 {
413 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
414
415 return (-1);
416 }
417
418 *dest++ = ch32;
419
420 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
421 src[-2], src[-1], (unsigned)ch32));
422 }
423 else if ((ch & 0xf0) == 0xe0)
424 {
425 /*
426 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
427 */
428
429 next = *src++;
430 if ((next & 0xc0) != 0x80)
431 {
432 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
433
434 return (-1);
435 }
436
437 ch32 = (cups_utf32_t)((ch & 0x0f) << 6) | (cups_utf32_t)(next & 0x3f);
438
439 next = *src++;
440 if ((next & 0xc0) != 0x80)
441 {
442 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
443
444 return (-1);
445 }
446
447 ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
448
449 /*
450 * Check for non-shortest form (invalid UTF-8)...
451 */
452
453 if (ch32 < 0x800)
454 {
455 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
456
457 return (-1);
458 }
459
460 *dest++ = ch32;
461
462 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
463 src[-3], src[-2], src[-1], (unsigned)ch32));
464 }
465 else if ((ch & 0xf8) == 0xf0)
466 {
467 /*
468 * Four-octet UTF-8...
469 */
470
471 next = *src++;
472 if ((next & 0xc0) != 0x80)
473 {
474 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
475
476 return (-1);
477 }
478
479 ch32 = (cups_utf32_t)((ch & 0x07) << 6) | (cups_utf32_t)(next & 0x3f);
480
481 next = *src++;
482 if ((next & 0xc0) != 0x80)
483 {
484 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
485
486 return (-1);
487 }
488
489 ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
490
491 next = *src++;
492 if ((next & 0xc0) != 0x80)
493 {
494 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
495
496 return (-1);
497 }
498
499 ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
500
501 /*
502 * Check for non-shortest form (invalid UTF-8)...
503 */
504
505 if (ch32 < 0x10000)
506 {
507 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
508
509 return (-1);
510 }
511
512 *dest++ = ch32;
513
514 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
515 src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
516 }
517 else
518 {
519 /*
520 * More than 4-octet (invalid UTF-8 sequence)...
521 */
522
523 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
524
525 return (-1);
526 }
527
528 /*
529 * Check for UTF-16 surrogate (illegal UTF-8)...
530 */
531
532 if (ch32 >= 0xd800 && ch32 <= 0xdfff)
533 return (-1);
534 }
535
536 *dest = 0;
537
538 DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
539
540 return (maxout - 1 - i);
541 }
542
543
544 /*
545 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
546 *
547 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
548 *
549 * UTF-32 char UTF-8 char(s)
550 * --------------------------------------------------
551 * 0 to 127 = 0xxxxxxx (US-ASCII)
552 * 128 to 2047 = 110xxxxx 10yyyyyy
553 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
554 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
555 *
556 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
557 * which would convert to five- or six-octet UTF-8 sequences...
558 */
559
560 int /* O - Count or -1 on error */
561 cupsUTF32ToUTF8(
562 cups_utf8_t *dest, /* O - Target string */
563 const cups_utf32_t *src, /* I - Source string */
564 const int maxout) /* I - Max output */
565 {
566 cups_utf8_t *start; /* Start of destination string */
567 int i; /* Looping variable */
568 int swap; /* Byte-swap input to output */
569 cups_utf32_t ch; /* Character value */
570
571
572 /*
573 * Check for valid arguments and clear output...
574 */
575
576 DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", (void *)dest, (void *)src, maxout));
577
578 if (dest)
579 *dest = '\0';
580
581 if (!dest || !src || maxout < 1)
582 {
583 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
584
585 return (-1);
586 }
587
588 /*
589 * Check for leading BOM in UTF-32 and inverted BOM...
590 */
591
592 start = dest;
593 swap = *src == 0xfffe0000;
594
595 DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
596
597 if (*src == 0xfffe0000 || *src == 0xfeff)
598 src ++;
599
600 /*
601 * Convert input UTF-32 to output UTF-8...
602 */
603
604 for (i = maxout - 1; *src && i > 0;)
605 {
606 ch = *src++;
607
608 /*
609 * Byte swap input UTF-32, if necessary...
610 * (only byte-swapping 24 of 32 bits)
611 */
612
613 if (swap)
614 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
615
616 /*
617 * Check for beyond Plane 16 (invalid UTF-32)...
618 */
619
620 if (ch > 0x10ffff)
621 {
622 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
623
624 return (-1);
625 }
626
627 /*
628 * Convert UTF-32 character to UTF-8 character(s)...
629 */
630
631 if (ch < 0x80)
632 {
633 /*
634 * One-octet UTF-8 <= 127 (US-ASCII)...
635 */
636
637 *dest++ = (cups_utf8_t)ch;
638 i --;
639
640 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
641 }
642 else if (ch < 0x800)
643 {
644 /*
645 * Two-octet UTF-8 <= 2047 (Latin-x)...
646 */
647
648 if (i < 2)
649 {
650 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
651
652 return (-1);
653 }
654
655 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
656 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
657 i -= 2;
658
659 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
660 dest[-2], dest[-1]));
661 }
662 else if (ch < 0x10000)
663 {
664 /*
665 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
666 */
667
668 if (i < 3)
669 {
670 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
671
672 return (-1);
673 }
674
675 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
676 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
677 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
678 i -= 3;
679
680 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
681 dest[-3], dest[-2], dest[-1]));
682 }
683 else
684 {
685 /*
686 * Four-octet UTF-8...
687 */
688
689 if (i < 4)
690 {
691 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
692
693 return (-1);
694 }
695
696 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
697 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
698 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
699 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
700 i -= 4;
701
702 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
703 (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
704 }
705 }
706
707 *dest = '\0';
708
709 DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
710
711 return ((int)(dest - start));
712 }
713
714
715 /*
716 * End of "$Id$"
717 */