]>
Commit | Line | Data |
---|---|---|
ac884b6a MS |
1 | // |
2 | // "$Id$" | |
3 | // | |
4 | // Shared message catalog class for the CUPS PPD Compiler. | |
5 | // | |
6 | // Copyright 2007-2008 by Apple Inc. | |
7 | // Copyright 2002-2006 by Easy Software Products. | |
8 | // | |
9 | // These coded instructions, statements, and computer programs are the | |
10 | // property of Apple Inc. and are protected by Federal copyright | |
11 | // law. Distribution and use rights are outlined in the file "LICENSE.txt" | |
12 | // which should have been included with this file. If this | |
13 | // file is missing or damaged, see the license at "http://www.cups.org/". | |
14 | // | |
15 | // Contents: | |
16 | // | |
17 | // ppdcCatalog::ppdcCatalog() - Create a shared message catalog. | |
18 | // ppdcCatalog::~ppdcCatalog() - Destroy a shared message catalog. | |
19 | // ppdcCatalog::add_message() - Add a new message. | |
20 | // ppdcCatalog::find_message() - Find a message in a catalog... | |
21 | // ppdcCatalog::load_messages() - Load messages from a .po file. | |
22 | // ppdcCatalog::save_messages() - Save the messages to a .po file. | |
23 | // | |
24 | ||
25 | // | |
26 | // Include necessary headers... | |
27 | // | |
28 | ||
29 | #include "ppdc.h" | |
30 | #include <cups/globals.h> | |
31 | ||
32 | ||
839a51c8 MS |
33 | // |
34 | // Character encodings... | |
35 | // | |
36 | ||
typedef enum
{
  PPDC_CS_AUTO = 0,			// Not yet known; get_utf16() detects it
  PPDC_CS_UTF8 = 1,			// UTF-8 byte stream
  PPDC_CS_UTF16BE = 2,			// UTF-16, high byte first
  PPDC_CS_UTF16LE = 3			// UTF-16, low byte first
} ppdc_cs_t;
44 | ||
45 | ||
46 | // | |
47 | // Local functions... | |
48 | // | |
49 | ||
50 | static int get_utf8(char *&ptr); | |
51 | static int get_utf16(cups_file_t *fp, ppdc_cs_t &cs); | |
52 | static int put_utf8(int ch, char *&ptr, char *end); | |
53 | static int put_utf16(cups_file_t *fp, int ch); | |
54 | ||
55 | ||
ac884b6a MS |
56 | // |
57 | // 'ppdcCatalog::ppdcCatalog()' - Create a shared message catalog. | |
58 | // | |
59 | ||
60 | ppdcCatalog::ppdcCatalog(const char *l, // I - Locale | |
61 | const char *f) // I - Message catalog file | |
62 | : ppdcShared() | |
63 | { | |
64 | _cups_globals_t *cg = _cupsGlobals(); | |
65 | // Global information | |
66 | ||
67 | ||
68 | locale = new ppdcString(l); | |
69 | filename = new ppdcString(f); | |
70 | messages = new ppdcArray(); | |
71 | ||
72 | if (l) | |
73 | { | |
74 | // Try loading the base messages for this locale... | |
75 | char pofile[1024]; // Message catalog file | |
76 | ||
77 | ||
78 | snprintf(pofile, sizeof(pofile), "%s/%s/ppdc_%s.po", cg->localedir, l, l); | |
79 | ||
80 | if (load_messages(pofile) && strchr(l, '_')) | |
81 | { | |
82 | // Try the base locale... | |
83 | char baseloc[3]; // Base locale... | |
84 | ||
85 | ||
86 | strlcpy(baseloc, l, sizeof(baseloc)); | |
87 | snprintf(pofile, sizeof(pofile), "%s/%s/ppdc_%s.po", cg->localedir, | |
88 | baseloc, baseloc); | |
89 | ||
90 | load_messages(pofile); | |
91 | } | |
92 | } | |
93 | ||
94 | if (f) | |
95 | load_messages(f); | |
96 | } | |
97 | ||
98 | ||
99 | // | |
100 | // 'ppdcCatalog::~ppdcCatalog()' - Destroy a shared message catalog. | |
101 | // | |
102 | ||
103 | ppdcCatalog::~ppdcCatalog() | |
104 | { | |
105 | delete locale; | |
106 | delete filename; | |
107 | delete messages; | |
108 | } | |
109 | ||
110 | ||
111 | // | |
112 | // 'ppdcCatalog::add_message()' - Add a new message. | |
113 | // | |
114 | ||
115 | void | |
116 | ppdcCatalog::add_message(const char *id)// I - Message ID to add | |
117 | { | |
118 | ppdcMessage *m; // Current message | |
119 | char text[1024]; // Text to translate | |
120 | ||
121 | ||
122 | // Range check input... | |
123 | if (!id || !*id) | |
124 | return; | |
125 | ||
126 | // Verify that we don't already have the message ID... | |
127 | for (m = (ppdcMessage *)messages->first(); | |
128 | m; | |
129 | m = (ppdcMessage *)messages->next()) | |
130 | if (!strcmp(m->id->value, id)) | |
131 | return; | |
132 | ||
133 | // Add the message... | |
134 | snprintf(text, sizeof(text), "TRANSLATE %s", id); | |
135 | messages->add(new ppdcMessage(id, text)); | |
136 | } | |
137 | ||
138 | ||
139 | // | |
140 | // 'ppdcCatalog::find_message()' - Find a message in a catalog... | |
141 | // | |
142 | ||
143 | const char * // O - Message text | |
144 | ppdcCatalog::find_message( | |
145 | const char *id) // I - Message ID | |
146 | { | |
147 | ppdcMessage *m; // Current message | |
148 | ||
149 | ||
150 | for (m = (ppdcMessage *)messages->first(); | |
151 | m; | |
152 | m = (ppdcMessage *)messages->next()) | |
153 | if (!strcmp(m->id->value, id)) | |
154 | return (m->string->value); | |
155 | ||
156 | return (id); | |
157 | } | |
158 | ||
159 | ||
160 | // | |
161 | // 'ppdcCatalog::load_messages()' - Load messages from a .po file. | |
162 | // | |
163 | ||
164 | int // O - 0 on success, -1 on failure | |
165 | ppdcCatalog::load_messages( | |
166 | const char *f) // I - Message catalog file | |
167 | { | |
168 | cups_file_t *fp; // Message file | |
169 | ppdcMessage *temp; // Current message | |
170 | char line[4096], // Line buffer | |
171 | *ptr, // Pointer into buffer | |
172 | id[4096], // Translation ID | |
173 | str[4096]; // Translation string | |
174 | int linenum; // Line number | |
175 | ||
176 | ||
177 | // Open the message catalog file... | |
178 | if ((fp = cupsFileOpen(f, "r")) == NULL) | |
179 | return (-1); | |
180 | ||
ae71f5de | 181 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
839a51c8 MS |
182 | goto unknown_load_format; |
183 | else if (!strcmp(ptr, ".strings")) | |
184 | { | |
185 | /* | |
186 | * Read messages in Mac OS X ".strings" format, which are UTF-16 text | |
187 | * files of the format: | |
188 | * | |
189 | * "id" = "str"; | |
190 | * | |
191 | * Strings files can also contain C-style comments. | |
192 | */ | |
193 | ||
194 | ppdc_cs_t cs = PPDC_CS_AUTO; // Character set for file | |
195 | int ch; // Current character from file | |
196 | char *end; // End of buffer | |
197 | ||
198 | ||
199 | id[0] = '\0'; | |
200 | str[0] = '\0'; | |
201 | ptr = NULL; | |
202 | end = NULL; | |
203 | ||
204 | while ((ch = get_utf16(fp, cs)) != 0) | |
205 | { | |
206 | if (ptr) | |
207 | { | |
208 | if (ch == '\\') | |
209 | { | |
210 | if ((ch = get_utf16(fp, cs)) == 0) | |
211 | break; | |
ac884b6a | 212 | |
839a51c8 MS |
213 | if (ch == 'n') |
214 | ch = '\n'; | |
215 | else if (ch == 't') | |
216 | ch = '\t'; | |
217 | } | |
ac884b6a | 218 | |
839a51c8 MS |
219 | put_utf8(ch, ptr, end); |
220 | } | |
221 | else if (ch == '/') | |
222 | { | |
223 | // Start of a comment? | |
224 | if ((ch = get_utf16(fp, cs)) == 0) | |
225 | break; | |
ac884b6a | 226 | |
839a51c8 MS |
227 | if (ch == '*') |
228 | { | |
229 | // Skip C comment... | |
230 | int lastch = 0; | |
ac884b6a | 231 | |
839a51c8 MS |
232 | while ((ch = get_utf16(fp, cs)) != 0) |
233 | { | |
234 | if (ch == '/' && lastch == '*') | |
235 | break; | |
ac884b6a | 236 | |
839a51c8 MS |
237 | lastch = ch; |
238 | } | |
239 | } | |
240 | else if (ch == '/') | |
241 | { | |
242 | // Skip C++ comment... | |
243 | while ((ch = get_utf16(fp, cs)) != 0) | |
244 | if (ch == '\n') | |
245 | break; | |
246 | } | |
247 | } | |
248 | else if (ch == '\"') | |
249 | { | |
250 | // Start or finish quoted string... | |
251 | if (ptr) | |
252 | { | |
253 | *ptr = '\0'; | |
254 | ptr = NULL; | |
255 | } | |
256 | else if (id[0]) | |
257 | { | |
258 | ptr = str; | |
259 | end = str + sizeof(str) - 1; | |
260 | } | |
261 | else | |
262 | { | |
263 | ptr = id; | |
264 | end = id + sizeof(id) - 1; | |
265 | } | |
266 | } | |
267 | else if (ch == ';') | |
268 | { | |
269 | // Add string... | |
270 | temp = new ppdcMessage(id, str); | |
ac884b6a | 271 | |
839a51c8 MS |
272 | messages->add(temp); |
273 | } | |
ac884b6a | 274 | } |
839a51c8 MS |
275 | } |
276 | else if (!strcmp(ptr, ".po") || !strcmp(ptr, ".gz")) | |
277 | { | |
278 | /* | |
279 | * Read messages from the catalog file until EOF... | |
280 | * | |
281 | * The format is the GNU gettext .po format, which is fairly simple: | |
282 | * | |
283 | * msgid "some text" | |
284 | * msgstr "localized text" | |
285 | * | |
286 | * The ID and localized text can span multiple lines using the form: | |
287 | * | |
288 | * msgid "" | |
289 | * "some long text" | |
290 | * msgstr "" | |
291 | * "localized text spanning " | |
292 | * "multiple lines" | |
293 | */ | |
294 | ||
295 | linenum = 0; | |
296 | id[0] = '\0'; | |
297 | str[0] = '\0'; | |
298 | ||
299 | while (cupsFileGets(fp, line, sizeof(line))) | |
300 | { | |
301 | linenum ++; | |
ac884b6a | 302 | |
839a51c8 MS |
303 | // Skip blank and comment lines... |
304 | if (line[0] == '#' || !line[0]) | |
305 | continue; | |
ac884b6a | 306 | |
839a51c8 | 307 | // Strip the trailing quote... |
ae71f5de | 308 | if ((ptr = (char *)strrchr(line, '\"')) == NULL) |
839a51c8 MS |
309 | { |
310 | fprintf(stderr, "ERROR: Expected quoted string on line %d of %s!\n", | |
311 | linenum, f); | |
312 | cupsFileClose(fp); | |
313 | return (-1); | |
314 | } | |
ac884b6a | 315 | |
839a51c8 MS |
316 | *ptr = '\0'; |
317 | ||
318 | // Find start of value... | |
319 | if ((ptr = strchr(line, '\"')) == NULL) | |
320 | { | |
321 | fprintf(stderr, "ERROR: Expected quoted string on line %d of %s!\n", | |
322 | linenum, f); | |
323 | cupsFileClose(fp); | |
324 | return (-1); | |
325 | } | |
326 | ||
327 | ptr ++; | |
328 | ||
329 | // Unquote the text... | |
330 | char *sptr, *dptr; // Source/destination pointers | |
331 | ||
332 | for (sptr = ptr, dptr = ptr; *sptr;) | |
ac884b6a | 333 | { |
839a51c8 | 334 | if (*sptr == '\\') |
ac884b6a | 335 | { |
839a51c8 MS |
336 | sptr ++; |
337 | if (isdigit(*sptr)) | |
338 | { | |
339 | *dptr = 0; | |
340 | ||
341 | while (isdigit(*sptr)) | |
342 | { | |
343 | *dptr = *dptr * 8 + *sptr - '0'; | |
344 | sptr ++; | |
345 | } | |
ac884b6a | 346 | |
839a51c8 MS |
347 | dptr ++; |
348 | } | |
349 | else | |
ac884b6a | 350 | { |
839a51c8 MS |
351 | if (*sptr == 'n') |
352 | *dptr++ = '\n'; | |
353 | else if (*sptr == 'r') | |
354 | *dptr++ = '\r'; | |
355 | else if (*sptr == 't') | |
356 | *dptr++ = '\t'; | |
357 | else | |
358 | *dptr++ = *sptr; | |
359 | ||
ac884b6a MS |
360 | sptr ++; |
361 | } | |
ac884b6a MS |
362 | } |
363 | else | |
839a51c8 | 364 | *dptr++ = *sptr++; |
ac884b6a | 365 | } |
ac884b6a | 366 | |
839a51c8 | 367 | *dptr = '\0'; |
ac884b6a | 368 | |
839a51c8 MS |
369 | // Create or add to a message... |
370 | if (!strncmp(line, "msgid", 5)) | |
ac884b6a | 371 | { |
839a51c8 MS |
372 | if (id[0] && str[0]) |
373 | { | |
374 | temp = new ppdcMessage(id, str); | |
ac884b6a | 375 | |
839a51c8 MS |
376 | messages->add(temp); |
377 | } | |
378 | ||
379 | strlcpy(id, ptr, sizeof(id)); | |
380 | str[0] = '\0'; | |
ac884b6a | 381 | } |
839a51c8 MS |
382 | else if (!strncmp(line, "msgstr", 6)) |
383 | { | |
384 | if (!id[0]) | |
385 | { | |
386 | fprintf(stderr, "ERROR: Need a msgid line before any " | |
387 | "translation strings on line %d of %s!\n", | |
388 | linenum, f); | |
389 | cupsFileClose(fp); | |
390 | return (-1); | |
391 | } | |
ac884b6a | 392 | |
839a51c8 MS |
393 | strlcpy(str, ptr, sizeof(str)); |
394 | } | |
395 | else if (line[0] == '\"' && str[0]) | |
396 | strlcat(str, ptr, sizeof(str)); | |
397 | else if (line[0] == '\"' && id[0]) | |
398 | strlcat(id, ptr, sizeof(id)); | |
399 | else | |
ac884b6a | 400 | { |
839a51c8 | 401 | fprintf(stderr, "ERROR: Unexpected text on line %d of %s!\n", |
ac884b6a MS |
402 | linenum, f); |
403 | cupsFileClose(fp); | |
404 | return (-1); | |
405 | } | |
ac884b6a | 406 | } |
839a51c8 MS |
407 | |
408 | if (id[0] && str[0]) | |
ac884b6a | 409 | { |
839a51c8 MS |
410 | temp = new ppdcMessage(id, str); |
411 | ||
412 | messages->add(temp); | |
ac884b6a MS |
413 | } |
414 | } | |
839a51c8 MS |
415 | else |
416 | goto unknown_load_format; | |
ac884b6a | 417 | |
839a51c8 MS |
418 | /* |
419 | * Close the file and return... | |
420 | */ | |
ac884b6a MS |
421 | |
422 | cupsFileClose(fp); | |
423 | ||
424 | return (0); | |
839a51c8 MS |
425 | |
426 | /* | |
427 | * Unknown format error... | |
428 | */ | |
429 | ||
430 | unknown_load_format: | |
431 | ||
432 | fprintf(stderr, "ERROR: Unknown message catalog format for \"%s\"!\n", f); | |
433 | cupsFileClose(fp); | |
434 | return (-1); | |
ac884b6a MS |
435 | } |
436 | ||
437 | ||
438 | // | |
439 | // 'ppdcCatalog::save_messages()' - Save the messages to a .po file. | |
440 | // | |
441 | ||
442 | int // O - 0 on success, -1 on error | |
443 | ppdcCatalog::save_messages( | |
444 | const char *f) // I - File to save to | |
445 | { | |
446 | cups_file_t *fp; // Message file | |
447 | ppdcMessage *m; // Current message | |
839a51c8 MS |
448 | char *ptr; // Pointer into string |
449 | int utf16; // Output UTF-16 .strings file? | |
450 | int ch; // Current character | |
ac884b6a MS |
451 | |
452 | ||
839a51c8 | 453 | // Open the file... |
ae71f5de | 454 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
ac884b6a MS |
455 | return (-1); |
456 | ||
839a51c8 MS |
457 | if (!strcmp(ptr, ".gz")) |
458 | fp = cupsFileOpen(f, "w9"); | |
459 | else | |
460 | fp = cupsFileOpen(f, "w"); | |
461 | ||
462 | if (!fp) | |
463 | return (-1); | |
464 | ||
465 | // For .strings files, write a BOM for big-endian output... | |
466 | utf16 = !strcmp(ptr, ".strings"); | |
467 | ||
468 | if (utf16) | |
469 | put_utf16(fp, 0xfeff); | |
470 | ||
471 | // Loop through all of the messages... | |
ac884b6a MS |
472 | for (m = (ppdcMessage *)messages->first(); |
473 | m; | |
474 | m = (ppdcMessage *)messages->next()) | |
475 | { | |
839a51c8 MS |
476 | if (utf16) |
477 | { | |
478 | put_utf16(fp, '\"'); | |
ac884b6a | 479 | |
839a51c8 MS |
480 | ptr = m->id->value; |
481 | while ((ch = get_utf8(ptr)) != 0) | |
482 | switch (ch) | |
483 | { | |
484 | case '\n' : | |
485 | put_utf16(fp, '\\'); | |
486 | put_utf16(fp, 'n'); | |
487 | break; | |
488 | case '\\' : | |
489 | put_utf16(fp, '\\'); | |
490 | put_utf16(fp, '\\'); | |
491 | break; | |
492 | case '\"' : | |
493 | put_utf16(fp, '\\'); | |
494 | put_utf16(fp, '\"'); | |
495 | break; | |
496 | default : | |
497 | put_utf16(fp, ch); | |
498 | break; | |
499 | } | |
500 | ||
501 | put_utf16(fp, '\"'); | |
502 | put_utf16(fp, ' '); | |
503 | put_utf16(fp, '='); | |
504 | put_utf16(fp, ' '); | |
505 | put_utf16(fp, '\"'); | |
506 | ||
507 | ptr = m->string->value; | |
508 | while ((ch = get_utf8(ptr)) != 0) | |
509 | switch (ch) | |
510 | { | |
511 | case '\n' : | |
512 | put_utf16(fp, '\\'); | |
513 | put_utf16(fp, 'n'); | |
514 | break; | |
515 | case '\\' : | |
516 | put_utf16(fp, '\\'); | |
517 | put_utf16(fp, '\\'); | |
518 | break; | |
519 | case '\"' : | |
520 | put_utf16(fp, '\\'); | |
521 | put_utf16(fp, '\"'); | |
522 | break; | |
523 | default : | |
524 | put_utf16(fp, ch); | |
525 | break; | |
526 | } | |
ac884b6a | 527 | |
839a51c8 MS |
528 | put_utf16(fp, '\"'); |
529 | put_utf16(fp, ';'); | |
530 | put_utf16(fp, '\n'); | |
531 | } | |
532 | else | |
533 | { | |
534 | cupsFilePuts(fp, "msgid \""); | |
535 | for (ptr = m->id->value; *ptr; ptr ++) | |
536 | switch (*ptr) | |
537 | { | |
538 | case '\n' : | |
539 | cupsFilePuts(fp, "\\n"); | |
540 | break; | |
541 | case '\\' : | |
542 | cupsFilePuts(fp, "\\\\"); | |
543 | break; | |
544 | case '\"' : | |
545 | cupsFilePuts(fp, "\\\""); | |
546 | break; | |
547 | default : | |
548 | cupsFilePutChar(fp, *ptr); | |
549 | break; | |
550 | } | |
551 | cupsFilePuts(fp, "\"\n"); | |
552 | ||
553 | cupsFilePuts(fp, "msgstr \""); | |
554 | for (ptr = m->string->value; *ptr; ptr ++) | |
555 | switch (*ptr) | |
556 | { | |
557 | case '\n' : | |
558 | cupsFilePuts(fp, "\\n"); | |
559 | break; | |
560 | case '\\' : | |
561 | cupsFilePuts(fp, "\\\\"); | |
562 | break; | |
563 | case '\"' : | |
564 | cupsFilePuts(fp, "\\\""); | |
565 | break; | |
566 | default : | |
567 | cupsFilePutChar(fp, *ptr); | |
568 | break; | |
569 | } | |
570 | cupsFilePuts(fp, "\"\n"); | |
571 | ||
572 | cupsFilePutChar(fp, '\n'); | |
573 | } | |
ac884b6a MS |
574 | } |
575 | ||
576 | cupsFileClose(fp); | |
577 | ||
578 | return (0); | |
579 | } | |
580 | ||
581 | ||
839a51c8 MS |
//
// 'get_utf8()' - Get a UTF-8 character.
//
// Decodes one character starting at "ptr" and advances "ptr" past the bytes
// that were used.  Bytes below 0xc0 and bytes from 0xf8 up are returned
// unchanged.  When a continuation byte is missing, 0 is returned and "ptr"
// is left on the offending byte.
//

static int				// O  - Unicode character or 0 on EOF
get_utf8(char *&ptr)			// IO - Pointer to character
{
  int	lead = *ptr++ & 255;		// Leading byte
  int	value;				// Character being assembled
  int	extra;				// Continuation bytes still expected


  if (lead < 0xc0)
    return (lead);			// ASCII, nul, or stray continuation byte

  if ((lead & 0xe0) == 0xc0)
  {
    // Two-byte UTF-8...
    value = lead & 0x1f;
    extra = 1;
  }
  else if ((lead & 0xf0) == 0xe0)
  {
    // Three-byte UTF-8...
    value = lead & 0x0f;
    extra = 2;
  }
  else if ((lead & 0xf8) == 0xf0)
  {
    // Four-byte UTF-8...
    value = lead & 0x07;
    extra = 3;
  }
  else
    return (lead);			// 0xf8 and up are passed through as-is

  for (; extra > 0; extra --)
  {
    if ((*ptr & 0xc0) != 0x80)
      return (0);

    value = (value << 6) | (*ptr++ & 0x3f);
  }

  return (value);
}
637 | ||
638 | ||
639 | // | |
640 | // 'get_utf16()' - Get a UTF-16 character... | |
641 | // | |
642 | ||
643 | static int // O - Unicode character or 0 on EOF | |
644 | get_utf16(cups_file_t *fp, // I - File to read from | |
645 | ppdc_cs_t &cs) // IO - Character set of file | |
646 | { | |
647 | int ch; // Current character | |
648 | unsigned char buffer[3]; // Bytes | |
649 | ||
650 | ||
651 | if (cs == PPDC_CS_AUTO) | |
652 | { | |
653 | // Get byte-order-mark, if present... | |
654 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
655 | return (0); | |
656 | ||
657 | if (buffer[0] == 0xfe && buffer[1] == 0xff) | |
658 | { | |
659 | // Big-endian UTF-16... | |
660 | cs = PPDC_CS_UTF16BE; | |
661 | ||
662 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
663 | return (0); | |
664 | } | |
665 | else if (buffer[0] == 0xff && buffer[1] == 0xfe) | |
666 | { | |
667 | // Little-endian UTF-16... | |
668 | cs = PPDC_CS_UTF16LE; | |
669 | ||
670 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
671 | return (0); | |
672 | } | |
673 | else if (buffer[0] == 0x00 && buffer[1] != 0x00) | |
674 | { | |
675 | // No BOM, assume big-endian UTF-16... | |
676 | cs = PPDC_CS_UTF16BE; | |
677 | } | |
678 | else if (buffer[0] != 0x00 && buffer[1] == 0x00) | |
679 | { | |
680 | // No BOM, assume little-endian UTF-16... | |
681 | cs = PPDC_CS_UTF16LE; | |
682 | } | |
683 | else | |
684 | { | |
685 | // No BOM, assume UTF-8... | |
686 | cs = PPDC_CS_UTF8; | |
687 | ||
688 | cupsFileRewind(fp); | |
689 | } | |
690 | } | |
691 | else if (cs != PPDC_CS_UTF8) | |
692 | { | |
693 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
694 | return (0); | |
695 | } | |
696 | ||
697 | if (cs == PPDC_CS_UTF8) | |
698 | { | |
699 | // UTF-8 character... | |
700 | ch = cupsFileGetChar(fp); | |
701 | ||
702 | if ((ch & 0xe0) == 0xc0) | |
703 | { | |
704 | // Two-byte UTF-8... | |
705 | if (cupsFileRead(fp, (char *)buffer, 1) != 1) | |
706 | return (0); | |
707 | ||
708 | if ((buffer[0] & 0xc0) != 0x80) | |
709 | return (0); | |
710 | ||
711 | ch = ((ch & 0x1f) << 6) | (buffer[0] & 0x3f); | |
712 | } | |
713 | else if ((ch & 0xf0) == 0xe0) | |
714 | { | |
715 | // Three-byte UTF-8... | |
716 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
717 | return (0); | |
718 | ||
719 | if ((buffer[0] & 0xc0) != 0x80 || | |
720 | (buffer[1] & 0xc0) != 0x80) | |
721 | return (0); | |
722 | ||
723 | ch = ((((ch & 0x0f) << 6) | (buffer[0] & 0x3f)) << 6) | | |
724 | (buffer[1] & 0x3f); | |
725 | } | |
726 | else if ((ch & 0xf8) == 0xf0) | |
727 | { | |
728 | // Four-byte UTF-8... | |
729 | if (cupsFileRead(fp, (char *)buffer, 3) != 3) | |
730 | return (0); | |
731 | ||
732 | if ((buffer[0] & 0xc0) != 0x80 || | |
733 | (buffer[1] & 0xc0) != 0x80 || | |
734 | (buffer[2] & 0xc0) != 0x80) | |
735 | return (0); | |
736 | ||
737 | ch = ((((((ch & 0x07) << 6) | (buffer[0] & 0x3f)) << 6) | | |
738 | (buffer[1] & 0x3f)) << 6) | (buffer[2] & 0x3f); | |
739 | } | |
740 | } | |
741 | else | |
742 | { | |
743 | // UTF-16 character... | |
744 | if (cs == PPDC_CS_UTF16BE) | |
745 | ch = (buffer[0] << 8) | buffer[1]; | |
746 | else | |
747 | ch = (buffer[1] << 8) | buffer[0]; | |
748 | ||
749 | if (ch >= 0xd800 && ch <= 0xdbff) | |
750 | { | |
751 | // Handle multi-word encoding... | |
752 | int lch; | |
753 | ||
754 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
755 | return (0); | |
756 | ||
757 | if (cs == PPDC_CS_UTF16BE) | |
758 | lch = (buffer[0] << 8) | buffer[1]; | |
759 | else | |
760 | lch = (buffer[1] << 8) | buffer[0]; | |
761 | ||
762 | if (lch < 0xdc00 || lch >= 0xdfff) | |
763 | return (0); | |
764 | ||
765 | ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; | |
766 | } | |
767 | } | |
768 | ||
769 | return (ch); | |
770 | } | |
771 | ||
772 | ||
//
// 'put_utf8()' - Add a UTF-8 character to a string.
//
// Appends the encoding of "ch" at "ptr" and advances "ptr" past it.  Nothing
// is written, and -1 is returned, unless every byte of the encoding lands
// before "end".  No terminating nul is written.
//

static int				// O  - 0 on success, -1 on failure
put_utf8(int  ch,			// I  - Unicode character
         char *&ptr,			// IO - String pointer
         char *end)			// I  - End of buffer
{
  int	trailing;			// Number of continuation bytes
  int	shift;				// Bit position of the next 6-bit group


  // Work out how long the encoding is...
  if (ch < 0x80)
    trailing = 0;
  else if (ch < 0x800)
    trailing = 1;
  else if (ch < 0x10000)
    trailing = 2;
  else
    trailing = 3;

  // The last byte must land before the end of the buffer...
  if ((ptr + trailing) >= end)
    return (-1);

  // Leading byte...
  switch (trailing)
  {
    case 0 :
        *ptr++ = ch;
        return (0);
    case 1 :
        *ptr++ = 0xc0 | (ch >> 6);
        break;
    case 2 :
        *ptr++ = 0xe0 | (ch >> 12);
        break;
    default :
        *ptr++ = 0xf0 | (ch >> 18);
        break;
  }

  // Continuation bytes, most significant group first...
  for (shift = 6 * (trailing - 1); shift >= 0; shift -= 6)
    *ptr++ = 0x80 | ((ch >> shift) & 0x3f);

  return (0);
}
823 | ||
824 | ||
825 | // | |
826 | // 'put_utf16()' - Write a UTF-16 character to a file. | |
827 | // | |
828 | ||
829 | static int // O - 0 on success, -1 on failure | |
830 | put_utf16(cups_file_t *fp, // I - File to write to | |
831 | int ch) // I - Unicode character | |
832 | { | |
833 | unsigned char buffer[4]; // Output buffer | |
834 | ||
835 | ||
836 | if (ch < 0x10000) | |
837 | { | |
838 | // One-word UTF-16 big-endian... | |
839 | buffer[0] = ch >> 8; | |
840 | buffer[1] = ch; | |
841 | ||
842 | if (cupsFileWrite(fp, (char *)buffer, 2) == 2) | |
843 | return (0); | |
844 | } | |
845 | else | |
846 | { | |
847 | // Two-word UTF-16 big-endian... | |
848 | ch -= 0x10000; | |
849 | ||
850 | buffer[0] = 0xd8 | (ch >> 18); | |
851 | buffer[1] = ch >> 10; | |
852 | buffer[2] = 0xdc | ((ch >> 8) & 0x03); | |
853 | buffer[3] = ch; | |
854 | ||
855 | if (cupsFileWrite(fp, (char *)buffer, 4) == 4) | |
856 | return (0); | |
857 | } | |
858 | ||
859 | return (-1); | |
860 | } | |
861 | ||
862 | ||
ac884b6a MS |
863 | // |
864 | // End of "$Id$". | |
865 | // |