]>
Commit | Line | Data |
---|---|---|
ac884b6a MS |
1 | // |
2 | // "$Id$" | |
3 | // | |
4 | // Shared message catalog class for the CUPS PPD Compiler. | |
5 | // | |
6 | // Copyright 2007-2008 by Apple Inc. | |
7 | // Copyright 2002-2006 by Easy Software Products. | |
8 | // | |
9 | // These coded instructions, statements, and computer programs are the | |
10 | // property of Apple Inc. and are protected by Federal copyright | |
11 | // law. Distribution and use rights are outlined in the file "LICENSE.txt" | |
12 | // which should have been included with this file. If this | |
13 | // file is missing or damaged, see the license at "http://www.cups.org/". | |
14 | // | |
15 | // Contents: | |
16 | // | |
17 | // ppdcCatalog::ppdcCatalog() - Create a shared message catalog. | |
18 | // ppdcCatalog::~ppdcCatalog() - Destroy a shared message catalog. | |
19 | // ppdcCatalog::add_message() - Add a new message. | |
20 | // ppdcCatalog::find_message() - Find a message in a catalog... | |
21 | // ppdcCatalog::load_messages() - Load messages from a .po file. | |
22 | // ppdcCatalog::save_messages() - Save the messages to a .po file. | |
23 | // | |
24 | ||
25 | // | |
26 | // Include necessary headers... | |
27 | // | |
28 | ||
29 | #include "ppdc.h" | |
30 | #include <cups/globals.h> | |
31 | ||
32 | ||
839a51c8 MS |
33 | // |
34 | // Character encodings... | |
35 | // | |
36 | ||
typedef enum				// Encoding of a message file being read
{
  PPDC_CS_AUTO = 0,			// Not determined yet
  PPDC_CS_UTF8 = 1,			// UTF-8
  PPDC_CS_UTF16BE = 2,			// UTF-16, big-endian
  PPDC_CS_UTF16LE = 3			// UTF-16, little-endian
} ppdc_cs_t;
44 | ||
45 | ||
46 | // | |
47 | // Local functions... | |
48 | // | |
49 | ||
50 | static int get_utf8(char *&ptr); | |
51 | static int get_utf16(cups_file_t *fp, ppdc_cs_t &cs); | |
52 | static int put_utf8(int ch, char *&ptr, char *end); | |
53 | static int put_utf16(cups_file_t *fp, int ch); | |
54 | ||
55 | ||
ac884b6a MS |
56 | // |
57 | // 'ppdcCatalog::ppdcCatalog()' - Create a shared message catalog. | |
58 | // | |
59 | ||
60 | ppdcCatalog::ppdcCatalog(const char *l, // I - Locale | |
61 | const char *f) // I - Message catalog file | |
62 | : ppdcShared() | |
63 | { | |
64 | _cups_globals_t *cg = _cupsGlobals(); | |
65 | // Global information | |
66 | ||
67 | ||
68 | locale = new ppdcString(l); | |
69 | filename = new ppdcString(f); | |
70 | messages = new ppdcArray(); | |
71 | ||
72 | if (l) | |
73 | { | |
74 | // Try loading the base messages for this locale... | |
75 | char pofile[1024]; // Message catalog file | |
76 | ||
77 | ||
78 | snprintf(pofile, sizeof(pofile), "%s/%s/ppdc_%s.po", cg->localedir, l, l); | |
79 | ||
80 | if (load_messages(pofile) && strchr(l, '_')) | |
81 | { | |
82 | // Try the base locale... | |
83 | char baseloc[3]; // Base locale... | |
84 | ||
85 | ||
86 | strlcpy(baseloc, l, sizeof(baseloc)); | |
87 | snprintf(pofile, sizeof(pofile), "%s/%s/ppdc_%s.po", cg->localedir, | |
88 | baseloc, baseloc); | |
89 | ||
90 | load_messages(pofile); | |
91 | } | |
92 | } | |
93 | ||
94 | if (f) | |
95 | load_messages(f); | |
96 | } | |
97 | ||
98 | ||
99 | // | |
100 | // 'ppdcCatalog::~ppdcCatalog()' - Destroy a shared message catalog. | |
101 | // | |
102 | ||
103 | ppdcCatalog::~ppdcCatalog() | |
104 | { | |
105 | delete locale; | |
106 | delete filename; | |
107 | delete messages; | |
108 | } | |
109 | ||
110 | ||
111 | // | |
112 | // 'ppdcCatalog::add_message()' - Add a new message. | |
113 | // | |
114 | ||
115 | void | |
116 | ppdcCatalog::add_message(const char *id)// I - Message ID to add | |
117 | { | |
118 | ppdcMessage *m; // Current message | |
119 | char text[1024]; // Text to translate | |
120 | ||
121 | ||
122 | // Range check input... | |
123 | if (!id || !*id) | |
124 | return; | |
125 | ||
126 | // Verify that we don't already have the message ID... | |
127 | for (m = (ppdcMessage *)messages->first(); | |
128 | m; | |
129 | m = (ppdcMessage *)messages->next()) | |
130 | if (!strcmp(m->id->value, id)) | |
131 | return; | |
132 | ||
133 | // Add the message... | |
134 | snprintf(text, sizeof(text), "TRANSLATE %s", id); | |
135 | messages->add(new ppdcMessage(id, text)); | |
136 | } | |
137 | ||
138 | ||
139 | // | |
140 | // 'ppdcCatalog::find_message()' - Find a message in a catalog... | |
141 | // | |
142 | ||
143 | const char * // O - Message text | |
144 | ppdcCatalog::find_message( | |
145 | const char *id) // I - Message ID | |
146 | { | |
147 | ppdcMessage *m; // Current message | |
148 | ||
149 | ||
150 | for (m = (ppdcMessage *)messages->first(); | |
151 | m; | |
152 | m = (ppdcMessage *)messages->next()) | |
153 | if (!strcmp(m->id->value, id)) | |
154 | return (m->string->value); | |
155 | ||
156 | return (id); | |
157 | } | |
158 | ||
159 | ||
160 | // | |
161 | // 'ppdcCatalog::load_messages()' - Load messages from a .po file. | |
162 | // | |
163 | ||
164 | int // O - 0 on success, -1 on failure | |
165 | ppdcCatalog::load_messages( | |
166 | const char *f) // I - Message catalog file | |
167 | { | |
168 | cups_file_t *fp; // Message file | |
169 | ppdcMessage *temp; // Current message | |
170 | char line[4096], // Line buffer | |
171 | *ptr, // Pointer into buffer | |
172 | id[4096], // Translation ID | |
173 | str[4096]; // Translation string | |
174 | int linenum; // Line number | |
175 | ||
176 | ||
177 | // Open the message catalog file... | |
178 | if ((fp = cupsFileOpen(f, "r")) == NULL) | |
179 | return (-1); | |
180 | ||
ae71f5de | 181 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
839a51c8 MS |
182 | goto unknown_load_format; |
183 | else if (!strcmp(ptr, ".strings")) | |
184 | { | |
185 | /* | |
186 | * Read messages in Mac OS X ".strings" format, which are UTF-16 text | |
187 | * files of the format: | |
188 | * | |
189 | * "id" = "str"; | |
190 | * | |
191 | * Strings files can also contain C-style comments. | |
192 | */ | |
193 | ||
194 | ppdc_cs_t cs = PPDC_CS_AUTO; // Character set for file | |
195 | int ch; // Current character from file | |
196 | char *end; // End of buffer | |
197 | ||
198 | ||
199 | id[0] = '\0'; | |
200 | str[0] = '\0'; | |
201 | ptr = NULL; | |
202 | end = NULL; | |
203 | ||
204 | while ((ch = get_utf16(fp, cs)) != 0) | |
205 | { | |
206 | if (ptr) | |
207 | { | |
208 | if (ch == '\\') | |
209 | { | |
210 | if ((ch = get_utf16(fp, cs)) == 0) | |
211 | break; | |
ac884b6a | 212 | |
4509bb49 | 213 | if (ch == 'n') |
839a51c8 MS |
214 | ch = '\n'; |
215 | else if (ch == 't') | |
216 | ch = '\t'; | |
217 | } | |
4509bb49 MS |
218 | else if (ch == '\"') |
219 | { | |
220 | *ptr = '\0'; | |
221 | ptr = NULL; | |
222 | } | |
ac884b6a | 223 | |
4509bb49 MS |
224 | if (ptr) |
225 | put_utf8(ch, ptr, end); | |
839a51c8 MS |
226 | } |
227 | else if (ch == '/') | |
228 | { | |
229 | // Start of a comment? | |
230 | if ((ch = get_utf16(fp, cs)) == 0) | |
231 | break; | |
ac884b6a | 232 | |
839a51c8 MS |
233 | if (ch == '*') |
234 | { | |
235 | // Skip C comment... | |
236 | int lastch = 0; | |
ac884b6a | 237 | |
839a51c8 MS |
238 | while ((ch = get_utf16(fp, cs)) != 0) |
239 | { | |
240 | if (ch == '/' && lastch == '*') | |
241 | break; | |
ac884b6a | 242 | |
839a51c8 MS |
243 | lastch = ch; |
244 | } | |
245 | } | |
246 | else if (ch == '/') | |
247 | { | |
248 | // Skip C++ comment... | |
249 | while ((ch = get_utf16(fp, cs)) != 0) | |
250 | if (ch == '\n') | |
251 | break; | |
252 | } | |
253 | } | |
254 | else if (ch == '\"') | |
255 | { | |
4509bb49 MS |
256 | // Start quoted string... |
257 | if (id[0]) | |
839a51c8 MS |
258 | { |
259 | ptr = str; | |
260 | end = str + sizeof(str) - 1; | |
261 | } | |
262 | else | |
263 | { | |
264 | ptr = id; | |
265 | end = id + sizeof(id) - 1; | |
266 | } | |
267 | } | |
268 | else if (ch == ';') | |
269 | { | |
270 | // Add string... | |
271 | temp = new ppdcMessage(id, str); | |
ac884b6a | 272 | |
839a51c8 MS |
273 | messages->add(temp); |
274 | } | |
ac884b6a | 275 | } |
839a51c8 MS |
276 | } |
277 | else if (!strcmp(ptr, ".po") || !strcmp(ptr, ".gz")) | |
278 | { | |
279 | /* | |
280 | * Read messages from the catalog file until EOF... | |
281 | * | |
282 | * The format is the GNU gettext .po format, which is fairly simple: | |
283 | * | |
284 | * msgid "some text" | |
285 | * msgstr "localized text" | |
286 | * | |
287 | * The ID and localized text can span multiple lines using the form: | |
288 | * | |
289 | * msgid "" | |
290 | * "some long text" | |
291 | * msgstr "" | |
292 | * "localized text spanning " | |
293 | * "multiple lines" | |
294 | */ | |
295 | ||
296 | linenum = 0; | |
297 | id[0] = '\0'; | |
298 | str[0] = '\0'; | |
299 | ||
300 | while (cupsFileGets(fp, line, sizeof(line))) | |
301 | { | |
302 | linenum ++; | |
ac884b6a | 303 | |
839a51c8 MS |
304 | // Skip blank and comment lines... |
305 | if (line[0] == '#' || !line[0]) | |
306 | continue; | |
ac884b6a | 307 | |
839a51c8 | 308 | // Strip the trailing quote... |
ae71f5de | 309 | if ((ptr = (char *)strrchr(line, '\"')) == NULL) |
839a51c8 MS |
310 | { |
311 | fprintf(stderr, "ERROR: Expected quoted string on line %d of %s!\n", | |
312 | linenum, f); | |
313 | cupsFileClose(fp); | |
314 | return (-1); | |
315 | } | |
ac884b6a | 316 | |
839a51c8 MS |
317 | *ptr = '\0'; |
318 | ||
319 | // Find start of value... | |
320 | if ((ptr = strchr(line, '\"')) == NULL) | |
321 | { | |
322 | fprintf(stderr, "ERROR: Expected quoted string on line %d of %s!\n", | |
323 | linenum, f); | |
324 | cupsFileClose(fp); | |
325 | return (-1); | |
326 | } | |
327 | ||
328 | ptr ++; | |
329 | ||
330 | // Unquote the text... | |
331 | char *sptr, *dptr; // Source/destination pointers | |
332 | ||
333 | for (sptr = ptr, dptr = ptr; *sptr;) | |
ac884b6a | 334 | { |
839a51c8 | 335 | if (*sptr == '\\') |
ac884b6a | 336 | { |
839a51c8 MS |
337 | sptr ++; |
338 | if (isdigit(*sptr)) | |
339 | { | |
340 | *dptr = 0; | |
341 | ||
342 | while (isdigit(*sptr)) | |
343 | { | |
344 | *dptr = *dptr * 8 + *sptr - '0'; | |
345 | sptr ++; | |
346 | } | |
ac884b6a | 347 | |
839a51c8 MS |
348 | dptr ++; |
349 | } | |
350 | else | |
ac884b6a | 351 | { |
839a51c8 MS |
352 | if (*sptr == 'n') |
353 | *dptr++ = '\n'; | |
354 | else if (*sptr == 'r') | |
355 | *dptr++ = '\r'; | |
356 | else if (*sptr == 't') | |
357 | *dptr++ = '\t'; | |
358 | else | |
359 | *dptr++ = *sptr; | |
360 | ||
ac884b6a MS |
361 | sptr ++; |
362 | } | |
ac884b6a MS |
363 | } |
364 | else | |
839a51c8 | 365 | *dptr++ = *sptr++; |
ac884b6a | 366 | } |
ac884b6a | 367 | |
839a51c8 | 368 | *dptr = '\0'; |
ac884b6a | 369 | |
839a51c8 MS |
370 | // Create or add to a message... |
371 | if (!strncmp(line, "msgid", 5)) | |
ac884b6a | 372 | { |
839a51c8 MS |
373 | if (id[0] && str[0]) |
374 | { | |
375 | temp = new ppdcMessage(id, str); | |
ac884b6a | 376 | |
839a51c8 MS |
377 | messages->add(temp); |
378 | } | |
379 | ||
380 | strlcpy(id, ptr, sizeof(id)); | |
381 | str[0] = '\0'; | |
ac884b6a | 382 | } |
839a51c8 MS |
383 | else if (!strncmp(line, "msgstr", 6)) |
384 | { | |
385 | if (!id[0]) | |
386 | { | |
387 | fprintf(stderr, "ERROR: Need a msgid line before any " | |
388 | "translation strings on line %d of %s!\n", | |
389 | linenum, f); | |
390 | cupsFileClose(fp); | |
391 | return (-1); | |
392 | } | |
ac884b6a | 393 | |
839a51c8 MS |
394 | strlcpy(str, ptr, sizeof(str)); |
395 | } | |
396 | else if (line[0] == '\"' && str[0]) | |
397 | strlcat(str, ptr, sizeof(str)); | |
398 | else if (line[0] == '\"' && id[0]) | |
399 | strlcat(id, ptr, sizeof(id)); | |
400 | else | |
ac884b6a | 401 | { |
839a51c8 | 402 | fprintf(stderr, "ERROR: Unexpected text on line %d of %s!\n", |
ac884b6a MS |
403 | linenum, f); |
404 | cupsFileClose(fp); | |
405 | return (-1); | |
406 | } | |
ac884b6a | 407 | } |
839a51c8 MS |
408 | |
409 | if (id[0] && str[0]) | |
ac884b6a | 410 | { |
839a51c8 MS |
411 | temp = new ppdcMessage(id, str); |
412 | ||
413 | messages->add(temp); | |
ac884b6a MS |
414 | } |
415 | } | |
839a51c8 MS |
416 | else |
417 | goto unknown_load_format; | |
ac884b6a | 418 | |
839a51c8 MS |
419 | /* |
420 | * Close the file and return... | |
421 | */ | |
ac884b6a MS |
422 | |
423 | cupsFileClose(fp); | |
424 | ||
425 | return (0); | |
839a51c8 MS |
426 | |
427 | /* | |
428 | * Unknown format error... | |
429 | */ | |
430 | ||
431 | unknown_load_format: | |
432 | ||
433 | fprintf(stderr, "ERROR: Unknown message catalog format for \"%s\"!\n", f); | |
434 | cupsFileClose(fp); | |
435 | return (-1); | |
ac884b6a MS |
436 | } |
437 | ||
438 | ||
439 | // | |
440 | // 'ppdcCatalog::save_messages()' - Save the messages to a .po file. | |
441 | // | |
442 | ||
443 | int // O - 0 on success, -1 on error | |
444 | ppdcCatalog::save_messages( | |
445 | const char *f) // I - File to save to | |
446 | { | |
447 | cups_file_t *fp; // Message file | |
448 | ppdcMessage *m; // Current message | |
839a51c8 MS |
449 | char *ptr; // Pointer into string |
450 | int utf16; // Output UTF-16 .strings file? | |
451 | int ch; // Current character | |
ac884b6a MS |
452 | |
453 | ||
839a51c8 | 454 | // Open the file... |
ae71f5de | 455 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
ac884b6a MS |
456 | return (-1); |
457 | ||
839a51c8 MS |
458 | if (!strcmp(ptr, ".gz")) |
459 | fp = cupsFileOpen(f, "w9"); | |
460 | else | |
461 | fp = cupsFileOpen(f, "w"); | |
462 | ||
463 | if (!fp) | |
464 | return (-1); | |
465 | ||
466 | // For .strings files, write a BOM for big-endian output... | |
467 | utf16 = !strcmp(ptr, ".strings"); | |
468 | ||
469 | if (utf16) | |
470 | put_utf16(fp, 0xfeff); | |
471 | ||
472 | // Loop through all of the messages... | |
ac884b6a MS |
473 | for (m = (ppdcMessage *)messages->first(); |
474 | m; | |
475 | m = (ppdcMessage *)messages->next()) | |
476 | { | |
839a51c8 MS |
477 | if (utf16) |
478 | { | |
479 | put_utf16(fp, '\"'); | |
ac884b6a | 480 | |
839a51c8 MS |
481 | ptr = m->id->value; |
482 | while ((ch = get_utf8(ptr)) != 0) | |
483 | switch (ch) | |
484 | { | |
485 | case '\n' : | |
486 | put_utf16(fp, '\\'); | |
487 | put_utf16(fp, 'n'); | |
488 | break; | |
489 | case '\\' : | |
490 | put_utf16(fp, '\\'); | |
491 | put_utf16(fp, '\\'); | |
492 | break; | |
493 | case '\"' : | |
494 | put_utf16(fp, '\\'); | |
495 | put_utf16(fp, '\"'); | |
496 | break; | |
497 | default : | |
498 | put_utf16(fp, ch); | |
499 | break; | |
500 | } | |
501 | ||
502 | put_utf16(fp, '\"'); | |
503 | put_utf16(fp, ' '); | |
504 | put_utf16(fp, '='); | |
505 | put_utf16(fp, ' '); | |
506 | put_utf16(fp, '\"'); | |
507 | ||
508 | ptr = m->string->value; | |
509 | while ((ch = get_utf8(ptr)) != 0) | |
510 | switch (ch) | |
511 | { | |
512 | case '\n' : | |
513 | put_utf16(fp, '\\'); | |
514 | put_utf16(fp, 'n'); | |
515 | break; | |
516 | case '\\' : | |
517 | put_utf16(fp, '\\'); | |
518 | put_utf16(fp, '\\'); | |
519 | break; | |
520 | case '\"' : | |
521 | put_utf16(fp, '\\'); | |
522 | put_utf16(fp, '\"'); | |
523 | break; | |
524 | default : | |
525 | put_utf16(fp, ch); | |
526 | break; | |
527 | } | |
ac884b6a | 528 | |
839a51c8 MS |
529 | put_utf16(fp, '\"'); |
530 | put_utf16(fp, ';'); | |
531 | put_utf16(fp, '\n'); | |
532 | } | |
533 | else | |
534 | { | |
535 | cupsFilePuts(fp, "msgid \""); | |
536 | for (ptr = m->id->value; *ptr; ptr ++) | |
537 | switch (*ptr) | |
538 | { | |
539 | case '\n' : | |
540 | cupsFilePuts(fp, "\\n"); | |
541 | break; | |
542 | case '\\' : | |
543 | cupsFilePuts(fp, "\\\\"); | |
544 | break; | |
545 | case '\"' : | |
546 | cupsFilePuts(fp, "\\\""); | |
547 | break; | |
548 | default : | |
549 | cupsFilePutChar(fp, *ptr); | |
550 | break; | |
551 | } | |
552 | cupsFilePuts(fp, "\"\n"); | |
553 | ||
554 | cupsFilePuts(fp, "msgstr \""); | |
555 | for (ptr = m->string->value; *ptr; ptr ++) | |
556 | switch (*ptr) | |
557 | { | |
558 | case '\n' : | |
559 | cupsFilePuts(fp, "\\n"); | |
560 | break; | |
561 | case '\\' : | |
562 | cupsFilePuts(fp, "\\\\"); | |
563 | break; | |
564 | case '\"' : | |
565 | cupsFilePuts(fp, "\\\""); | |
566 | break; | |
567 | default : | |
568 | cupsFilePutChar(fp, *ptr); | |
569 | break; | |
570 | } | |
571 | cupsFilePuts(fp, "\"\n"); | |
572 | ||
573 | cupsFilePutChar(fp, '\n'); | |
574 | } | |
ac884b6a MS |
575 | } |
576 | ||
577 | cupsFileClose(fp); | |
578 | ||
579 | return (0); | |
580 | } | |
581 | ||
582 | ||
839a51c8 MS |
583 | // |
584 | // 'get_utf8()' - Get a UTF-8 character. | |
585 | // | |
586 | ||
//
// Decodes one character from the UTF-8 string at "ptr" and advances "ptr"
// past the bytes consumed.  Bytes below 0xc0 and bytes 0xf8 and above are
// returned unchanged; a lead byte that is not followed by the expected
// continuation bytes yields 0.
//

static int				// O - Unicode character or 0 on EOF
get_utf8(char *&ptr)			// IO - Pointer to character
{
  int	ch = *ptr++ & 255;		// Current character
  int	extra;				// Continuation bytes still expected


  if (ch < 0xc0)
    return (ch);

  // Work out the sequence length and keep only the lead byte's payload...
  if ((ch & 0xe0) == 0xc0)
  {
    extra = 1;				// Two-byte UTF-8...
    ch    &= 0x1f;
  }
  else if ((ch & 0xf0) == 0xe0)
  {
    extra = 2;				// Three-byte UTF-8...
    ch    &= 0x0f;
  }
  else if ((ch & 0xf8) == 0xf0)
  {
    extra = 3;				// Four-byte UTF-8...
    ch    &= 0x07;
  }
  else
    return (ch);

  // Fold in six bits from each continuation byte...
  while (extra-- > 0)
  {
    if ((*ptr & 0xc0) != 0x80)
      return (0);

    ch = (ch << 6) | (*ptr++ & 0x3f);
  }

  return (ch);
}
638 | ||
639 | ||
640 | // | |
641 | // 'get_utf16()' - Get a UTF-16 character... | |
642 | // | |
643 | ||
644 | static int // O - Unicode character or 0 on EOF | |
645 | get_utf16(cups_file_t *fp, // I - File to read from | |
646 | ppdc_cs_t &cs) // IO - Character set of file | |
647 | { | |
648 | int ch; // Current character | |
649 | unsigned char buffer[3]; // Bytes | |
650 | ||
651 | ||
652 | if (cs == PPDC_CS_AUTO) | |
653 | { | |
654 | // Get byte-order-mark, if present... | |
655 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
656 | return (0); | |
657 | ||
658 | if (buffer[0] == 0xfe && buffer[1] == 0xff) | |
659 | { | |
660 | // Big-endian UTF-16... | |
661 | cs = PPDC_CS_UTF16BE; | |
662 | ||
663 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
664 | return (0); | |
665 | } | |
666 | else if (buffer[0] == 0xff && buffer[1] == 0xfe) | |
667 | { | |
668 | // Little-endian UTF-16... | |
669 | cs = PPDC_CS_UTF16LE; | |
670 | ||
671 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
672 | return (0); | |
673 | } | |
674 | else if (buffer[0] == 0x00 && buffer[1] != 0x00) | |
675 | { | |
676 | // No BOM, assume big-endian UTF-16... | |
677 | cs = PPDC_CS_UTF16BE; | |
678 | } | |
679 | else if (buffer[0] != 0x00 && buffer[1] == 0x00) | |
680 | { | |
681 | // No BOM, assume little-endian UTF-16... | |
682 | cs = PPDC_CS_UTF16LE; | |
683 | } | |
684 | else | |
685 | { | |
686 | // No BOM, assume UTF-8... | |
687 | cs = PPDC_CS_UTF8; | |
688 | ||
689 | cupsFileRewind(fp); | |
690 | } | |
691 | } | |
692 | else if (cs != PPDC_CS_UTF8) | |
693 | { | |
694 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
695 | return (0); | |
696 | } | |
697 | ||
698 | if (cs == PPDC_CS_UTF8) | |
699 | { | |
700 | // UTF-8 character... | |
4509bb49 MS |
701 | if ((ch = cupsFileGetChar(fp)) < 0) |
702 | return (0); | |
839a51c8 MS |
703 | |
704 | if ((ch & 0xe0) == 0xc0) | |
705 | { | |
706 | // Two-byte UTF-8... | |
707 | if (cupsFileRead(fp, (char *)buffer, 1) != 1) | |
708 | return (0); | |
709 | ||
710 | if ((buffer[0] & 0xc0) != 0x80) | |
711 | return (0); | |
712 | ||
713 | ch = ((ch & 0x1f) << 6) | (buffer[0] & 0x3f); | |
714 | } | |
715 | else if ((ch & 0xf0) == 0xe0) | |
716 | { | |
717 | // Three-byte UTF-8... | |
718 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
719 | return (0); | |
720 | ||
721 | if ((buffer[0] & 0xc0) != 0x80 || | |
722 | (buffer[1] & 0xc0) != 0x80) | |
723 | return (0); | |
724 | ||
725 | ch = ((((ch & 0x0f) << 6) | (buffer[0] & 0x3f)) << 6) | | |
726 | (buffer[1] & 0x3f); | |
727 | } | |
728 | else if ((ch & 0xf8) == 0xf0) | |
729 | { | |
730 | // Four-byte UTF-8... | |
731 | if (cupsFileRead(fp, (char *)buffer, 3) != 3) | |
732 | return (0); | |
733 | ||
734 | if ((buffer[0] & 0xc0) != 0x80 || | |
735 | (buffer[1] & 0xc0) != 0x80 || | |
736 | (buffer[2] & 0xc0) != 0x80) | |
737 | return (0); | |
738 | ||
739 | ch = ((((((ch & 0x07) << 6) | (buffer[0] & 0x3f)) << 6) | | |
740 | (buffer[1] & 0x3f)) << 6) | (buffer[2] & 0x3f); | |
741 | } | |
742 | } | |
743 | else | |
744 | { | |
745 | // UTF-16 character... | |
746 | if (cs == PPDC_CS_UTF16BE) | |
747 | ch = (buffer[0] << 8) | buffer[1]; | |
748 | else | |
749 | ch = (buffer[1] << 8) | buffer[0]; | |
750 | ||
751 | if (ch >= 0xd800 && ch <= 0xdbff) | |
752 | { | |
753 | // Handle multi-word encoding... | |
754 | int lch; | |
755 | ||
756 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
757 | return (0); | |
758 | ||
759 | if (cs == PPDC_CS_UTF16BE) | |
760 | lch = (buffer[0] << 8) | buffer[1]; | |
761 | else | |
762 | lch = (buffer[1] << 8) | buffer[0]; | |
763 | ||
764 | if (lch < 0xdc00 || lch >= 0xdfff) | |
765 | return (0); | |
766 | ||
767 | ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; | |
768 | } | |
769 | } | |
770 | ||
771 | return (ch); | |
772 | } | |
773 | ||
774 | ||
775 | // | |
776 | // 'put_utf8()' - Add a UTF-8 character to a string. | |
777 | // | |
778 | ||
//
// Appends "ch" to the buffer at "ptr" as one to four UTF-8 bytes and
// advances "ptr".  Nothing is written, and -1 is returned, when the last
// byte of the sequence would land at or beyond "end".
//

static int				// O - 0 on success, -1 on failure
put_utf8(int  ch,			// I - Unicode character
         char *&ptr,			// IO - String pointer
	 char *end)			// I - End of buffer
{
  int	extra;				// Number of continuation bytes
  int	shift;				// Bit offset of current 6-bit group


  // How many bytes follow the lead byte?
  if (ch < 0x80)
    extra = 0;				// One-byte ASCII...
  else if (ch < 0x800)
    extra = 1;				// Two-byte UTF-8...
  else if (ch < 0x10000)
    extra = 2;				// Three-byte UTF-8...
  else
    extra = 3;				// Four-byte UTF-8...

  if ((ptr + extra) >= end)
    return (-1);

  // Lead byte...
  switch (extra)
  {
    case 0 :
        *ptr++ = ch;
	break;
    case 1 :
        *ptr++ = 0xc0 | (ch >> 6);
	break;
    case 2 :
        *ptr++ = 0xe0 | (ch >> 12);
	break;
    default :
        *ptr++ = 0xf0 | (ch >> 18);
	break;
  }

  // Continuation bytes, most significant group first...
  for (shift = 6 * (extra - 1); shift >= 0; shift -= 6)
    *ptr++ = 0x80 | ((ch >> shift) & 0x3f);

  return (0);
}
825 | ||
826 | ||
827 | // | |
828 | // 'put_utf16()' - Write a UTF-16 character to a file. | |
829 | // | |
830 | ||
831 | static int // O - 0 on success, -1 on failure | |
832 | put_utf16(cups_file_t *fp, // I - File to write to | |
833 | int ch) // I - Unicode character | |
834 | { | |
835 | unsigned char buffer[4]; // Output buffer | |
836 | ||
837 | ||
838 | if (ch < 0x10000) | |
839 | { | |
840 | // One-word UTF-16 big-endian... | |
841 | buffer[0] = ch >> 8; | |
842 | buffer[1] = ch; | |
843 | ||
844 | if (cupsFileWrite(fp, (char *)buffer, 2) == 2) | |
845 | return (0); | |
846 | } | |
847 | else | |
848 | { | |
849 | // Two-word UTF-16 big-endian... | |
850 | ch -= 0x10000; | |
851 | ||
852 | buffer[0] = 0xd8 | (ch >> 18); | |
853 | buffer[1] = ch >> 10; | |
854 | buffer[2] = 0xdc | ((ch >> 8) & 0x03); | |
855 | buffer[3] = ch; | |
856 | ||
857 | if (cupsFileWrite(fp, (char *)buffer, 4) == 4) | |
858 | return (0); | |
859 | } | |
860 | ||
861 | return (-1); | |
862 | } | |
863 | ||
864 | ||
ac884b6a MS |
865 | // |
866 | // End of "$Id$". | |
867 | // |