]>
Commit | Line | Data |
---|---|---|
ac884b6a MS |
1 | // |
2 | // "$Id$" | |
3 | // | |
4 | // Shared message catalog class for the CUPS PPD Compiler. | |
5 | // | |
0837b7e8 | 6 | // Copyright 2007-2010 by Apple Inc. |
ac884b6a MS |
7 | // Copyright 2002-2006 by Easy Software Products. |
8 | // | |
9 | // These coded instructions, statements, and computer programs are the | |
10 | // property of Apple Inc. and are protected by Federal copyright | |
11 | // law. Distribution and use rights are outlined in the file "LICENSE.txt" | |
12 | // which should have been included with this file. If this | |
13 | // file is missing or damaged, see the license at "http://www.cups.org/". | |
14 | // | |
15 | // Contents: | |
16 | // | |
e6013cfa MS |
17 | // ppdcCatalog::ppdcCatalog() - Create a shared message catalog. |
18 | // ppdcCatalog::~ppdcCatalog() - Destroy a shared message catalog. | |
19 | // ppdcCatalog::add_message() - Add a new message. | |
20 | // ppdcCatalog::find_message() - Find a message in a catalog... | |
21 | // ppdcCatalog::load_messages() - Load messages from a .po file. | |
22 | // ppdcCatalog::save_messages() - Save the messages to a .po file. | |
23 | // get_utf8() - Get a UTF-8 character. | |
24 | // get_utf16() - Get a UTF-16 character... | |
25 | // put_utf8() - Add a UTF-8 character to a string. | |
26 | // put_utf16() - Write a UTF-16 character to a file. | |
ac884b6a MS |
27 | // |
28 | ||
29 | // | |
30 | // Include necessary headers... | |
31 | // | |
32 | ||
38e73f87 | 33 | #include "ppdc-private.h" |
ac884b6a MS |
34 | |
35 | ||
839a51c8 MS |
//
// Character encodings recognized when reading ".strings" catalogs...
//

typedef enum
{
  PPDC_CS_AUTO,                         // Not yet known; detect from the first bytes read
  PPDC_CS_UTF8,                         // UTF-8
  PPDC_CS_UTF16BE,                      // Big-endian UTF-16
  PPDC_CS_UTF16LE                       // Little-endian UTF-16
} ppdc_cs_t;
47 | ||
48 | ||
49 | // | |
50 | // Local functions... | |
51 | // | |
52 | ||
53 | static int get_utf8(char *&ptr); | |
54 | static int get_utf16(cups_file_t *fp, ppdc_cs_t &cs); | |
55 | static int put_utf8(int ch, char *&ptr, char *end); | |
56 | static int put_utf16(cups_file_t *fp, int ch); | |
57 | ||
58 | ||
ac884b6a MS |
59 | // |
60 | // 'ppdcCatalog::ppdcCatalog()' - Create a shared message catalog. | |
61 | // | |
62 | ||
63 | ppdcCatalog::ppdcCatalog(const char *l, // I - Locale | |
64 | const char *f) // I - Message catalog file | |
65 | : ppdcShared() | |
66 | { | |
67 | _cups_globals_t *cg = _cupsGlobals(); | |
68 | // Global information | |
69 | ||
70 | ||
94da7e34 MS |
71 | PPDC_NEW; |
72 | ||
ac884b6a MS |
73 | locale = new ppdcString(l); |
74 | filename = new ppdcString(f); | |
75 | messages = new ppdcArray(); | |
76 | ||
77 | if (l) | |
78 | { | |
79 | // Try loading the base messages for this locale... | |
80 | char pofile[1024]; // Message catalog file | |
81 | ||
82 | ||
61cf44e2 | 83 | snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, l, l); |
ac884b6a MS |
84 | |
85 | if (load_messages(pofile) && strchr(l, '_')) | |
86 | { | |
87 | // Try the base locale... | |
88 | char baseloc[3]; // Base locale... | |
89 | ||
90 | ||
91 | strlcpy(baseloc, l, sizeof(baseloc)); | |
61cf44e2 | 92 | snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, |
ac884b6a MS |
93 | baseloc, baseloc); |
94 | ||
95 | load_messages(pofile); | |
96 | } | |
97 | } | |
98 | ||
99 | if (f) | |
100 | load_messages(f); | |
101 | } | |
102 | ||
103 | ||
104 | // | |
105 | // 'ppdcCatalog::~ppdcCatalog()' - Destroy a shared message catalog. | |
106 | // | |
107 | ||
108 | ppdcCatalog::~ppdcCatalog() | |
109 | { | |
94da7e34 MS |
110 | PPDC_DELETE; |
111 | ||
e4572d57 MS |
112 | locale->release(); |
113 | filename->release(); | |
114 | messages->release(); | |
ac884b6a MS |
115 | } |
116 | ||
117 | ||
118 | // | |
119 | // 'ppdcCatalog::add_message()' - Add a new message. | |
120 | // | |
121 | ||
122 | void | |
61cf44e2 MS |
123 | ppdcCatalog::add_message( |
124 | const char *id, // I - Message ID to add | |
125 | const char *string) // I - Translation string | |
ac884b6a MS |
126 | { |
127 | ppdcMessage *m; // Current message | |
128 | char text[1024]; // Text to translate | |
129 | ||
130 | ||
131 | // Range check input... | |
61cf44e2 | 132 | if (!id) |
ac884b6a MS |
133 | return; |
134 | ||
135 | // Verify that we don't already have the message ID... | |
136 | for (m = (ppdcMessage *)messages->first(); | |
137 | m; | |
138 | m = (ppdcMessage *)messages->next()) | |
139 | if (!strcmp(m->id->value, id)) | |
61cf44e2 MS |
140 | { |
141 | if (string) | |
142 | { | |
143 | m->string->release(); | |
144 | m->string = new ppdcString(string); | |
145 | } | |
ac884b6a | 146 | return; |
61cf44e2 | 147 | } |
ac884b6a MS |
148 | |
149 | // Add the message... | |
61cf44e2 MS |
150 | if (!string) |
151 | { | |
152 | snprintf(text, sizeof(text), "TRANSLATE %s", id); | |
153 | string = text; | |
154 | } | |
155 | ||
e6013cfa | 156 | messages->add(new ppdcMessage(id, string)); |
ac884b6a MS |
157 | } |
158 | ||
159 | ||
160 | // | |
161 | // 'ppdcCatalog::find_message()' - Find a message in a catalog... | |
162 | // | |
163 | ||
164 | const char * // O - Message text | |
165 | ppdcCatalog::find_message( | |
166 | const char *id) // I - Message ID | |
167 | { | |
168 | ppdcMessage *m; // Current message | |
169 | ||
170 | ||
ef55b745 MS |
171 | if (!*id) |
172 | return (id); | |
173 | ||
ac884b6a MS |
174 | for (m = (ppdcMessage *)messages->first(); |
175 | m; | |
176 | m = (ppdcMessage *)messages->next()) | |
177 | if (!strcmp(m->id->value, id)) | |
178 | return (m->string->value); | |
179 | ||
180 | return (id); | |
181 | } | |
182 | ||
183 | ||
184 | // | |
185 | // 'ppdcCatalog::load_messages()' - Load messages from a .po file. | |
186 | // | |
187 | ||
188 | int // O - 0 on success, -1 on failure | |
189 | ppdcCatalog::load_messages( | |
190 | const char *f) // I - Message catalog file | |
191 | { | |
192 | cups_file_t *fp; // Message file | |
ac884b6a MS |
193 | char line[4096], // Line buffer |
194 | *ptr, // Pointer into buffer | |
195 | id[4096], // Translation ID | |
196 | str[4096]; // Translation string | |
197 | int linenum; // Line number | |
198 | ||
199 | ||
200 | // Open the message catalog file... | |
201 | if ((fp = cupsFileOpen(f, "r")) == NULL) | |
202 | return (-1); | |
203 | ||
ae71f5de | 204 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
839a51c8 MS |
205 | goto unknown_load_format; |
206 | else if (!strcmp(ptr, ".strings")) | |
207 | { | |
208 | /* | |
209 | * Read messages in Mac OS X ".strings" format, which are UTF-16 text | |
210 | * files of the format: | |
211 | * | |
212 | * "id" = "str"; | |
213 | * | |
214 | * Strings files can also contain C-style comments. | |
215 | */ | |
216 | ||
217 | ppdc_cs_t cs = PPDC_CS_AUTO; // Character set for file | |
218 | int ch; // Current character from file | |
219 | char *end; // End of buffer | |
220 | ||
221 | ||
222 | id[0] = '\0'; | |
223 | str[0] = '\0'; | |
224 | ptr = NULL; | |
225 | end = NULL; | |
226 | ||
227 | while ((ch = get_utf16(fp, cs)) != 0) | |
228 | { | |
229 | if (ptr) | |
230 | { | |
231 | if (ch == '\\') | |
232 | { | |
233 | if ((ch = get_utf16(fp, cs)) == 0) | |
234 | break; | |
ac884b6a | 235 | |
4509bb49 | 236 | if (ch == 'n') |
839a51c8 MS |
237 | ch = '\n'; |
238 | else if (ch == 't') | |
239 | ch = '\t'; | |
240 | } | |
4509bb49 MS |
241 | else if (ch == '\"') |
242 | { | |
243 | *ptr = '\0'; | |
244 | ptr = NULL; | |
245 | } | |
ac884b6a | 246 | |
4509bb49 MS |
247 | if (ptr) |
248 | put_utf8(ch, ptr, end); | |
839a51c8 MS |
249 | } |
250 | else if (ch == '/') | |
251 | { | |
252 | // Start of a comment? | |
253 | if ((ch = get_utf16(fp, cs)) == 0) | |
254 | break; | |
ac884b6a | 255 | |
839a51c8 MS |
256 | if (ch == '*') |
257 | { | |
258 | // Skip C comment... | |
259 | int lastch = 0; | |
ac884b6a | 260 | |
839a51c8 MS |
261 | while ((ch = get_utf16(fp, cs)) != 0) |
262 | { | |
263 | if (ch == '/' && lastch == '*') | |
264 | break; | |
ac884b6a | 265 | |
839a51c8 MS |
266 | lastch = ch; |
267 | } | |
268 | } | |
269 | else if (ch == '/') | |
270 | { | |
271 | // Skip C++ comment... | |
272 | while ((ch = get_utf16(fp, cs)) != 0) | |
273 | if (ch == '\n') | |
274 | break; | |
275 | } | |
276 | } | |
277 | else if (ch == '\"') | |
278 | { | |
4509bb49 MS |
279 | // Start quoted string... |
280 | if (id[0]) | |
839a51c8 MS |
281 | { |
282 | ptr = str; | |
283 | end = str + sizeof(str) - 1; | |
284 | } | |
285 | else | |
286 | { | |
287 | ptr = id; | |
288 | end = id + sizeof(id) - 1; | |
289 | } | |
290 | } | |
291 | else if (ch == ';') | |
292 | { | |
293 | // Add string... | |
61cf44e2 | 294 | add_message(id, str); |
e6013cfa | 295 | id[0] = '\0'; |
839a51c8 | 296 | } |
ac884b6a | 297 | } |
839a51c8 MS |
298 | } |
299 | else if (!strcmp(ptr, ".po") || !strcmp(ptr, ".gz")) | |
300 | { | |
301 | /* | |
302 | * Read messages from the catalog file until EOF... | |
303 | * | |
304 | * The format is the GNU gettext .po format, which is fairly simple: | |
305 | * | |
306 | * msgid "some text" | |
307 | * msgstr "localized text" | |
308 | * | |
309 | * The ID and localized text can span multiple lines using the form: | |
310 | * | |
311 | * msgid "" | |
312 | * "some long text" | |
313 | * msgstr "" | |
314 | * "localized text spanning " | |
315 | * "multiple lines" | |
316 | */ | |
317 | ||
61cf44e2 MS |
318 | int which, // In msgid? |
319 | haveid, // Did we get a msgid string? | |
320 | havestr; // Did we get a msgstr string? | |
321 | ||
839a51c8 MS |
322 | linenum = 0; |
323 | id[0] = '\0'; | |
324 | str[0] = '\0'; | |
61cf44e2 MS |
325 | haveid = 0; |
326 | havestr = 0; | |
327 | which = 0; | |
839a51c8 MS |
328 | |
329 | while (cupsFileGets(fp, line, sizeof(line))) | |
330 | { | |
331 | linenum ++; | |
ac884b6a | 332 | |
839a51c8 MS |
333 | // Skip blank and comment lines... |
334 | if (line[0] == '#' || !line[0]) | |
335 | continue; | |
ac884b6a | 336 | |
839a51c8 | 337 | // Strip the trailing quote... |
ae71f5de | 338 | if ((ptr = (char *)strrchr(line, '\"')) == NULL) |
839a51c8 | 339 | { |
61cf44e2 | 340 | _cupsLangPrintf(stderr, |
0837b7e8 | 341 | _("ppdc: Expected quoted string on line %d of %s."), |
61cf44e2 | 342 | linenum, f); |
839a51c8 MS |
343 | cupsFileClose(fp); |
344 | return (-1); | |
345 | } | |
ac884b6a | 346 | |
839a51c8 MS |
347 | *ptr = '\0'; |
348 | ||
349 | // Find start of value... | |
350 | if ((ptr = strchr(line, '\"')) == NULL) | |
351 | { | |
61cf44e2 | 352 | _cupsLangPrintf(stderr, |
0837b7e8 | 353 | _("ppdc: Expected quoted string on line %d of %s."), |
61cf44e2 | 354 | linenum, f); |
839a51c8 MS |
355 | cupsFileClose(fp); |
356 | return (-1); | |
357 | } | |
358 | ||
359 | ptr ++; | |
360 | ||
361 | // Unquote the text... | |
362 | char *sptr, *dptr; // Source/destination pointers | |
363 | ||
364 | for (sptr = ptr, dptr = ptr; *sptr;) | |
ac884b6a | 365 | { |
839a51c8 | 366 | if (*sptr == '\\') |
ac884b6a | 367 | { |
839a51c8 MS |
368 | sptr ++; |
369 | if (isdigit(*sptr)) | |
370 | { | |
371 | *dptr = 0; | |
372 | ||
373 | while (isdigit(*sptr)) | |
374 | { | |
375 | *dptr = *dptr * 8 + *sptr - '0'; | |
376 | sptr ++; | |
377 | } | |
ac884b6a | 378 | |
839a51c8 MS |
379 | dptr ++; |
380 | } | |
381 | else | |
ac884b6a | 382 | { |
839a51c8 MS |
383 | if (*sptr == 'n') |
384 | *dptr++ = '\n'; | |
385 | else if (*sptr == 'r') | |
386 | *dptr++ = '\r'; | |
387 | else if (*sptr == 't') | |
388 | *dptr++ = '\t'; | |
389 | else | |
390 | *dptr++ = *sptr; | |
391 | ||
ac884b6a MS |
392 | sptr ++; |
393 | } | |
ac884b6a MS |
394 | } |
395 | else | |
839a51c8 | 396 | *dptr++ = *sptr++; |
ac884b6a | 397 | } |
ac884b6a | 398 | |
839a51c8 | 399 | *dptr = '\0'; |
ac884b6a | 400 | |
839a51c8 MS |
401 | // Create or add to a message... |
402 | if (!strncmp(line, "msgid", 5)) | |
ac884b6a | 403 | { |
61cf44e2 MS |
404 | if (haveid && havestr) |
405 | add_message(id, str); | |
839a51c8 MS |
406 | |
407 | strlcpy(id, ptr, sizeof(id)); | |
408 | str[0] = '\0'; | |
61cf44e2 MS |
409 | haveid = 1; |
410 | havestr = 0; | |
411 | which = 1; | |
ac884b6a | 412 | } |
839a51c8 MS |
413 | else if (!strncmp(line, "msgstr", 6)) |
414 | { | |
61cf44e2 | 415 | if (!haveid) |
839a51c8 | 416 | { |
61cf44e2 | 417 | _cupsLangPrintf(stderr, |
0837b7e8 MS |
418 | _("ppdc: Need a msgid line before any " |
419 | "translation strings on line %d of %s."), | |
61cf44e2 | 420 | linenum, f); |
839a51c8 MS |
421 | cupsFileClose(fp); |
422 | return (-1); | |
423 | } | |
ac884b6a | 424 | |
839a51c8 | 425 | strlcpy(str, ptr, sizeof(str)); |
61cf44e2 MS |
426 | havestr = 1; |
427 | which = 2; | |
839a51c8 | 428 | } |
61cf44e2 | 429 | else if (line[0] == '\"' && which == 2) |
839a51c8 | 430 | strlcat(str, ptr, sizeof(str)); |
61cf44e2 | 431 | else if (line[0] == '\"' && which == 1) |
839a51c8 MS |
432 | strlcat(id, ptr, sizeof(id)); |
433 | else | |
ac884b6a | 434 | { |
0837b7e8 | 435 | _cupsLangPrintf(stderr, _("ppdc: Unexpected text on line %d of %s."), |
61cf44e2 | 436 | linenum, f); |
ac884b6a MS |
437 | cupsFileClose(fp); |
438 | return (-1); | |
439 | } | |
ac884b6a | 440 | } |
839a51c8 | 441 | |
61cf44e2 MS |
442 | if (haveid && havestr) |
443 | add_message(id, str); | |
ac884b6a | 444 | } |
839a51c8 MS |
445 | else |
446 | goto unknown_load_format; | |
ac884b6a | 447 | |
839a51c8 MS |
448 | /* |
449 | * Close the file and return... | |
450 | */ | |
ac884b6a MS |
451 | |
452 | cupsFileClose(fp); | |
453 | ||
454 | return (0); | |
839a51c8 MS |
455 | |
456 | /* | |
457 | * Unknown format error... | |
458 | */ | |
459 | ||
460 | unknown_load_format: | |
461 | ||
61cf44e2 | 462 | _cupsLangPrintf(stderr, |
0837b7e8 | 463 | _("ppdc: Unknown message catalog format for \"%s\"."), f); |
839a51c8 MS |
464 | cupsFileClose(fp); |
465 | return (-1); | |
ac884b6a MS |
466 | } |
467 | ||
468 | ||
469 | // | |
470 | // 'ppdcCatalog::save_messages()' - Save the messages to a .po file. | |
471 | // | |
472 | ||
473 | int // O - 0 on success, -1 on error | |
474 | ppdcCatalog::save_messages( | |
475 | const char *f) // I - File to save to | |
476 | { | |
477 | cups_file_t *fp; // Message file | |
478 | ppdcMessage *m; // Current message | |
839a51c8 MS |
479 | char *ptr; // Pointer into string |
480 | int utf16; // Output UTF-16 .strings file? | |
481 | int ch; // Current character | |
ac884b6a MS |
482 | |
483 | ||
839a51c8 | 484 | // Open the file... |
ae71f5de | 485 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
ac884b6a MS |
486 | return (-1); |
487 | ||
839a51c8 MS |
488 | if (!strcmp(ptr, ".gz")) |
489 | fp = cupsFileOpen(f, "w9"); | |
490 | else | |
491 | fp = cupsFileOpen(f, "w"); | |
492 | ||
493 | if (!fp) | |
494 | return (-1); | |
495 | ||
496 | // For .strings files, write a BOM for big-endian output... | |
497 | utf16 = !strcmp(ptr, ".strings"); | |
498 | ||
499 | if (utf16) | |
500 | put_utf16(fp, 0xfeff); | |
501 | ||
502 | // Loop through all of the messages... | |
ac884b6a MS |
503 | for (m = (ppdcMessage *)messages->first(); |
504 | m; | |
505 | m = (ppdcMessage *)messages->next()) | |
506 | { | |
839a51c8 MS |
507 | if (utf16) |
508 | { | |
509 | put_utf16(fp, '\"'); | |
ac884b6a | 510 | |
839a51c8 MS |
511 | ptr = m->id->value; |
512 | while ((ch = get_utf8(ptr)) != 0) | |
513 | switch (ch) | |
514 | { | |
515 | case '\n' : | |
516 | put_utf16(fp, '\\'); | |
517 | put_utf16(fp, 'n'); | |
518 | break; | |
519 | case '\\' : | |
520 | put_utf16(fp, '\\'); | |
521 | put_utf16(fp, '\\'); | |
522 | break; | |
523 | case '\"' : | |
524 | put_utf16(fp, '\\'); | |
525 | put_utf16(fp, '\"'); | |
526 | break; | |
527 | default : | |
528 | put_utf16(fp, ch); | |
529 | break; | |
530 | } | |
531 | ||
532 | put_utf16(fp, '\"'); | |
533 | put_utf16(fp, ' '); | |
534 | put_utf16(fp, '='); | |
535 | put_utf16(fp, ' '); | |
536 | put_utf16(fp, '\"'); | |
537 | ||
538 | ptr = m->string->value; | |
539 | while ((ch = get_utf8(ptr)) != 0) | |
540 | switch (ch) | |
541 | { | |
542 | case '\n' : | |
543 | put_utf16(fp, '\\'); | |
544 | put_utf16(fp, 'n'); | |
545 | break; | |
546 | case '\\' : | |
547 | put_utf16(fp, '\\'); | |
548 | put_utf16(fp, '\\'); | |
549 | break; | |
550 | case '\"' : | |
551 | put_utf16(fp, '\\'); | |
552 | put_utf16(fp, '\"'); | |
553 | break; | |
554 | default : | |
555 | put_utf16(fp, ch); | |
556 | break; | |
557 | } | |
ac884b6a | 558 | |
839a51c8 MS |
559 | put_utf16(fp, '\"'); |
560 | put_utf16(fp, ';'); | |
561 | put_utf16(fp, '\n'); | |
562 | } | |
563 | else | |
564 | { | |
565 | cupsFilePuts(fp, "msgid \""); | |
566 | for (ptr = m->id->value; *ptr; ptr ++) | |
567 | switch (*ptr) | |
568 | { | |
569 | case '\n' : | |
570 | cupsFilePuts(fp, "\\n"); | |
571 | break; | |
572 | case '\\' : | |
573 | cupsFilePuts(fp, "\\\\"); | |
574 | break; | |
575 | case '\"' : | |
576 | cupsFilePuts(fp, "\\\""); | |
577 | break; | |
578 | default : | |
579 | cupsFilePutChar(fp, *ptr); | |
580 | break; | |
581 | } | |
582 | cupsFilePuts(fp, "\"\n"); | |
583 | ||
584 | cupsFilePuts(fp, "msgstr \""); | |
585 | for (ptr = m->string->value; *ptr; ptr ++) | |
586 | switch (*ptr) | |
587 | { | |
588 | case '\n' : | |
589 | cupsFilePuts(fp, "\\n"); | |
590 | break; | |
591 | case '\\' : | |
592 | cupsFilePuts(fp, "\\\\"); | |
593 | break; | |
594 | case '\"' : | |
595 | cupsFilePuts(fp, "\\\""); | |
596 | break; | |
597 | default : | |
598 | cupsFilePutChar(fp, *ptr); | |
599 | break; | |
600 | } | |
601 | cupsFilePuts(fp, "\"\n"); | |
602 | ||
603 | cupsFilePutChar(fp, '\n'); | |
604 | } | |
ac884b6a MS |
605 | } |
606 | ||
607 | cupsFileClose(fp); | |
608 | ||
609 | return (0); | |
610 | } | |
611 | ||
612 | ||
839a51c8 MS |
//
// 'get_utf8()' - Get a UTF-8 character.
//
// Decodes one character and advances the pointer past every byte that was
// consumed.  Bytes below 0xc0 and lead bytes of 0xf8 and above are returned
// unchanged.  A multi-byte sequence with a bad continuation byte returns 0
// and leaves the pointer on the offending byte.
//

static int                              // O  - Unicode character or 0 on EOF
get_utf8(char *&ptr)                    // IO - Pointer to character
{
  int   lead = *ptr++ & 255;            // First byte of the sequence
  int   value;                          // Character being assembled
  int   remaining;                      // Continuation bytes still expected


  // Single bytes (including the terminating nul) are returned as-is...
  if (lead < 0xc0)
    return (lead);

  // Work out the payload bits and sequence length from the lead byte...
  if ((lead & 0xe0) == 0xc0)
  {
    // Two-byte UTF-8...
    value     = lead & 0x1f;
    remaining = 1;
  }
  else if ((lead & 0xf0) == 0xe0)
  {
    // Three-byte UTF-8...
    value     = lead & 0x0f;
    remaining = 2;
  }
  else if ((lead & 0xf8) == 0xf0)
  {
    // Four-byte UTF-8...
    value     = lead & 0x07;
    remaining = 3;
  }
  else
    return (lead);

  // Fold in six bits from each continuation byte...
  for (; remaining > 0; remaining --)
  {
    if ((*ptr & 0xc0) != 0x80)
      return (0);

    value = (value << 6) | (*ptr++ & 0x3f);
  }

  return (value);
}
668 | ||
669 | ||
670 | // | |
671 | // 'get_utf16()' - Get a UTF-16 character... | |
672 | // | |
673 | ||
674 | static int // O - Unicode character or 0 on EOF | |
675 | get_utf16(cups_file_t *fp, // I - File to read from | |
676 | ppdc_cs_t &cs) // IO - Character set of file | |
677 | { | |
678 | int ch; // Current character | |
679 | unsigned char buffer[3]; // Bytes | |
680 | ||
681 | ||
682 | if (cs == PPDC_CS_AUTO) | |
683 | { | |
684 | // Get byte-order-mark, if present... | |
685 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
686 | return (0); | |
687 | ||
688 | if (buffer[0] == 0xfe && buffer[1] == 0xff) | |
689 | { | |
690 | // Big-endian UTF-16... | |
691 | cs = PPDC_CS_UTF16BE; | |
692 | ||
693 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
694 | return (0); | |
695 | } | |
696 | else if (buffer[0] == 0xff && buffer[1] == 0xfe) | |
697 | { | |
698 | // Little-endian UTF-16... | |
699 | cs = PPDC_CS_UTF16LE; | |
700 | ||
701 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
702 | return (0); | |
703 | } | |
704 | else if (buffer[0] == 0x00 && buffer[1] != 0x00) | |
705 | { | |
706 | // No BOM, assume big-endian UTF-16... | |
707 | cs = PPDC_CS_UTF16BE; | |
708 | } | |
709 | else if (buffer[0] != 0x00 && buffer[1] == 0x00) | |
710 | { | |
711 | // No BOM, assume little-endian UTF-16... | |
712 | cs = PPDC_CS_UTF16LE; | |
713 | } | |
714 | else | |
715 | { | |
716 | // No BOM, assume UTF-8... | |
717 | cs = PPDC_CS_UTF8; | |
718 | ||
719 | cupsFileRewind(fp); | |
720 | } | |
721 | } | |
722 | else if (cs != PPDC_CS_UTF8) | |
723 | { | |
724 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
725 | return (0); | |
726 | } | |
727 | ||
728 | if (cs == PPDC_CS_UTF8) | |
729 | { | |
730 | // UTF-8 character... | |
4509bb49 MS |
731 | if ((ch = cupsFileGetChar(fp)) < 0) |
732 | return (0); | |
839a51c8 MS |
733 | |
734 | if ((ch & 0xe0) == 0xc0) | |
735 | { | |
736 | // Two-byte UTF-8... | |
737 | if (cupsFileRead(fp, (char *)buffer, 1) != 1) | |
738 | return (0); | |
739 | ||
740 | if ((buffer[0] & 0xc0) != 0x80) | |
741 | return (0); | |
742 | ||
743 | ch = ((ch & 0x1f) << 6) | (buffer[0] & 0x3f); | |
744 | } | |
745 | else if ((ch & 0xf0) == 0xe0) | |
746 | { | |
747 | // Three-byte UTF-8... | |
748 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
749 | return (0); | |
750 | ||
751 | if ((buffer[0] & 0xc0) != 0x80 || | |
752 | (buffer[1] & 0xc0) != 0x80) | |
753 | return (0); | |
754 | ||
755 | ch = ((((ch & 0x0f) << 6) | (buffer[0] & 0x3f)) << 6) | | |
756 | (buffer[1] & 0x3f); | |
757 | } | |
758 | else if ((ch & 0xf8) == 0xf0) | |
759 | { | |
760 | // Four-byte UTF-8... | |
761 | if (cupsFileRead(fp, (char *)buffer, 3) != 3) | |
762 | return (0); | |
763 | ||
764 | if ((buffer[0] & 0xc0) != 0x80 || | |
765 | (buffer[1] & 0xc0) != 0x80 || | |
766 | (buffer[2] & 0xc0) != 0x80) | |
767 | return (0); | |
768 | ||
769 | ch = ((((((ch & 0x07) << 6) | (buffer[0] & 0x3f)) << 6) | | |
770 | (buffer[1] & 0x3f)) << 6) | (buffer[2] & 0x3f); | |
771 | } | |
772 | } | |
773 | else | |
774 | { | |
775 | // UTF-16 character... | |
776 | if (cs == PPDC_CS_UTF16BE) | |
777 | ch = (buffer[0] << 8) | buffer[1]; | |
778 | else | |
779 | ch = (buffer[1] << 8) | buffer[0]; | |
780 | ||
781 | if (ch >= 0xd800 && ch <= 0xdbff) | |
782 | { | |
783 | // Handle multi-word encoding... | |
784 | int lch; | |
785 | ||
786 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
787 | return (0); | |
788 | ||
789 | if (cs == PPDC_CS_UTF16BE) | |
790 | lch = (buffer[0] << 8) | buffer[1]; | |
791 | else | |
792 | lch = (buffer[1] << 8) | buffer[0]; | |
793 | ||
794 | if (lch < 0xdc00 || lch >= 0xdfff) | |
795 | return (0); | |
796 | ||
797 | ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; | |
798 | } | |
799 | } | |
800 | ||
801 | return (ch); | |
802 | } | |
803 | ||
804 | ||
//
// 'put_utf8()' - Add a UTF-8 character to a string.
//
// Encodes the character at "ptr" and advances "ptr" past the bytes written.
// Nothing is written unless the whole sequence fits before "end", so the
// byte at "end" is never touched.
//

static int                              // O  - 0 on success, -1 on failure
put_utf8(int  ch,                       // I  - Unicode character
         char *&ptr,                    // IO - String pointer
         char *end)                     // I  - End of buffer
{
  int   count;                          // Number of bytes in the encoding


  // Work out how many bytes the character needs...
  if (ch < 0x80)
    count = 1;
  else if (ch < 0x800)
    count = 2;
  else if (ch < 0x10000)
    count = 3;
  else
    count = 4;

  // Make sure the whole sequence fits...
  if ((end - ptr) < count)
    return (-1);

  switch (count)
  {
    case 1 :                            // One-byte ASCII...
        ptr[0] = (char)ch;
        break;

    case 2 :                            // Two-byte UTF-8...
        ptr[0] = (char)(0xc0 | (ch >> 6));
        ptr[1] = (char)(0x80 | (ch & 0x3f));
        break;

    case 3 :                            // Three-byte UTF-8...
        ptr[0] = (char)(0xe0 | (ch >> 12));
        ptr[1] = (char)(0x80 | ((ch >> 6) & 0x3f));
        ptr[2] = (char)(0x80 | (ch & 0x3f));
        break;

    default :                           // Four-byte UTF-8...
        ptr[0] = (char)(0xf0 | (ch >> 18));
        ptr[1] = (char)(0x80 | ((ch >> 12) & 0x3f));
        ptr[2] = (char)(0x80 | ((ch >> 6) & 0x3f));
        ptr[3] = (char)(0x80 | (ch & 0x3f));
        break;
  }

  ptr += count;

  return (0);
}
855 | ||
856 | ||
857 | // | |
858 | // 'put_utf16()' - Write a UTF-16 character to a file. | |
859 | // | |
860 | ||
861 | static int // O - 0 on success, -1 on failure | |
862 | put_utf16(cups_file_t *fp, // I - File to write to | |
863 | int ch) // I - Unicode character | |
864 | { | |
865 | unsigned char buffer[4]; // Output buffer | |
866 | ||
867 | ||
868 | if (ch < 0x10000) | |
869 | { | |
870 | // One-word UTF-16 big-endian... | |
871 | buffer[0] = ch >> 8; | |
872 | buffer[1] = ch; | |
873 | ||
874 | if (cupsFileWrite(fp, (char *)buffer, 2) == 2) | |
875 | return (0); | |
876 | } | |
877 | else | |
878 | { | |
879 | // Two-word UTF-16 big-endian... | |
880 | ch -= 0x10000; | |
881 | ||
882 | buffer[0] = 0xd8 | (ch >> 18); | |
883 | buffer[1] = ch >> 10; | |
884 | buffer[2] = 0xdc | ((ch >> 8) & 0x03); | |
885 | buffer[3] = ch; | |
886 | ||
887 | if (cupsFileWrite(fp, (char *)buffer, 4) == 4) | |
888 | return (0); | |
889 | } | |
890 | ||
891 | return (-1); | |
892 | } | |
893 | ||
894 | ||
ac884b6a MS |
895 | // |
896 | // End of "$Id$". | |
897 | // |