]>
Commit | Line | Data |
---|---|---|
ac884b6a MS |
1 | // |
2 | // "$Id$" | |
3 | // | |
4 | // Shared message catalog class for the CUPS PPD Compiler. | |
5 | // | |
94da7e34 | 6 | // Copyright 2007-2009 by Apple Inc. |
ac884b6a MS |
7 | // Copyright 2002-2006 by Easy Software Products. |
8 | // | |
9 | // These coded instructions, statements, and computer programs are the | |
10 | // property of Apple Inc. and are protected by Federal copyright | |
11 | // law. Distribution and use rights are outlined in the file "LICENSE.txt" | |
12 | // which should have been included with this file. If this | |
13 | // file is missing or damaged, see the license at "http://www.cups.org/". | |
14 | // | |
15 | // Contents: | |
16 | // | |
e6013cfa MS |
17 | // ppdcCatalog::ppdcCatalog() - Create a shared message catalog. |
18 | // ppdcCatalog::~ppdcCatalog() - Destroy a shared message catalog. | |
19 | // ppdcCatalog::add_message() - Add a new message. | |
20 | // ppdcCatalog::find_message() - Find a message in a catalog... | |
21 | // ppdcCatalog::load_messages() - Load messages from a .po file. | |
22 | // ppdcCatalog::save_messages() - Save the messages to a .po file. | |
23 | // get_utf8() - Get a UTF-8 character. | |
24 | // get_utf16() - Get a UTF-16 character... | |
25 | // put_utf8() - Add a UTF-8 character to a string. | |
26 | // put_utf16() - Write a UTF-16 character to a file. | |
ac884b6a MS |
27 | // |
28 | ||
29 | // | |
30 | // Include necessary headers... | |
31 | // | |
32 | ||
33 | #include "ppdc.h" | |
34 | #include <cups/globals.h> | |
35 | ||
36 | ||
839a51c8 MS |
//
// Character encodings...
//

enum ppdc_cs_t                          // Character set of a catalog file
{
  PPDC_CS_AUTO,                         // Not yet known, detect when reading
  PPDC_CS_UTF8,                         // UTF-8
  PPDC_CS_UTF16BE,                      // UTF-16, big-endian
  PPDC_CS_UTF16LE                       // UTF-16, little-endian
};
48 | ||
49 | ||
50 | // | |
51 | // Local functions... | |
52 | // | |
53 | ||
54 | static int get_utf8(char *&ptr); | |
55 | static int get_utf16(cups_file_t *fp, ppdc_cs_t &cs); | |
56 | static int put_utf8(int ch, char *&ptr, char *end); | |
57 | static int put_utf16(cups_file_t *fp, int ch); | |
58 | ||
59 | ||
ac884b6a MS |
60 | // |
61 | // 'ppdcCatalog::ppdcCatalog()' - Create a shared message catalog. | |
62 | // | |
63 | ||
64 | ppdcCatalog::ppdcCatalog(const char *l, // I - Locale | |
65 | const char *f) // I - Message catalog file | |
66 | : ppdcShared() | |
67 | { | |
68 | _cups_globals_t *cg = _cupsGlobals(); | |
69 | // Global information | |
70 | ||
71 | ||
94da7e34 MS |
72 | PPDC_NEW; |
73 | ||
ac884b6a MS |
74 | locale = new ppdcString(l); |
75 | filename = new ppdcString(f); | |
76 | messages = new ppdcArray(); | |
77 | ||
78 | if (l) | |
79 | { | |
80 | // Try loading the base messages for this locale... | |
81 | char pofile[1024]; // Message catalog file | |
82 | ||
83 | ||
61cf44e2 | 84 | snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, l, l); |
ac884b6a MS |
85 | |
86 | if (load_messages(pofile) && strchr(l, '_')) | |
87 | { | |
88 | // Try the base locale... | |
89 | char baseloc[3]; // Base locale... | |
90 | ||
91 | ||
92 | strlcpy(baseloc, l, sizeof(baseloc)); | |
61cf44e2 | 93 | snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, |
ac884b6a MS |
94 | baseloc, baseloc); |
95 | ||
96 | load_messages(pofile); | |
97 | } | |
98 | } | |
99 | ||
100 | if (f) | |
101 | load_messages(f); | |
102 | } | |
103 | ||
104 | ||
105 | // | |
106 | // 'ppdcCatalog::~ppdcCatalog()' - Destroy a shared message catalog. | |
107 | // | |
108 | ||
109 | ppdcCatalog::~ppdcCatalog() | |
110 | { | |
94da7e34 MS |
111 | PPDC_DELETE; |
112 | ||
e4572d57 MS |
113 | locale->release(); |
114 | filename->release(); | |
115 | messages->release(); | |
ac884b6a MS |
116 | } |
117 | ||
118 | ||
119 | // | |
120 | // 'ppdcCatalog::add_message()' - Add a new message. | |
121 | // | |
122 | ||
123 | void | |
61cf44e2 MS |
124 | ppdcCatalog::add_message( |
125 | const char *id, // I - Message ID to add | |
126 | const char *string) // I - Translation string | |
ac884b6a MS |
127 | { |
128 | ppdcMessage *m; // Current message | |
129 | char text[1024]; // Text to translate | |
130 | ||
131 | ||
132 | // Range check input... | |
61cf44e2 | 133 | if (!id) |
ac884b6a MS |
134 | return; |
135 | ||
136 | // Verify that we don't already have the message ID... | |
137 | for (m = (ppdcMessage *)messages->first(); | |
138 | m; | |
139 | m = (ppdcMessage *)messages->next()) | |
140 | if (!strcmp(m->id->value, id)) | |
61cf44e2 MS |
141 | { |
142 | if (string) | |
143 | { | |
144 | m->string->release(); | |
145 | m->string = new ppdcString(string); | |
146 | } | |
ac884b6a | 147 | return; |
61cf44e2 | 148 | } |
ac884b6a MS |
149 | |
150 | // Add the message... | |
61cf44e2 MS |
151 | if (!string) |
152 | { | |
153 | snprintf(text, sizeof(text), "TRANSLATE %s", id); | |
154 | string = text; | |
155 | } | |
156 | ||
e6013cfa | 157 | messages->add(new ppdcMessage(id, string)); |
ac884b6a MS |
158 | } |
159 | ||
160 | ||
161 | // | |
162 | // 'ppdcCatalog::find_message()' - Find a message in a catalog... | |
163 | // | |
164 | ||
165 | const char * // O - Message text | |
166 | ppdcCatalog::find_message( | |
167 | const char *id) // I - Message ID | |
168 | { | |
169 | ppdcMessage *m; // Current message | |
170 | ||
171 | ||
172 | for (m = (ppdcMessage *)messages->first(); | |
173 | m; | |
174 | m = (ppdcMessage *)messages->next()) | |
175 | if (!strcmp(m->id->value, id)) | |
176 | return (m->string->value); | |
177 | ||
178 | return (id); | |
179 | } | |
180 | ||
181 | ||
182 | // | |
183 | // 'ppdcCatalog::load_messages()' - Load messages from a .po file. | |
184 | // | |
185 | ||
186 | int // O - 0 on success, -1 on failure | |
187 | ppdcCatalog::load_messages( | |
188 | const char *f) // I - Message catalog file | |
189 | { | |
190 | cups_file_t *fp; // Message file | |
ac884b6a MS |
191 | char line[4096], // Line buffer |
192 | *ptr, // Pointer into buffer | |
193 | id[4096], // Translation ID | |
194 | str[4096]; // Translation string | |
195 | int linenum; // Line number | |
196 | ||
197 | ||
198 | // Open the message catalog file... | |
199 | if ((fp = cupsFileOpen(f, "r")) == NULL) | |
200 | return (-1); | |
201 | ||
ae71f5de | 202 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
839a51c8 MS |
203 | goto unknown_load_format; |
204 | else if (!strcmp(ptr, ".strings")) | |
205 | { | |
206 | /* | |
207 | * Read messages in Mac OS X ".strings" format, which are UTF-16 text | |
208 | * files of the format: | |
209 | * | |
210 | * "id" = "str"; | |
211 | * | |
212 | * Strings files can also contain C-style comments. | |
213 | */ | |
214 | ||
215 | ppdc_cs_t cs = PPDC_CS_AUTO; // Character set for file | |
216 | int ch; // Current character from file | |
217 | char *end; // End of buffer | |
218 | ||
219 | ||
220 | id[0] = '\0'; | |
221 | str[0] = '\0'; | |
222 | ptr = NULL; | |
223 | end = NULL; | |
224 | ||
225 | while ((ch = get_utf16(fp, cs)) != 0) | |
226 | { | |
227 | if (ptr) | |
228 | { | |
229 | if (ch == '\\') | |
230 | { | |
231 | if ((ch = get_utf16(fp, cs)) == 0) | |
232 | break; | |
ac884b6a | 233 | |
4509bb49 | 234 | if (ch == 'n') |
839a51c8 MS |
235 | ch = '\n'; |
236 | else if (ch == 't') | |
237 | ch = '\t'; | |
238 | } | |
4509bb49 MS |
239 | else if (ch == '\"') |
240 | { | |
241 | *ptr = '\0'; | |
242 | ptr = NULL; | |
243 | } | |
ac884b6a | 244 | |
4509bb49 MS |
245 | if (ptr) |
246 | put_utf8(ch, ptr, end); | |
839a51c8 MS |
247 | } |
248 | else if (ch == '/') | |
249 | { | |
250 | // Start of a comment? | |
251 | if ((ch = get_utf16(fp, cs)) == 0) | |
252 | break; | |
ac884b6a | 253 | |
839a51c8 MS |
254 | if (ch == '*') |
255 | { | |
256 | // Skip C comment... | |
257 | int lastch = 0; | |
ac884b6a | 258 | |
839a51c8 MS |
259 | while ((ch = get_utf16(fp, cs)) != 0) |
260 | { | |
261 | if (ch == '/' && lastch == '*') | |
262 | break; | |
ac884b6a | 263 | |
839a51c8 MS |
264 | lastch = ch; |
265 | } | |
266 | } | |
267 | else if (ch == '/') | |
268 | { | |
269 | // Skip C++ comment... | |
270 | while ((ch = get_utf16(fp, cs)) != 0) | |
271 | if (ch == '\n') | |
272 | break; | |
273 | } | |
274 | } | |
275 | else if (ch == '\"') | |
276 | { | |
4509bb49 MS |
277 | // Start quoted string... |
278 | if (id[0]) | |
839a51c8 MS |
279 | { |
280 | ptr = str; | |
281 | end = str + sizeof(str) - 1; | |
282 | } | |
283 | else | |
284 | { | |
285 | ptr = id; | |
286 | end = id + sizeof(id) - 1; | |
287 | } | |
288 | } | |
289 | else if (ch == ';') | |
290 | { | |
291 | // Add string... | |
61cf44e2 | 292 | add_message(id, str); |
e6013cfa | 293 | id[0] = '\0'; |
839a51c8 | 294 | } |
ac884b6a | 295 | } |
839a51c8 MS |
296 | } |
297 | else if (!strcmp(ptr, ".po") || !strcmp(ptr, ".gz")) | |
298 | { | |
299 | /* | |
300 | * Read messages from the catalog file until EOF... | |
301 | * | |
302 | * The format is the GNU gettext .po format, which is fairly simple: | |
303 | * | |
304 | * msgid "some text" | |
305 | * msgstr "localized text" | |
306 | * | |
307 | * The ID and localized text can span multiple lines using the form: | |
308 | * | |
309 | * msgid "" | |
310 | * "some long text" | |
311 | * msgstr "" | |
312 | * "localized text spanning " | |
313 | * "multiple lines" | |
314 | */ | |
315 | ||
61cf44e2 MS |
316 | int which, // In msgid? |
317 | haveid, // Did we get a msgid string? | |
318 | havestr; // Did we get a msgstr string? | |
319 | ||
839a51c8 MS |
320 | linenum = 0; |
321 | id[0] = '\0'; | |
322 | str[0] = '\0'; | |
61cf44e2 MS |
323 | haveid = 0; |
324 | havestr = 0; | |
325 | which = 0; | |
839a51c8 MS |
326 | |
327 | while (cupsFileGets(fp, line, sizeof(line))) | |
328 | { | |
329 | linenum ++; | |
ac884b6a | 330 | |
839a51c8 MS |
331 | // Skip blank and comment lines... |
332 | if (line[0] == '#' || !line[0]) | |
333 | continue; | |
ac884b6a | 334 | |
839a51c8 | 335 | // Strip the trailing quote... |
ae71f5de | 336 | if ((ptr = (char *)strrchr(line, '\"')) == NULL) |
839a51c8 | 337 | { |
61cf44e2 MS |
338 | _cupsLangPrintf(stderr, |
339 | _("ERROR: Expected quoted string on line %d of %s!\n"), | |
340 | linenum, f); | |
839a51c8 MS |
341 | cupsFileClose(fp); |
342 | return (-1); | |
343 | } | |
ac884b6a | 344 | |
839a51c8 MS |
345 | *ptr = '\0'; |
346 | ||
347 | // Find start of value... | |
348 | if ((ptr = strchr(line, '\"')) == NULL) | |
349 | { | |
61cf44e2 MS |
350 | _cupsLangPrintf(stderr, |
351 | _("ERROR: Expected quoted string on line %d of %s!\n"), | |
352 | linenum, f); | |
839a51c8 MS |
353 | cupsFileClose(fp); |
354 | return (-1); | |
355 | } | |
356 | ||
357 | ptr ++; | |
358 | ||
359 | // Unquote the text... | |
360 | char *sptr, *dptr; // Source/destination pointers | |
361 | ||
362 | for (sptr = ptr, dptr = ptr; *sptr;) | |
ac884b6a | 363 | { |
839a51c8 | 364 | if (*sptr == '\\') |
ac884b6a | 365 | { |
839a51c8 MS |
366 | sptr ++; |
367 | if (isdigit(*sptr)) | |
368 | { | |
369 | *dptr = 0; | |
370 | ||
371 | while (isdigit(*sptr)) | |
372 | { | |
373 | *dptr = *dptr * 8 + *sptr - '0'; | |
374 | sptr ++; | |
375 | } | |
ac884b6a | 376 | |
839a51c8 MS |
377 | dptr ++; |
378 | } | |
379 | else | |
ac884b6a | 380 | { |
839a51c8 MS |
381 | if (*sptr == 'n') |
382 | *dptr++ = '\n'; | |
383 | else if (*sptr == 'r') | |
384 | *dptr++ = '\r'; | |
385 | else if (*sptr == 't') | |
386 | *dptr++ = '\t'; | |
387 | else | |
388 | *dptr++ = *sptr; | |
389 | ||
ac884b6a MS |
390 | sptr ++; |
391 | } | |
ac884b6a MS |
392 | } |
393 | else | |
839a51c8 | 394 | *dptr++ = *sptr++; |
ac884b6a | 395 | } |
ac884b6a | 396 | |
839a51c8 | 397 | *dptr = '\0'; |
ac884b6a | 398 | |
839a51c8 MS |
399 | // Create or add to a message... |
400 | if (!strncmp(line, "msgid", 5)) | |
ac884b6a | 401 | { |
61cf44e2 MS |
402 | if (haveid && havestr) |
403 | add_message(id, str); | |
839a51c8 MS |
404 | |
405 | strlcpy(id, ptr, sizeof(id)); | |
406 | str[0] = '\0'; | |
61cf44e2 MS |
407 | haveid = 1; |
408 | havestr = 0; | |
409 | which = 1; | |
ac884b6a | 410 | } |
839a51c8 MS |
411 | else if (!strncmp(line, "msgstr", 6)) |
412 | { | |
61cf44e2 | 413 | if (!haveid) |
839a51c8 | 414 | { |
61cf44e2 MS |
415 | _cupsLangPrintf(stderr, |
416 | _("ERROR: Need a msgid line before any " | |
417 | "translation strings on line %d of %s!\n"), | |
418 | linenum, f); | |
839a51c8 MS |
419 | cupsFileClose(fp); |
420 | return (-1); | |
421 | } | |
ac884b6a | 422 | |
839a51c8 | 423 | strlcpy(str, ptr, sizeof(str)); |
61cf44e2 MS |
424 | havestr = 1; |
425 | which = 2; | |
839a51c8 | 426 | } |
61cf44e2 | 427 | else if (line[0] == '\"' && which == 2) |
839a51c8 | 428 | strlcat(str, ptr, sizeof(str)); |
61cf44e2 | 429 | else if (line[0] == '\"' && which == 1) |
839a51c8 MS |
430 | strlcat(id, ptr, sizeof(id)); |
431 | else | |
ac884b6a | 432 | { |
61cf44e2 MS |
433 | _cupsLangPrintf(stderr, _("ERROR: Unexpected text on line %d of %s!\n"), |
434 | linenum, f); | |
ac884b6a MS |
435 | cupsFileClose(fp); |
436 | return (-1); | |
437 | } | |
ac884b6a | 438 | } |
839a51c8 | 439 | |
61cf44e2 MS |
440 | if (haveid && havestr) |
441 | add_message(id, str); | |
ac884b6a | 442 | } |
839a51c8 MS |
443 | else |
444 | goto unknown_load_format; | |
ac884b6a | 445 | |
839a51c8 MS |
446 | /* |
447 | * Close the file and return... | |
448 | */ | |
ac884b6a MS |
449 | |
450 | cupsFileClose(fp); | |
451 | ||
452 | return (0); | |
839a51c8 MS |
453 | |
454 | /* | |
455 | * Unknown format error... | |
456 | */ | |
457 | ||
458 | unknown_load_format: | |
459 | ||
61cf44e2 MS |
460 | _cupsLangPrintf(stderr, |
461 | _("ERROR: Unknown message catalog format for \"%s\"!\n"), f); | |
839a51c8 MS |
462 | cupsFileClose(fp); |
463 | return (-1); | |
ac884b6a MS |
464 | } |
465 | ||
466 | ||
467 | // | |
468 | // 'ppdcCatalog::save_messages()' - Save the messages to a .po file. | |
469 | // | |
470 | ||
471 | int // O - 0 on success, -1 on error | |
472 | ppdcCatalog::save_messages( | |
473 | const char *f) // I - File to save to | |
474 | { | |
475 | cups_file_t *fp; // Message file | |
476 | ppdcMessage *m; // Current message | |
839a51c8 MS |
477 | char *ptr; // Pointer into string |
478 | int utf16; // Output UTF-16 .strings file? | |
479 | int ch; // Current character | |
ac884b6a MS |
480 | |
481 | ||
839a51c8 | 482 | // Open the file... |
ae71f5de | 483 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
ac884b6a MS |
484 | return (-1); |
485 | ||
839a51c8 MS |
486 | if (!strcmp(ptr, ".gz")) |
487 | fp = cupsFileOpen(f, "w9"); | |
488 | else | |
489 | fp = cupsFileOpen(f, "w"); | |
490 | ||
491 | if (!fp) | |
492 | return (-1); | |
493 | ||
494 | // For .strings files, write a BOM for big-endian output... | |
495 | utf16 = !strcmp(ptr, ".strings"); | |
496 | ||
497 | if (utf16) | |
498 | put_utf16(fp, 0xfeff); | |
499 | ||
500 | // Loop through all of the messages... | |
ac884b6a MS |
501 | for (m = (ppdcMessage *)messages->first(); |
502 | m; | |
503 | m = (ppdcMessage *)messages->next()) | |
504 | { | |
839a51c8 MS |
505 | if (utf16) |
506 | { | |
507 | put_utf16(fp, '\"'); | |
ac884b6a | 508 | |
839a51c8 MS |
509 | ptr = m->id->value; |
510 | while ((ch = get_utf8(ptr)) != 0) | |
511 | switch (ch) | |
512 | { | |
513 | case '\n' : | |
514 | put_utf16(fp, '\\'); | |
515 | put_utf16(fp, 'n'); | |
516 | break; | |
517 | case '\\' : | |
518 | put_utf16(fp, '\\'); | |
519 | put_utf16(fp, '\\'); | |
520 | break; | |
521 | case '\"' : | |
522 | put_utf16(fp, '\\'); | |
523 | put_utf16(fp, '\"'); | |
524 | break; | |
525 | default : | |
526 | put_utf16(fp, ch); | |
527 | break; | |
528 | } | |
529 | ||
530 | put_utf16(fp, '\"'); | |
531 | put_utf16(fp, ' '); | |
532 | put_utf16(fp, '='); | |
533 | put_utf16(fp, ' '); | |
534 | put_utf16(fp, '\"'); | |
535 | ||
536 | ptr = m->string->value; | |
537 | while ((ch = get_utf8(ptr)) != 0) | |
538 | switch (ch) | |
539 | { | |
540 | case '\n' : | |
541 | put_utf16(fp, '\\'); | |
542 | put_utf16(fp, 'n'); | |
543 | break; | |
544 | case '\\' : | |
545 | put_utf16(fp, '\\'); | |
546 | put_utf16(fp, '\\'); | |
547 | break; | |
548 | case '\"' : | |
549 | put_utf16(fp, '\\'); | |
550 | put_utf16(fp, '\"'); | |
551 | break; | |
552 | default : | |
553 | put_utf16(fp, ch); | |
554 | break; | |
555 | } | |
ac884b6a | 556 | |
839a51c8 MS |
557 | put_utf16(fp, '\"'); |
558 | put_utf16(fp, ';'); | |
559 | put_utf16(fp, '\n'); | |
560 | } | |
561 | else | |
562 | { | |
563 | cupsFilePuts(fp, "msgid \""); | |
564 | for (ptr = m->id->value; *ptr; ptr ++) | |
565 | switch (*ptr) | |
566 | { | |
567 | case '\n' : | |
568 | cupsFilePuts(fp, "\\n"); | |
569 | break; | |
570 | case '\\' : | |
571 | cupsFilePuts(fp, "\\\\"); | |
572 | break; | |
573 | case '\"' : | |
574 | cupsFilePuts(fp, "\\\""); | |
575 | break; | |
576 | default : | |
577 | cupsFilePutChar(fp, *ptr); | |
578 | break; | |
579 | } | |
580 | cupsFilePuts(fp, "\"\n"); | |
581 | ||
582 | cupsFilePuts(fp, "msgstr \""); | |
583 | for (ptr = m->string->value; *ptr; ptr ++) | |
584 | switch (*ptr) | |
585 | { | |
586 | case '\n' : | |
587 | cupsFilePuts(fp, "\\n"); | |
588 | break; | |
589 | case '\\' : | |
590 | cupsFilePuts(fp, "\\\\"); | |
591 | break; | |
592 | case '\"' : | |
593 | cupsFilePuts(fp, "\\\""); | |
594 | break; | |
595 | default : | |
596 | cupsFilePutChar(fp, *ptr); | |
597 | break; | |
598 | } | |
599 | cupsFilePuts(fp, "\"\n"); | |
600 | ||
601 | cupsFilePutChar(fp, '\n'); | |
602 | } | |
ac884b6a MS |
603 | } |
604 | ||
605 | cupsFileClose(fp); | |
606 | ||
607 | return (0); | |
608 | } | |
609 | ||
610 | ||
839a51c8 MS |
//
// 'get_utf8()' - Get a UTF-8 character.
//
// Decodes one character starting at "ptr" and advances "ptr" past the bytes
// that were consumed.  Bytes below 0xc0 and lead bytes of 0xf8 and above are
// returned unchanged; a missing continuation byte yields 0 with "ptr" left
// on the offending byte.
//

static int                              // O  - Unicode character or 0 on EOF
get_utf8(char *&ptr)                    // IO - Pointer to character
{
  int ch = *ptr++ & 255;                // Lead byte, then decoded character
  int extra;                            // Continuation bytes still expected


  // Single-byte values are returned as-is...
  if (ch < 0xc0)
    return (ch);

  // Work out the sequence length and keep only the payload bits of the lead
  // byte...
  if ((ch & 0xe0) == 0xc0)
  {
    // Two-byte UTF-8...
    ch    &= 0x1f;
    extra = 1;
  }
  else if ((ch & 0xf0) == 0xe0)
  {
    // Three-byte UTF-8...
    ch    &= 0x0f;
    extra = 2;
  }
  else if ((ch & 0xf8) == 0xf0)
  {
    // Four-byte UTF-8...
    ch    &= 0x07;
    extra = 3;
  }
  else
    return (ch);

  // Fold in six bits from each continuation byte...
  while (extra-- > 0)
  {
    if ((*ptr & 0xc0) != 0x80)
      return (0);

    ch = (ch << 6) | (*ptr++ & 0x3f);
  }

  return (ch);
}
666 | ||
667 | ||
668 | // | |
669 | // 'get_utf16()' - Get a UTF-16 character... | |
670 | // | |
671 | ||
672 | static int // O - Unicode character or 0 on EOF | |
673 | get_utf16(cups_file_t *fp, // I - File to read from | |
674 | ppdc_cs_t &cs) // IO - Character set of file | |
675 | { | |
676 | int ch; // Current character | |
677 | unsigned char buffer[3]; // Bytes | |
678 | ||
679 | ||
680 | if (cs == PPDC_CS_AUTO) | |
681 | { | |
682 | // Get byte-order-mark, if present... | |
683 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
684 | return (0); | |
685 | ||
686 | if (buffer[0] == 0xfe && buffer[1] == 0xff) | |
687 | { | |
688 | // Big-endian UTF-16... | |
689 | cs = PPDC_CS_UTF16BE; | |
690 | ||
691 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
692 | return (0); | |
693 | } | |
694 | else if (buffer[0] == 0xff && buffer[1] == 0xfe) | |
695 | { | |
696 | // Little-endian UTF-16... | |
697 | cs = PPDC_CS_UTF16LE; | |
698 | ||
699 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
700 | return (0); | |
701 | } | |
702 | else if (buffer[0] == 0x00 && buffer[1] != 0x00) | |
703 | { | |
704 | // No BOM, assume big-endian UTF-16... | |
705 | cs = PPDC_CS_UTF16BE; | |
706 | } | |
707 | else if (buffer[0] != 0x00 && buffer[1] == 0x00) | |
708 | { | |
709 | // No BOM, assume little-endian UTF-16... | |
710 | cs = PPDC_CS_UTF16LE; | |
711 | } | |
712 | else | |
713 | { | |
714 | // No BOM, assume UTF-8... | |
715 | cs = PPDC_CS_UTF8; | |
716 | ||
717 | cupsFileRewind(fp); | |
718 | } | |
719 | } | |
720 | else if (cs != PPDC_CS_UTF8) | |
721 | { | |
722 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
723 | return (0); | |
724 | } | |
725 | ||
726 | if (cs == PPDC_CS_UTF8) | |
727 | { | |
728 | // UTF-8 character... | |
4509bb49 MS |
729 | if ((ch = cupsFileGetChar(fp)) < 0) |
730 | return (0); | |
839a51c8 MS |
731 | |
732 | if ((ch & 0xe0) == 0xc0) | |
733 | { | |
734 | // Two-byte UTF-8... | |
735 | if (cupsFileRead(fp, (char *)buffer, 1) != 1) | |
736 | return (0); | |
737 | ||
738 | if ((buffer[0] & 0xc0) != 0x80) | |
739 | return (0); | |
740 | ||
741 | ch = ((ch & 0x1f) << 6) | (buffer[0] & 0x3f); | |
742 | } | |
743 | else if ((ch & 0xf0) == 0xe0) | |
744 | { | |
745 | // Three-byte UTF-8... | |
746 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
747 | return (0); | |
748 | ||
749 | if ((buffer[0] & 0xc0) != 0x80 || | |
750 | (buffer[1] & 0xc0) != 0x80) | |
751 | return (0); | |
752 | ||
753 | ch = ((((ch & 0x0f) << 6) | (buffer[0] & 0x3f)) << 6) | | |
754 | (buffer[1] & 0x3f); | |
755 | } | |
756 | else if ((ch & 0xf8) == 0xf0) | |
757 | { | |
758 | // Four-byte UTF-8... | |
759 | if (cupsFileRead(fp, (char *)buffer, 3) != 3) | |
760 | return (0); | |
761 | ||
762 | if ((buffer[0] & 0xc0) != 0x80 || | |
763 | (buffer[1] & 0xc0) != 0x80 || | |
764 | (buffer[2] & 0xc0) != 0x80) | |
765 | return (0); | |
766 | ||
767 | ch = ((((((ch & 0x07) << 6) | (buffer[0] & 0x3f)) << 6) | | |
768 | (buffer[1] & 0x3f)) << 6) | (buffer[2] & 0x3f); | |
769 | } | |
770 | } | |
771 | else | |
772 | { | |
773 | // UTF-16 character... | |
774 | if (cs == PPDC_CS_UTF16BE) | |
775 | ch = (buffer[0] << 8) | buffer[1]; | |
776 | else | |
777 | ch = (buffer[1] << 8) | buffer[0]; | |
778 | ||
779 | if (ch >= 0xd800 && ch <= 0xdbff) | |
780 | { | |
781 | // Handle multi-word encoding... | |
782 | int lch; | |
783 | ||
784 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
785 | return (0); | |
786 | ||
787 | if (cs == PPDC_CS_UTF16BE) | |
788 | lch = (buffer[0] << 8) | buffer[1]; | |
789 | else | |
790 | lch = (buffer[1] << 8) | buffer[0]; | |
791 | ||
792 | if (lch < 0xdc00 || lch >= 0xdfff) | |
793 | return (0); | |
794 | ||
795 | ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; | |
796 | } | |
797 | } | |
798 | ||
799 | return (ch); | |
800 | } | |
801 | ||
802 | ||
//
// 'put_utf8()' - Add a UTF-8 character to a string.
//
// Writes the encoded bytes at "ptr" and advances it.  Nothing is written,
// and "ptr" is left alone, unless the whole sequence fits before "end".
//

static int                              // O  - 0 on success, -1 on failure
put_utf8(int  ch,                       // I  - Unicode character
         char *&ptr,                    // IO - String pointer
         char *end)                     // I  - End of buffer
{
  int len;                              // Number of bytes to write


  // Work out how many bytes the character needs...
  if (ch < 0x80)
    len = 1;
  else if (ch < 0x800)
    len = 2;
  else if (ch < 0x10000)
    len = 3;
  else
    len = 4;

  // Make sure all of them fit...
  if ((end - ptr) < len)
    return (-1);

  switch (len)
  {
    case 1 :
        // One-byte ASCII...
        *ptr++ = static_cast<char>(ch);
        break;

    case 2 :
        // Two-byte UTF-8...
        *ptr++ = static_cast<char>(0xc0 | (ch >> 6));
        *ptr++ = static_cast<char>(0x80 | (ch & 0x3f));
        break;

    case 3 :
        // Three-byte UTF-8...
        *ptr++ = static_cast<char>(0xe0 | (ch >> 12));
        *ptr++ = static_cast<char>(0x80 | ((ch >> 6) & 0x3f));
        *ptr++ = static_cast<char>(0x80 | (ch & 0x3f));
        break;

    default :
        // Four-byte UTF-8...
        *ptr++ = static_cast<char>(0xf0 | (ch >> 18));
        *ptr++ = static_cast<char>(0x80 | ((ch >> 12) & 0x3f));
        *ptr++ = static_cast<char>(0x80 | ((ch >> 6) & 0x3f));
        *ptr++ = static_cast<char>(0x80 | (ch & 0x3f));
        break;
  }

  return (0);
}
853 | ||
854 | ||
855 | // | |
856 | // 'put_utf16()' - Write a UTF-16 character to a file. | |
857 | // | |
858 | ||
859 | static int // O - 0 on success, -1 on failure | |
860 | put_utf16(cups_file_t *fp, // I - File to write to | |
861 | int ch) // I - Unicode character | |
862 | { | |
863 | unsigned char buffer[4]; // Output buffer | |
864 | ||
865 | ||
866 | if (ch < 0x10000) | |
867 | { | |
868 | // One-word UTF-16 big-endian... | |
869 | buffer[0] = ch >> 8; | |
870 | buffer[1] = ch; | |
871 | ||
872 | if (cupsFileWrite(fp, (char *)buffer, 2) == 2) | |
873 | return (0); | |
874 | } | |
875 | else | |
876 | { | |
877 | // Two-word UTF-16 big-endian... | |
878 | ch -= 0x10000; | |
879 | ||
880 | buffer[0] = 0xd8 | (ch >> 18); | |
881 | buffer[1] = ch >> 10; | |
882 | buffer[2] = 0xdc | ((ch >> 8) & 0x03); | |
883 | buffer[3] = ch; | |
884 | ||
885 | if (cupsFileWrite(fp, (char *)buffer, 4) == 4) | |
886 | return (0); | |
887 | } | |
888 | ||
889 | return (-1); | |
890 | } | |
891 | ||
892 | ||
ac884b6a MS |
893 | // |
894 | // End of "$Id$". | |
895 | // |