]>
Commit | Line | Data |
---|---|---|
ac884b6a MS |
1 | // |
2 | // "$Id$" | |
3 | // | |
4 | // Shared message catalog class for the CUPS PPD Compiler. | |
5 | // | |
6 | // Copyright 2007-2008 by Apple Inc. | |
7 | // Copyright 2002-2006 by Easy Software Products. | |
8 | // | |
9 | // These coded instructions, statements, and computer programs are the | |
10 | // property of Apple Inc. and are protected by Federal copyright | |
11 | // law. Distribution and use rights are outlined in the file "LICENSE.txt" | |
12 | // which should have been included with this file. If this | |
13 | // file is missing or damaged, see the license at "http://www.cups.org/". | |
14 | // | |
15 | // Contents: | |
16 | // | |
e6013cfa MS |
17 | // ppdcCatalog::ppdcCatalog() - Create a shared message catalog. |
18 | // ppdcCatalog::~ppdcCatalog() - Destroy a shared message catalog. | |
19 | // ppdcCatalog::add_message() - Add a new message. | |
20 | // ppdcCatalog::find_message() - Find a message in a catalog... | |
21 | // ppdcCatalog::load_messages() - Load messages from a .po file. | |
22 | // ppdcCatalog::save_messages() - Save the messages to a .po file. | |
23 | // get_utf8() - Get a UTF-8 character. | |
24 | // get_utf16() - Get a UTF-16 character... | |
25 | // put_utf8() - Add a UTF-8 character to a string. | |
26 | // put_utf16() - Write a UTF-16 character to a file. | |
ac884b6a MS |
27 | // |
28 | ||
29 | // | |
30 | // Include necessary headers... | |
31 | // | |
32 | ||
33 | #include "ppdc.h" | |
34 | #include <cups/globals.h> | |
35 | ||
36 | ||
839a51c8 MS |
37 | // |
38 | // Character encodings... | |
39 | // | |
40 | ||
enum ppdc_cs_t                          // Encodings used when reading catalogs
{
  PPDC_CS_AUTO = 0,                     // Not determined yet - detect from data
  PPDC_CS_UTF8 = 1,                     // UTF-8
  PPDC_CS_UTF16BE = 2,                  // UTF-16, big-endian byte order
  PPDC_CS_UTF16LE = 3                   // UTF-16, little-endian byte order
};
48 | ||
49 | ||
50 | // | |
51 | // Local functions... | |
52 | // | |
53 | ||
54 | static int get_utf8(char *&ptr); | |
55 | static int get_utf16(cups_file_t *fp, ppdc_cs_t &cs); | |
56 | static int put_utf8(int ch, char *&ptr, char *end); | |
57 | static int put_utf16(cups_file_t *fp, int ch); | |
58 | ||
59 | ||
ac884b6a MS |
60 | // |
61 | // 'ppdcCatalog::ppdcCatalog()' - Create a shared message catalog. | |
62 | // | |
63 | ||
64 | ppdcCatalog::ppdcCatalog(const char *l, // I - Locale | |
65 | const char *f) // I - Message catalog file | |
66 | : ppdcShared() | |
67 | { | |
68 | _cups_globals_t *cg = _cupsGlobals(); | |
69 | // Global information | |
70 | ||
71 | ||
72 | locale = new ppdcString(l); | |
73 | filename = new ppdcString(f); | |
74 | messages = new ppdcArray(); | |
75 | ||
76 | if (l) | |
77 | { | |
78 | // Try loading the base messages for this locale... | |
79 | char pofile[1024]; // Message catalog file | |
80 | ||
81 | ||
61cf44e2 | 82 | snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, l, l); |
ac884b6a MS |
83 | |
84 | if (load_messages(pofile) && strchr(l, '_')) | |
85 | { | |
86 | // Try the base locale... | |
87 | char baseloc[3]; // Base locale... | |
88 | ||
89 | ||
90 | strlcpy(baseloc, l, sizeof(baseloc)); | |
61cf44e2 | 91 | snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, |
ac884b6a MS |
92 | baseloc, baseloc); |
93 | ||
94 | load_messages(pofile); | |
95 | } | |
96 | } | |
97 | ||
98 | if (f) | |
99 | load_messages(f); | |
100 | } | |
101 | ||
102 | ||
103 | // | |
104 | // 'ppdcCatalog::~ppdcCatalog()' - Destroy a shared message catalog. | |
105 | // | |
106 | ||
107 | ppdcCatalog::~ppdcCatalog() | |
108 | { | |
e4572d57 MS |
109 | locale->release(); |
110 | filename->release(); | |
111 | messages->release(); | |
ac884b6a MS |
112 | } |
113 | ||
114 | ||
115 | // | |
116 | // 'ppdcCatalog::add_message()' - Add a new message. | |
117 | // | |
118 | ||
119 | void | |
61cf44e2 MS |
120 | ppdcCatalog::add_message( |
121 | const char *id, // I - Message ID to add | |
122 | const char *string) // I - Translation string | |
ac884b6a MS |
123 | { |
124 | ppdcMessage *m; // Current message | |
125 | char text[1024]; // Text to translate | |
126 | ||
127 | ||
128 | // Range check input... | |
61cf44e2 | 129 | if (!id) |
ac884b6a MS |
130 | return; |
131 | ||
132 | // Verify that we don't already have the message ID... | |
133 | for (m = (ppdcMessage *)messages->first(); | |
134 | m; | |
135 | m = (ppdcMessage *)messages->next()) | |
136 | if (!strcmp(m->id->value, id)) | |
61cf44e2 MS |
137 | { |
138 | if (string) | |
139 | { | |
140 | m->string->release(); | |
141 | m->string = new ppdcString(string); | |
142 | } | |
ac884b6a | 143 | return; |
61cf44e2 | 144 | } |
ac884b6a MS |
145 | |
146 | // Add the message... | |
61cf44e2 MS |
147 | if (!string) |
148 | { | |
149 | snprintf(text, sizeof(text), "TRANSLATE %s", id); | |
150 | string = text; | |
151 | } | |
152 | ||
e6013cfa | 153 | messages->add(new ppdcMessage(id, string)); |
ac884b6a MS |
154 | } |
155 | ||
156 | ||
157 | // | |
158 | // 'ppdcCatalog::find_message()' - Find a message in a catalog... | |
159 | // | |
160 | ||
161 | const char * // O - Message text | |
162 | ppdcCatalog::find_message( | |
163 | const char *id) // I - Message ID | |
164 | { | |
165 | ppdcMessage *m; // Current message | |
166 | ||
167 | ||
168 | for (m = (ppdcMessage *)messages->first(); | |
169 | m; | |
170 | m = (ppdcMessage *)messages->next()) | |
171 | if (!strcmp(m->id->value, id)) | |
172 | return (m->string->value); | |
173 | ||
174 | return (id); | |
175 | } | |
176 | ||
177 | ||
178 | // | |
179 | // 'ppdcCatalog::load_messages()' - Load messages from a .po file. | |
180 | // | |
181 | ||
182 | int // O - 0 on success, -1 on failure | |
183 | ppdcCatalog::load_messages( | |
184 | const char *f) // I - Message catalog file | |
185 | { | |
186 | cups_file_t *fp; // Message file | |
ac884b6a MS |
187 | char line[4096], // Line buffer |
188 | *ptr, // Pointer into buffer | |
189 | id[4096], // Translation ID | |
190 | str[4096]; // Translation string | |
191 | int linenum; // Line number | |
192 | ||
193 | ||
194 | // Open the message catalog file... | |
195 | if ((fp = cupsFileOpen(f, "r")) == NULL) | |
196 | return (-1); | |
197 | ||
ae71f5de | 198 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
839a51c8 MS |
199 | goto unknown_load_format; |
200 | else if (!strcmp(ptr, ".strings")) | |
201 | { | |
202 | /* | |
203 | * Read messages in Mac OS X ".strings" format, which are UTF-16 text | |
204 | * files of the format: | |
205 | * | |
206 | * "id" = "str"; | |
207 | * | |
208 | * Strings files can also contain C-style comments. | |
209 | */ | |
210 | ||
211 | ppdc_cs_t cs = PPDC_CS_AUTO; // Character set for file | |
212 | int ch; // Current character from file | |
213 | char *end; // End of buffer | |
214 | ||
215 | ||
216 | id[0] = '\0'; | |
217 | str[0] = '\0'; | |
218 | ptr = NULL; | |
219 | end = NULL; | |
220 | ||
221 | while ((ch = get_utf16(fp, cs)) != 0) | |
222 | { | |
223 | if (ptr) | |
224 | { | |
225 | if (ch == '\\') | |
226 | { | |
227 | if ((ch = get_utf16(fp, cs)) == 0) | |
228 | break; | |
ac884b6a | 229 | |
4509bb49 | 230 | if (ch == 'n') |
839a51c8 MS |
231 | ch = '\n'; |
232 | else if (ch == 't') | |
233 | ch = '\t'; | |
234 | } | |
4509bb49 MS |
235 | else if (ch == '\"') |
236 | { | |
237 | *ptr = '\0'; | |
238 | ptr = NULL; | |
239 | } | |
ac884b6a | 240 | |
4509bb49 MS |
241 | if (ptr) |
242 | put_utf8(ch, ptr, end); | |
839a51c8 MS |
243 | } |
244 | else if (ch == '/') | |
245 | { | |
246 | // Start of a comment? | |
247 | if ((ch = get_utf16(fp, cs)) == 0) | |
248 | break; | |
ac884b6a | 249 | |
839a51c8 MS |
250 | if (ch == '*') |
251 | { | |
252 | // Skip C comment... | |
253 | int lastch = 0; | |
ac884b6a | 254 | |
839a51c8 MS |
255 | while ((ch = get_utf16(fp, cs)) != 0) |
256 | { | |
257 | if (ch == '/' && lastch == '*') | |
258 | break; | |
ac884b6a | 259 | |
839a51c8 MS |
260 | lastch = ch; |
261 | } | |
262 | } | |
263 | else if (ch == '/') | |
264 | { | |
265 | // Skip C++ comment... | |
266 | while ((ch = get_utf16(fp, cs)) != 0) | |
267 | if (ch == '\n') | |
268 | break; | |
269 | } | |
270 | } | |
271 | else if (ch == '\"') | |
272 | { | |
4509bb49 MS |
273 | // Start quoted string... |
274 | if (id[0]) | |
839a51c8 MS |
275 | { |
276 | ptr = str; | |
277 | end = str + sizeof(str) - 1; | |
278 | } | |
279 | else | |
280 | { | |
281 | ptr = id; | |
282 | end = id + sizeof(id) - 1; | |
283 | } | |
284 | } | |
285 | else if (ch == ';') | |
286 | { | |
287 | // Add string... | |
61cf44e2 | 288 | add_message(id, str); |
e6013cfa | 289 | id[0] = '\0'; |
839a51c8 | 290 | } |
ac884b6a | 291 | } |
839a51c8 MS |
292 | } |
293 | else if (!strcmp(ptr, ".po") || !strcmp(ptr, ".gz")) | |
294 | { | |
295 | /* | |
296 | * Read messages from the catalog file until EOF... | |
297 | * | |
298 | * The format is the GNU gettext .po format, which is fairly simple: | |
299 | * | |
300 | * msgid "some text" | |
301 | * msgstr "localized text" | |
302 | * | |
303 | * The ID and localized text can span multiple lines using the form: | |
304 | * | |
305 | * msgid "" | |
306 | * "some long text" | |
307 | * msgstr "" | |
308 | * "localized text spanning " | |
309 | * "multiple lines" | |
310 | */ | |
311 | ||
61cf44e2 MS |
312 | int which, // In msgid? |
313 | haveid, // Did we get a msgid string? | |
314 | havestr; // Did we get a msgstr string? | |
315 | ||
839a51c8 MS |
316 | linenum = 0; |
317 | id[0] = '\0'; | |
318 | str[0] = '\0'; | |
61cf44e2 MS |
319 | haveid = 0; |
320 | havestr = 0; | |
321 | which = 0; | |
839a51c8 MS |
322 | |
323 | while (cupsFileGets(fp, line, sizeof(line))) | |
324 | { | |
325 | linenum ++; | |
ac884b6a | 326 | |
839a51c8 MS |
327 | // Skip blank and comment lines... |
328 | if (line[0] == '#' || !line[0]) | |
329 | continue; | |
ac884b6a | 330 | |
839a51c8 | 331 | // Strip the trailing quote... |
ae71f5de | 332 | if ((ptr = (char *)strrchr(line, '\"')) == NULL) |
839a51c8 | 333 | { |
61cf44e2 MS |
334 | _cupsLangPrintf(stderr, |
335 | _("ERROR: Expected quoted string on line %d of %s!\n"), | |
336 | linenum, f); | |
839a51c8 MS |
337 | cupsFileClose(fp); |
338 | return (-1); | |
339 | } | |
ac884b6a | 340 | |
839a51c8 MS |
341 | *ptr = '\0'; |
342 | ||
343 | // Find start of value... | |
344 | if ((ptr = strchr(line, '\"')) == NULL) | |
345 | { | |
61cf44e2 MS |
346 | _cupsLangPrintf(stderr, |
347 | _("ERROR: Expected quoted string on line %d of %s!\n"), | |
348 | linenum, f); | |
839a51c8 MS |
349 | cupsFileClose(fp); |
350 | return (-1); | |
351 | } | |
352 | ||
353 | ptr ++; | |
354 | ||
355 | // Unquote the text... | |
356 | char *sptr, *dptr; // Source/destination pointers | |
357 | ||
358 | for (sptr = ptr, dptr = ptr; *sptr;) | |
ac884b6a | 359 | { |
839a51c8 | 360 | if (*sptr == '\\') |
ac884b6a | 361 | { |
839a51c8 MS |
362 | sptr ++; |
363 | if (isdigit(*sptr)) | |
364 | { | |
365 | *dptr = 0; | |
366 | ||
367 | while (isdigit(*sptr)) | |
368 | { | |
369 | *dptr = *dptr * 8 + *sptr - '0'; | |
370 | sptr ++; | |
371 | } | |
ac884b6a | 372 | |
839a51c8 MS |
373 | dptr ++; |
374 | } | |
375 | else | |
ac884b6a | 376 | { |
839a51c8 MS |
377 | if (*sptr == 'n') |
378 | *dptr++ = '\n'; | |
379 | else if (*sptr == 'r') | |
380 | *dptr++ = '\r'; | |
381 | else if (*sptr == 't') | |
382 | *dptr++ = '\t'; | |
383 | else | |
384 | *dptr++ = *sptr; | |
385 | ||
ac884b6a MS |
386 | sptr ++; |
387 | } | |
ac884b6a MS |
388 | } |
389 | else | |
839a51c8 | 390 | *dptr++ = *sptr++; |
ac884b6a | 391 | } |
ac884b6a | 392 | |
839a51c8 | 393 | *dptr = '\0'; |
ac884b6a | 394 | |
839a51c8 MS |
395 | // Create or add to a message... |
396 | if (!strncmp(line, "msgid", 5)) | |
ac884b6a | 397 | { |
61cf44e2 MS |
398 | if (haveid && havestr) |
399 | add_message(id, str); | |
839a51c8 MS |
400 | |
401 | strlcpy(id, ptr, sizeof(id)); | |
402 | str[0] = '\0'; | |
61cf44e2 MS |
403 | haveid = 1; |
404 | havestr = 0; | |
405 | which = 1; | |
ac884b6a | 406 | } |
839a51c8 MS |
407 | else if (!strncmp(line, "msgstr", 6)) |
408 | { | |
61cf44e2 | 409 | if (!haveid) |
839a51c8 | 410 | { |
61cf44e2 MS |
411 | _cupsLangPrintf(stderr, |
412 | _("ERROR: Need a msgid line before any " | |
413 | "translation strings on line %d of %s!\n"), | |
414 | linenum, f); | |
839a51c8 MS |
415 | cupsFileClose(fp); |
416 | return (-1); | |
417 | } | |
ac884b6a | 418 | |
839a51c8 | 419 | strlcpy(str, ptr, sizeof(str)); |
61cf44e2 MS |
420 | havestr = 1; |
421 | which = 2; | |
839a51c8 | 422 | } |
61cf44e2 | 423 | else if (line[0] == '\"' && which == 2) |
839a51c8 | 424 | strlcat(str, ptr, sizeof(str)); |
61cf44e2 | 425 | else if (line[0] == '\"' && which == 1) |
839a51c8 MS |
426 | strlcat(id, ptr, sizeof(id)); |
427 | else | |
ac884b6a | 428 | { |
61cf44e2 MS |
429 | _cupsLangPrintf(stderr, _("ERROR: Unexpected text on line %d of %s!\n"), |
430 | linenum, f); | |
ac884b6a MS |
431 | cupsFileClose(fp); |
432 | return (-1); | |
433 | } | |
ac884b6a | 434 | } |
839a51c8 | 435 | |
61cf44e2 MS |
436 | if (haveid && havestr) |
437 | add_message(id, str); | |
ac884b6a | 438 | } |
839a51c8 MS |
439 | else |
440 | goto unknown_load_format; | |
ac884b6a | 441 | |
839a51c8 MS |
442 | /* |
443 | * Close the file and return... | |
444 | */ | |
ac884b6a MS |
445 | |
446 | cupsFileClose(fp); | |
447 | ||
448 | return (0); | |
839a51c8 MS |
449 | |
450 | /* | |
451 | * Unknown format error... | |
452 | */ | |
453 | ||
454 | unknown_load_format: | |
455 | ||
61cf44e2 MS |
456 | _cupsLangPrintf(stderr, |
457 | _("ERROR: Unknown message catalog format for \"%s\"!\n"), f); | |
839a51c8 MS |
458 | cupsFileClose(fp); |
459 | return (-1); | |
ac884b6a MS |
460 | } |
461 | ||
462 | ||
463 | // | |
464 | // 'ppdcCatalog::save_messages()' - Save the messages to a .po file. | |
465 | // | |
466 | ||
467 | int // O - 0 on success, -1 on error | |
468 | ppdcCatalog::save_messages( | |
469 | const char *f) // I - File to save to | |
470 | { | |
471 | cups_file_t *fp; // Message file | |
472 | ppdcMessage *m; // Current message | |
839a51c8 MS |
473 | char *ptr; // Pointer into string |
474 | int utf16; // Output UTF-16 .strings file? | |
475 | int ch; // Current character | |
ac884b6a MS |
476 | |
477 | ||
839a51c8 | 478 | // Open the file... |
ae71f5de | 479 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
ac884b6a MS |
480 | return (-1); |
481 | ||
839a51c8 MS |
482 | if (!strcmp(ptr, ".gz")) |
483 | fp = cupsFileOpen(f, "w9"); | |
484 | else | |
485 | fp = cupsFileOpen(f, "w"); | |
486 | ||
487 | if (!fp) | |
488 | return (-1); | |
489 | ||
490 | // For .strings files, write a BOM for big-endian output... | |
491 | utf16 = !strcmp(ptr, ".strings"); | |
492 | ||
493 | if (utf16) | |
494 | put_utf16(fp, 0xfeff); | |
495 | ||
496 | // Loop through all of the messages... | |
ac884b6a MS |
497 | for (m = (ppdcMessage *)messages->first(); |
498 | m; | |
499 | m = (ppdcMessage *)messages->next()) | |
500 | { | |
839a51c8 MS |
501 | if (utf16) |
502 | { | |
503 | put_utf16(fp, '\"'); | |
ac884b6a | 504 | |
839a51c8 MS |
505 | ptr = m->id->value; |
506 | while ((ch = get_utf8(ptr)) != 0) | |
507 | switch (ch) | |
508 | { | |
509 | case '\n' : | |
510 | put_utf16(fp, '\\'); | |
511 | put_utf16(fp, 'n'); | |
512 | break; | |
513 | case '\\' : | |
514 | put_utf16(fp, '\\'); | |
515 | put_utf16(fp, '\\'); | |
516 | break; | |
517 | case '\"' : | |
518 | put_utf16(fp, '\\'); | |
519 | put_utf16(fp, '\"'); | |
520 | break; | |
521 | default : | |
522 | put_utf16(fp, ch); | |
523 | break; | |
524 | } | |
525 | ||
526 | put_utf16(fp, '\"'); | |
527 | put_utf16(fp, ' '); | |
528 | put_utf16(fp, '='); | |
529 | put_utf16(fp, ' '); | |
530 | put_utf16(fp, '\"'); | |
531 | ||
532 | ptr = m->string->value; | |
533 | while ((ch = get_utf8(ptr)) != 0) | |
534 | switch (ch) | |
535 | { | |
536 | case '\n' : | |
537 | put_utf16(fp, '\\'); | |
538 | put_utf16(fp, 'n'); | |
539 | break; | |
540 | case '\\' : | |
541 | put_utf16(fp, '\\'); | |
542 | put_utf16(fp, '\\'); | |
543 | break; | |
544 | case '\"' : | |
545 | put_utf16(fp, '\\'); | |
546 | put_utf16(fp, '\"'); | |
547 | break; | |
548 | default : | |
549 | put_utf16(fp, ch); | |
550 | break; | |
551 | } | |
ac884b6a | 552 | |
839a51c8 MS |
553 | put_utf16(fp, '\"'); |
554 | put_utf16(fp, ';'); | |
555 | put_utf16(fp, '\n'); | |
556 | } | |
557 | else | |
558 | { | |
559 | cupsFilePuts(fp, "msgid \""); | |
560 | for (ptr = m->id->value; *ptr; ptr ++) | |
561 | switch (*ptr) | |
562 | { | |
563 | case '\n' : | |
564 | cupsFilePuts(fp, "\\n"); | |
565 | break; | |
566 | case '\\' : | |
567 | cupsFilePuts(fp, "\\\\"); | |
568 | break; | |
569 | case '\"' : | |
570 | cupsFilePuts(fp, "\\\""); | |
571 | break; | |
572 | default : | |
573 | cupsFilePutChar(fp, *ptr); | |
574 | break; | |
575 | } | |
576 | cupsFilePuts(fp, "\"\n"); | |
577 | ||
578 | cupsFilePuts(fp, "msgstr \""); | |
579 | for (ptr = m->string->value; *ptr; ptr ++) | |
580 | switch (*ptr) | |
581 | { | |
582 | case '\n' : | |
583 | cupsFilePuts(fp, "\\n"); | |
584 | break; | |
585 | case '\\' : | |
586 | cupsFilePuts(fp, "\\\\"); | |
587 | break; | |
588 | case '\"' : | |
589 | cupsFilePuts(fp, "\\\""); | |
590 | break; | |
591 | default : | |
592 | cupsFilePutChar(fp, *ptr); | |
593 | break; | |
594 | } | |
595 | cupsFilePuts(fp, "\"\n"); | |
596 | ||
597 | cupsFilePutChar(fp, '\n'); | |
598 | } | |
ac884b6a MS |
599 | } |
600 | ||
601 | cupsFileClose(fp); | |
602 | ||
603 | return (0); | |
604 | } | |
605 | ||
606 | ||
839a51c8 MS |
607 | // |
608 | // 'get_utf8()' - Get a UTF-8 character. | |
609 | // | |
610 | ||
static int                              // O  - Unicode character or 0 on EOF
get_utf8(char *&ptr)                    // IO - Pointer to character
{
  int lead = *ptr++ & 255;              // First byte of the sequence
  int value;                            // Character being assembled
  int remaining;                        // Continuation bytes still expected


  // ASCII (and stray continuation bytes) are returned unchanged...
  if (lead < 0xc0)
    return (lead);

  // The lead byte gives the sequence length and the first payload bits...
  if ((lead & 0xe0) == 0xc0)
  {
    remaining = 1;
    value     = lead & 0x1f;
  }
  else if ((lead & 0xf0) == 0xe0)
  {
    remaining = 2;
    value     = lead & 0x0f;
  }
  else if ((lead & 0xf8) == 0xf0)
  {
    remaining = 3;
    value     = lead & 0x07;
  }
  else
    return (lead);                      // Not a valid lead byte - pass through

  // Fold in six bits per continuation byte.  A byte that is not a
  // continuation byte ends decoding with 0 and is left unconsumed.
  for (; remaining > 0; remaining --)
  {
    if ((*ptr & 0xc0) != 0x80)
      return (0);

    value = (value << 6) | (*ptr++ & 0x3f);
  }

  return (value);
}
662 | ||
663 | ||
664 | // | |
665 | // 'get_utf16()' - Get a UTF-16 character... | |
666 | // | |
667 | ||
668 | static int // O - Unicode character or 0 on EOF | |
669 | get_utf16(cups_file_t *fp, // I - File to read from | |
670 | ppdc_cs_t &cs) // IO - Character set of file | |
671 | { | |
672 | int ch; // Current character | |
673 | unsigned char buffer[3]; // Bytes | |
674 | ||
675 | ||
676 | if (cs == PPDC_CS_AUTO) | |
677 | { | |
678 | // Get byte-order-mark, if present... | |
679 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
680 | return (0); | |
681 | ||
682 | if (buffer[0] == 0xfe && buffer[1] == 0xff) | |
683 | { | |
684 | // Big-endian UTF-16... | |
685 | cs = PPDC_CS_UTF16BE; | |
686 | ||
687 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
688 | return (0); | |
689 | } | |
690 | else if (buffer[0] == 0xff && buffer[1] == 0xfe) | |
691 | { | |
692 | // Little-endian UTF-16... | |
693 | cs = PPDC_CS_UTF16LE; | |
694 | ||
695 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
696 | return (0); | |
697 | } | |
698 | else if (buffer[0] == 0x00 && buffer[1] != 0x00) | |
699 | { | |
700 | // No BOM, assume big-endian UTF-16... | |
701 | cs = PPDC_CS_UTF16BE; | |
702 | } | |
703 | else if (buffer[0] != 0x00 && buffer[1] == 0x00) | |
704 | { | |
705 | // No BOM, assume little-endian UTF-16... | |
706 | cs = PPDC_CS_UTF16LE; | |
707 | } | |
708 | else | |
709 | { | |
710 | // No BOM, assume UTF-8... | |
711 | cs = PPDC_CS_UTF8; | |
712 | ||
713 | cupsFileRewind(fp); | |
714 | } | |
715 | } | |
716 | else if (cs != PPDC_CS_UTF8) | |
717 | { | |
718 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
719 | return (0); | |
720 | } | |
721 | ||
722 | if (cs == PPDC_CS_UTF8) | |
723 | { | |
724 | // UTF-8 character... | |
4509bb49 MS |
725 | if ((ch = cupsFileGetChar(fp)) < 0) |
726 | return (0); | |
839a51c8 MS |
727 | |
728 | if ((ch & 0xe0) == 0xc0) | |
729 | { | |
730 | // Two-byte UTF-8... | |
731 | if (cupsFileRead(fp, (char *)buffer, 1) != 1) | |
732 | return (0); | |
733 | ||
734 | if ((buffer[0] & 0xc0) != 0x80) | |
735 | return (0); | |
736 | ||
737 | ch = ((ch & 0x1f) << 6) | (buffer[0] & 0x3f); | |
738 | } | |
739 | else if ((ch & 0xf0) == 0xe0) | |
740 | { | |
741 | // Three-byte UTF-8... | |
742 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
743 | return (0); | |
744 | ||
745 | if ((buffer[0] & 0xc0) != 0x80 || | |
746 | (buffer[1] & 0xc0) != 0x80) | |
747 | return (0); | |
748 | ||
749 | ch = ((((ch & 0x0f) << 6) | (buffer[0] & 0x3f)) << 6) | | |
750 | (buffer[1] & 0x3f); | |
751 | } | |
752 | else if ((ch & 0xf8) == 0xf0) | |
753 | { | |
754 | // Four-byte UTF-8... | |
755 | if (cupsFileRead(fp, (char *)buffer, 3) != 3) | |
756 | return (0); | |
757 | ||
758 | if ((buffer[0] & 0xc0) != 0x80 || | |
759 | (buffer[1] & 0xc0) != 0x80 || | |
760 | (buffer[2] & 0xc0) != 0x80) | |
761 | return (0); | |
762 | ||
763 | ch = ((((((ch & 0x07) << 6) | (buffer[0] & 0x3f)) << 6) | | |
764 | (buffer[1] & 0x3f)) << 6) | (buffer[2] & 0x3f); | |
765 | } | |
766 | } | |
767 | else | |
768 | { | |
769 | // UTF-16 character... | |
770 | if (cs == PPDC_CS_UTF16BE) | |
771 | ch = (buffer[0] << 8) | buffer[1]; | |
772 | else | |
773 | ch = (buffer[1] << 8) | buffer[0]; | |
774 | ||
775 | if (ch >= 0xd800 && ch <= 0xdbff) | |
776 | { | |
777 | // Handle multi-word encoding... | |
778 | int lch; | |
779 | ||
780 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
781 | return (0); | |
782 | ||
783 | if (cs == PPDC_CS_UTF16BE) | |
784 | lch = (buffer[0] << 8) | buffer[1]; | |
785 | else | |
786 | lch = (buffer[1] << 8) | buffer[0]; | |
787 | ||
788 | if (lch < 0xdc00 || lch >= 0xdfff) | |
789 | return (0); | |
790 | ||
791 | ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; | |
792 | } | |
793 | } | |
794 | ||
795 | return (ch); | |
796 | } | |
797 | ||
798 | ||
799 | // | |
800 | // 'put_utf8()' - Add a UTF-8 character to a string. | |
801 | // | |
802 | ||
static int                              // O  - 0 on success, -1 on failure
put_utf8(int  ch,                       // I  - Unicode character
         char *&ptr,                    // IO - String pointer
         char *end)                     // I  - End of buffer
{
  int trailing;                         // Continuation bytes after lead byte
  int shift;                            // Bit offset of current 6-bit group


  // Work out how long the encoded sequence is...
  if (ch < 0x80)
    trailing = 0;
  else if (ch < 0x800)
    trailing = 1;
  else if (ch < 0x10000)
    trailing = 2;
  else
    trailing = 3;

  // The whole sequence must fit before "end"; nothing is written otherwise.
  if ((ptr + trailing) >= end)
    return (-1);

  // Lead byte: length marker plus the most significant payload bits...
  switch (trailing)
  {
    case 0 :
        *ptr++ = ch;
        break;
    case 1 :
        *ptr++ = 0xc0 | (ch >> 6);
        break;
    case 2 :
        *ptr++ = 0xe0 | (ch >> 12);
        break;
    default :
        *ptr++ = 0xf0 | (ch >> 18);
        break;
  }

  // Continuation bytes, six bits each, most significant group first...
  for (shift = 6 * (trailing - 1); shift >= 0; shift -= 6)
    *ptr++ = 0x80 | ((ch >> shift) & 0x3f);

  return (0);
}
849 | ||
850 | ||
851 | // | |
852 | // 'put_utf16()' - Write a UTF-16 character to a file. | |
853 | // | |
854 | ||
855 | static int // O - 0 on success, -1 on failure | |
856 | put_utf16(cups_file_t *fp, // I - File to write to | |
857 | int ch) // I - Unicode character | |
858 | { | |
859 | unsigned char buffer[4]; // Output buffer | |
860 | ||
861 | ||
862 | if (ch < 0x10000) | |
863 | { | |
864 | // One-word UTF-16 big-endian... | |
865 | buffer[0] = ch >> 8; | |
866 | buffer[1] = ch; | |
867 | ||
868 | if (cupsFileWrite(fp, (char *)buffer, 2) == 2) | |
869 | return (0); | |
870 | } | |
871 | else | |
872 | { | |
873 | // Two-word UTF-16 big-endian... | |
874 | ch -= 0x10000; | |
875 | ||
876 | buffer[0] = 0xd8 | (ch >> 18); | |
877 | buffer[1] = ch >> 10; | |
878 | buffer[2] = 0xdc | ((ch >> 8) & 0x03); | |
879 | buffer[3] = ch; | |
880 | ||
881 | if (cupsFileWrite(fp, (char *)buffer, 4) == 4) | |
882 | return (0); | |
883 | } | |
884 | ||
885 | return (-1); | |
886 | } | |
887 | ||
888 | ||
ac884b6a MS |
889 | // |
890 | // End of "$Id$". | |
891 | // |