]>
Commit | Line | Data |
---|---|---|
ac884b6a MS |
1 | // |
2 | // "$Id$" | |
3 | // | |
4 | // Shared message catalog class for the CUPS PPD Compiler. | |
5 | // | |
94da7e34 | 6 | // Copyright 2007-2009 by Apple Inc. |
ac884b6a MS |
7 | // Copyright 2002-2006 by Easy Software Products. |
8 | // | |
9 | // These coded instructions, statements, and computer programs are the | |
10 | // property of Apple Inc. and are protected by Federal copyright | |
11 | // law. Distribution and use rights are outlined in the file "LICENSE.txt" | |
12 | // which should have been included with this file. If this file is | |
13 | // missing or damaged, see the license at "http://www.cups.org/". | |
14 | // | |
15 | // Contents: | |
16 | // | |
e6013cfa MS |
17 | // ppdcCatalog::ppdcCatalog() - Create a shared message catalog. |
18 | // ppdcCatalog::~ppdcCatalog() - Destroy a shared message catalog. | |
19 | // ppdcCatalog::add_message() - Add a new message. | |
20 | // ppdcCatalog::find_message() - Find a message in a catalog... | |
21 | // ppdcCatalog::load_messages() - Load messages from a .po file. | |
22 | // ppdcCatalog::save_messages() - Save the messages to a .po file. | |
23 | // get_utf8() - Get a UTF-8 character. | |
24 | // get_utf16() - Get a UTF-16 character... | |
25 | // put_utf8() - Add a UTF-8 character to a string. | |
26 | // put_utf16() - Write a UTF-16 character to a file. | |
ac884b6a MS |
27 | // |
28 | ||
29 | // | |
30 | // Include necessary headers... | |
31 | // | |
32 | ||
38e73f87 | 33 | #include "ppdc-private.h" |
ac884b6a MS |
34 | |
35 | ||
839a51c8 MS |
36 | // |
37 | // Character encodings... | |
38 | // | |
39 | ||
enum ppdc_cs_t				// Character set of a message file
{
  PPDC_CS_AUTO,				// Not determined yet; get_utf16() picks one on its first read
  PPDC_CS_UTF8,				// UTF-8
  PPDC_CS_UTF16BE,			// UTF-16, big-endian byte order
  PPDC_CS_UTF16LE			// UTF-16, little-endian byte order
};
47 | ||
48 | ||
49 | // | |
50 | // Local functions... | |
51 | // | |
52 | ||
53 | static int get_utf8(char *&ptr); | |
54 | static int get_utf16(cups_file_t *fp, ppdc_cs_t &cs); | |
55 | static int put_utf8(int ch, char *&ptr, char *end); | |
56 | static int put_utf16(cups_file_t *fp, int ch); | |
57 | ||
58 | ||
ac884b6a MS |
59 | // |
60 | // 'ppdcCatalog::ppdcCatalog()' - Create a shared message catalog. | |
61 | // | |
62 | ||
63 | ppdcCatalog::ppdcCatalog(const char *l, // I - Locale | |
64 | const char *f) // I - Message catalog file | |
65 | : ppdcShared() | |
66 | { | |
67 | _cups_globals_t *cg = _cupsGlobals(); | |
68 | // Global information | |
69 | ||
70 | ||
94da7e34 MS |
71 | PPDC_NEW; |
72 | ||
ac884b6a MS |
73 | locale = new ppdcString(l); |
74 | filename = new ppdcString(f); | |
75 | messages = new ppdcArray(); | |
76 | ||
77 | if (l) | |
78 | { | |
79 | // Try loading the base messages for this locale... | |
80 | char pofile[1024]; // Message catalog file | |
81 | ||
82 | ||
61cf44e2 | 83 | snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, l, l); |
ac884b6a MS |
84 | |
85 | if (load_messages(pofile) && strchr(l, '_')) | |
86 | { | |
87 | // Try the base locale... | |
88 | char baseloc[3]; // Base locale... | |
89 | ||
90 | ||
91 | strlcpy(baseloc, l, sizeof(baseloc)); | |
61cf44e2 | 92 | snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, |
ac884b6a MS |
93 | baseloc, baseloc); |
94 | ||
95 | load_messages(pofile); | |
96 | } | |
97 | } | |
98 | ||
99 | if (f) | |
100 | load_messages(f); | |
101 | } | |
102 | ||
103 | ||
104 | // | |
105 | // 'ppdcCatalog::~ppdcCatalog()' - Destroy a shared message catalog. | |
106 | // | |
107 | ||
108 | ppdcCatalog::~ppdcCatalog() | |
109 | { | |
94da7e34 MS |
110 | PPDC_DELETE; |
111 | ||
e4572d57 MS |
112 | locale->release(); |
113 | filename->release(); | |
114 | messages->release(); | |
ac884b6a MS |
115 | } |
116 | ||
117 | ||
118 | // | |
119 | // 'ppdcCatalog::add_message()' - Add a new message. | |
120 | // | |
121 | ||
122 | void | |
61cf44e2 MS |
123 | ppdcCatalog::add_message( |
124 | const char *id, // I - Message ID to add | |
125 | const char *string) // I - Translation string | |
ac884b6a MS |
126 | { |
127 | ppdcMessage *m; // Current message | |
128 | char text[1024]; // Text to translate | |
129 | ||
130 | ||
131 | // Range check input... | |
61cf44e2 | 132 | if (!id) |
ac884b6a MS |
133 | return; |
134 | ||
135 | // Verify that we don't already have the message ID... | |
136 | for (m = (ppdcMessage *)messages->first(); | |
137 | m; | |
138 | m = (ppdcMessage *)messages->next()) | |
139 | if (!strcmp(m->id->value, id)) | |
61cf44e2 MS |
140 | { |
141 | if (string) | |
142 | { | |
143 | m->string->release(); | |
144 | m->string = new ppdcString(string); | |
145 | } | |
ac884b6a | 146 | return; |
61cf44e2 | 147 | } |
ac884b6a MS |
148 | |
149 | // Add the message... | |
61cf44e2 MS |
150 | if (!string) |
151 | { | |
152 | snprintf(text, sizeof(text), "TRANSLATE %s", id); | |
153 | string = text; | |
154 | } | |
155 | ||
e6013cfa | 156 | messages->add(new ppdcMessage(id, string)); |
ac884b6a MS |
157 | } |
158 | ||
159 | ||
160 | // | |
161 | // 'ppdcCatalog::find_message()' - Find a message in a catalog... | |
162 | // | |
163 | ||
164 | const char * // O - Message text | |
165 | ppdcCatalog::find_message( | |
166 | const char *id) // I - Message ID | |
167 | { | |
168 | ppdcMessage *m; // Current message | |
169 | ||
170 | ||
171 | for (m = (ppdcMessage *)messages->first(); | |
172 | m; | |
173 | m = (ppdcMessage *)messages->next()) | |
174 | if (!strcmp(m->id->value, id)) | |
175 | return (m->string->value); | |
176 | ||
177 | return (id); | |
178 | } | |
179 | ||
180 | ||
181 | // | |
182 | // 'ppdcCatalog::load_messages()' - Load messages from a .po file. | |
183 | // | |
184 | ||
185 | int // O - 0 on success, -1 on failure | |
186 | ppdcCatalog::load_messages( | |
187 | const char *f) // I - Message catalog file | |
188 | { | |
189 | cups_file_t *fp; // Message file | |
ac884b6a MS |
190 | char line[4096], // Line buffer |
191 | *ptr, // Pointer into buffer | |
192 | id[4096], // Translation ID | |
193 | str[4096]; // Translation string | |
194 | int linenum; // Line number | |
195 | ||
196 | ||
197 | // Open the message catalog file... | |
198 | if ((fp = cupsFileOpen(f, "r")) == NULL) | |
199 | return (-1); | |
200 | ||
ae71f5de | 201 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
839a51c8 MS |
202 | goto unknown_load_format; |
203 | else if (!strcmp(ptr, ".strings")) | |
204 | { | |
205 | /* | |
206 | * Read messages in Mac OS X ".strings" format, which are UTF-16 text | |
207 | * files of the format: | |
208 | * | |
209 | * "id" = "str"; | |
210 | * | |
211 | * Strings files can also contain C-style comments. | |
212 | */ | |
213 | ||
214 | ppdc_cs_t cs = PPDC_CS_AUTO; // Character set for file | |
215 | int ch; // Current character from file | |
216 | char *end; // End of buffer | |
217 | ||
218 | ||
219 | id[0] = '\0'; | |
220 | str[0] = '\0'; | |
221 | ptr = NULL; | |
222 | end = NULL; | |
223 | ||
224 | while ((ch = get_utf16(fp, cs)) != 0) | |
225 | { | |
226 | if (ptr) | |
227 | { | |
228 | if (ch == '\\') | |
229 | { | |
230 | if ((ch = get_utf16(fp, cs)) == 0) | |
231 | break; | |
ac884b6a | 232 | |
4509bb49 | 233 | if (ch == 'n') |
839a51c8 MS |
234 | ch = '\n'; |
235 | else if (ch == 't') | |
236 | ch = '\t'; | |
237 | } | |
4509bb49 MS |
238 | else if (ch == '\"') |
239 | { | |
240 | *ptr = '\0'; | |
241 | ptr = NULL; | |
242 | } | |
ac884b6a | 243 | |
4509bb49 MS |
244 | if (ptr) |
245 | put_utf8(ch, ptr, end); | |
839a51c8 MS |
246 | } |
247 | else if (ch == '/') | |
248 | { | |
249 | // Start of a comment? | |
250 | if ((ch = get_utf16(fp, cs)) == 0) | |
251 | break; | |
ac884b6a | 252 | |
839a51c8 MS |
253 | if (ch == '*') |
254 | { | |
255 | // Skip C comment... | |
256 | int lastch = 0; | |
ac884b6a | 257 | |
839a51c8 MS |
258 | while ((ch = get_utf16(fp, cs)) != 0) |
259 | { | |
260 | if (ch == '/' && lastch == '*') | |
261 | break; | |
ac884b6a | 262 | |
839a51c8 MS |
263 | lastch = ch; |
264 | } | |
265 | } | |
266 | else if (ch == '/') | |
267 | { | |
268 | // Skip C++ comment... | |
269 | while ((ch = get_utf16(fp, cs)) != 0) | |
270 | if (ch == '\n') | |
271 | break; | |
272 | } | |
273 | } | |
274 | else if (ch == '\"') | |
275 | { | |
4509bb49 MS |
276 | // Start quoted string... |
277 | if (id[0]) | |
839a51c8 MS |
278 | { |
279 | ptr = str; | |
280 | end = str + sizeof(str) - 1; | |
281 | } | |
282 | else | |
283 | { | |
284 | ptr = id; | |
285 | end = id + sizeof(id) - 1; | |
286 | } | |
287 | } | |
288 | else if (ch == ';') | |
289 | { | |
290 | // Add string... | |
61cf44e2 | 291 | add_message(id, str); |
e6013cfa | 292 | id[0] = '\0'; |
839a51c8 | 293 | } |
ac884b6a | 294 | } |
839a51c8 MS |
295 | } |
296 | else if (!strcmp(ptr, ".po") || !strcmp(ptr, ".gz")) | |
297 | { | |
298 | /* | |
299 | * Read messages from the catalog file until EOF... | |
300 | * | |
301 | * The format is the GNU gettext .po format, which is fairly simple: | |
302 | * | |
303 | * msgid "some text" | |
304 | * msgstr "localized text" | |
305 | * | |
306 | * The ID and localized text can span multiple lines using the form: | |
307 | * | |
308 | * msgid "" | |
309 | * "some long text" | |
310 | * msgstr "" | |
311 | * "localized text spanning " | |
312 | * "multiple lines" | |
313 | */ | |
314 | ||
61cf44e2 MS |
315 | int which, // In msgid? |
316 | haveid, // Did we get a msgid string? | |
317 | havestr; // Did we get a msgstr string? | |
318 | ||
839a51c8 MS |
319 | linenum = 0; |
320 | id[0] = '\0'; | |
321 | str[0] = '\0'; | |
61cf44e2 MS |
322 | haveid = 0; |
323 | havestr = 0; | |
324 | which = 0; | |
839a51c8 MS |
325 | |
326 | while (cupsFileGets(fp, line, sizeof(line))) | |
327 | { | |
328 | linenum ++; | |
ac884b6a | 329 | |
839a51c8 MS |
330 | // Skip blank and comment lines... |
331 | if (line[0] == '#' || !line[0]) | |
332 | continue; | |
ac884b6a | 333 | |
839a51c8 | 334 | // Strip the trailing quote... |
ae71f5de | 335 | if ((ptr = (char *)strrchr(line, '\"')) == NULL) |
839a51c8 | 336 | { |
61cf44e2 | 337 | _cupsLangPrintf(stderr, |
4d301e69 | 338 | _("ERROR: Expected quoted string on line %d of %s\n"), |
61cf44e2 | 339 | linenum, f); |
839a51c8 MS |
340 | cupsFileClose(fp); |
341 | return (-1); | |
342 | } | |
ac884b6a | 343 | |
839a51c8 MS |
344 | *ptr = '\0'; |
345 | ||
346 | // Find start of value... | |
347 | if ((ptr = strchr(line, '\"')) == NULL) | |
348 | { | |
61cf44e2 | 349 | _cupsLangPrintf(stderr, |
4d301e69 | 350 | _("ERROR: Expected quoted string on line %d of %s\n"), |
61cf44e2 | 351 | linenum, f); |
839a51c8 MS |
352 | cupsFileClose(fp); |
353 | return (-1); | |
354 | } | |
355 | ||
356 | ptr ++; | |
357 | ||
358 | // Unquote the text... | |
359 | char *sptr, *dptr; // Source/destination pointers | |
360 | ||
361 | for (sptr = ptr, dptr = ptr; *sptr;) | |
ac884b6a | 362 | { |
839a51c8 | 363 | if (*sptr == '\\') |
ac884b6a | 364 | { |
839a51c8 MS |
365 | sptr ++; |
366 | if (isdigit(*sptr)) | |
367 | { | |
368 | *dptr = 0; | |
369 | ||
370 | while (isdigit(*sptr)) | |
371 | { | |
372 | *dptr = *dptr * 8 + *sptr - '0'; | |
373 | sptr ++; | |
374 | } | |
ac884b6a | 375 | |
839a51c8 MS |
376 | dptr ++; |
377 | } | |
378 | else | |
ac884b6a | 379 | { |
839a51c8 MS |
380 | if (*sptr == 'n') |
381 | *dptr++ = '\n'; | |
382 | else if (*sptr == 'r') | |
383 | *dptr++ = '\r'; | |
384 | else if (*sptr == 't') | |
385 | *dptr++ = '\t'; | |
386 | else | |
387 | *dptr++ = *sptr; | |
388 | ||
ac884b6a MS |
389 | sptr ++; |
390 | } | |
ac884b6a MS |
391 | } |
392 | else | |
839a51c8 | 393 | *dptr++ = *sptr++; |
ac884b6a | 394 | } |
ac884b6a | 395 | |
839a51c8 | 396 | *dptr = '\0'; |
ac884b6a | 397 | |
839a51c8 MS |
398 | // Create or add to a message... |
399 | if (!strncmp(line, "msgid", 5)) | |
ac884b6a | 400 | { |
61cf44e2 MS |
401 | if (haveid && havestr) |
402 | add_message(id, str); | |
839a51c8 MS |
403 | |
404 | strlcpy(id, ptr, sizeof(id)); | |
405 | str[0] = '\0'; | |
61cf44e2 MS |
406 | haveid = 1; |
407 | havestr = 0; | |
408 | which = 1; | |
ac884b6a | 409 | } |
839a51c8 MS |
410 | else if (!strncmp(line, "msgstr", 6)) |
411 | { | |
61cf44e2 | 412 | if (!haveid) |
839a51c8 | 413 | { |
61cf44e2 MS |
414 | _cupsLangPrintf(stderr, |
415 | _("ERROR: Need a msgid line before any " | |
4d301e69 | 416 | "translation strings on line %d of %s\n"), |
61cf44e2 | 417 | linenum, f); |
839a51c8 MS |
418 | cupsFileClose(fp); |
419 | return (-1); | |
420 | } | |
ac884b6a | 421 | |
839a51c8 | 422 | strlcpy(str, ptr, sizeof(str)); |
61cf44e2 MS |
423 | havestr = 1; |
424 | which = 2; | |
839a51c8 | 425 | } |
61cf44e2 | 426 | else if (line[0] == '\"' && which == 2) |
839a51c8 | 427 | strlcat(str, ptr, sizeof(str)); |
61cf44e2 | 428 | else if (line[0] == '\"' && which == 1) |
839a51c8 MS |
429 | strlcat(id, ptr, sizeof(id)); |
430 | else | |
ac884b6a | 431 | { |
4d301e69 | 432 | _cupsLangPrintf(stderr, _("ERROR: Unexpected text on line %d of %s\n"), |
61cf44e2 | 433 | linenum, f); |
ac884b6a MS |
434 | cupsFileClose(fp); |
435 | return (-1); | |
436 | } | |
ac884b6a | 437 | } |
839a51c8 | 438 | |
61cf44e2 MS |
439 | if (haveid && havestr) |
440 | add_message(id, str); | |
ac884b6a | 441 | } |
839a51c8 MS |
442 | else |
443 | goto unknown_load_format; | |
ac884b6a | 444 | |
839a51c8 MS |
445 | /* |
446 | * Close the file and return... | |
447 | */ | |
ac884b6a MS |
448 | |
449 | cupsFileClose(fp); | |
450 | ||
451 | return (0); | |
839a51c8 MS |
452 | |
453 | /* | |
454 | * Unknown format error... | |
455 | */ | |
456 | ||
457 | unknown_load_format: | |
458 | ||
61cf44e2 | 459 | _cupsLangPrintf(stderr, |
4d301e69 | 460 | _("ERROR: Unknown message catalog format for \"%s\"\n"), f); |
839a51c8 MS |
461 | cupsFileClose(fp); |
462 | return (-1); | |
ac884b6a MS |
463 | } |
464 | ||
465 | ||
466 | // | |
467 | // 'ppdcCatalog::save_messages()' - Save the messages to a .po file. | |
468 | // | |
469 | ||
470 | int // O - 0 on success, -1 on error | |
471 | ppdcCatalog::save_messages( | |
472 | const char *f) // I - File to save to | |
473 | { | |
474 | cups_file_t *fp; // Message file | |
475 | ppdcMessage *m; // Current message | |
839a51c8 MS |
476 | char *ptr; // Pointer into string |
477 | int utf16; // Output UTF-16 .strings file? | |
478 | int ch; // Current character | |
ac884b6a MS |
479 | |
480 | ||
839a51c8 | 481 | // Open the file... |
ae71f5de | 482 | if ((ptr = (char *)strrchr(f, '.')) == NULL) |
ac884b6a MS |
483 | return (-1); |
484 | ||
839a51c8 MS |
485 | if (!strcmp(ptr, ".gz")) |
486 | fp = cupsFileOpen(f, "w9"); | |
487 | else | |
488 | fp = cupsFileOpen(f, "w"); | |
489 | ||
490 | if (!fp) | |
491 | return (-1); | |
492 | ||
493 | // For .strings files, write a BOM for big-endian output... | |
494 | utf16 = !strcmp(ptr, ".strings"); | |
495 | ||
496 | if (utf16) | |
497 | put_utf16(fp, 0xfeff); | |
498 | ||
499 | // Loop through all of the messages... | |
ac884b6a MS |
500 | for (m = (ppdcMessage *)messages->first(); |
501 | m; | |
502 | m = (ppdcMessage *)messages->next()) | |
503 | { | |
839a51c8 MS |
504 | if (utf16) |
505 | { | |
506 | put_utf16(fp, '\"'); | |
ac884b6a | 507 | |
839a51c8 MS |
508 | ptr = m->id->value; |
509 | while ((ch = get_utf8(ptr)) != 0) | |
510 | switch (ch) | |
511 | { | |
512 | case '\n' : | |
513 | put_utf16(fp, '\\'); | |
514 | put_utf16(fp, 'n'); | |
515 | break; | |
516 | case '\\' : | |
517 | put_utf16(fp, '\\'); | |
518 | put_utf16(fp, '\\'); | |
519 | break; | |
520 | case '\"' : | |
521 | put_utf16(fp, '\\'); | |
522 | put_utf16(fp, '\"'); | |
523 | break; | |
524 | default : | |
525 | put_utf16(fp, ch); | |
526 | break; | |
527 | } | |
528 | ||
529 | put_utf16(fp, '\"'); | |
530 | put_utf16(fp, ' '); | |
531 | put_utf16(fp, '='); | |
532 | put_utf16(fp, ' '); | |
533 | put_utf16(fp, '\"'); | |
534 | ||
535 | ptr = m->string->value; | |
536 | while ((ch = get_utf8(ptr)) != 0) | |
537 | switch (ch) | |
538 | { | |
539 | case '\n' : | |
540 | put_utf16(fp, '\\'); | |
541 | put_utf16(fp, 'n'); | |
542 | break; | |
543 | case '\\' : | |
544 | put_utf16(fp, '\\'); | |
545 | put_utf16(fp, '\\'); | |
546 | break; | |
547 | case '\"' : | |
548 | put_utf16(fp, '\\'); | |
549 | put_utf16(fp, '\"'); | |
550 | break; | |
551 | default : | |
552 | put_utf16(fp, ch); | |
553 | break; | |
554 | } | |
ac884b6a | 555 | |
839a51c8 MS |
556 | put_utf16(fp, '\"'); |
557 | put_utf16(fp, ';'); | |
558 | put_utf16(fp, '\n'); | |
559 | } | |
560 | else | |
561 | { | |
562 | cupsFilePuts(fp, "msgid \""); | |
563 | for (ptr = m->id->value; *ptr; ptr ++) | |
564 | switch (*ptr) | |
565 | { | |
566 | case '\n' : | |
567 | cupsFilePuts(fp, "\\n"); | |
568 | break; | |
569 | case '\\' : | |
570 | cupsFilePuts(fp, "\\\\"); | |
571 | break; | |
572 | case '\"' : | |
573 | cupsFilePuts(fp, "\\\""); | |
574 | break; | |
575 | default : | |
576 | cupsFilePutChar(fp, *ptr); | |
577 | break; | |
578 | } | |
579 | cupsFilePuts(fp, "\"\n"); | |
580 | ||
581 | cupsFilePuts(fp, "msgstr \""); | |
582 | for (ptr = m->string->value; *ptr; ptr ++) | |
583 | switch (*ptr) | |
584 | { | |
585 | case '\n' : | |
586 | cupsFilePuts(fp, "\\n"); | |
587 | break; | |
588 | case '\\' : | |
589 | cupsFilePuts(fp, "\\\\"); | |
590 | break; | |
591 | case '\"' : | |
592 | cupsFilePuts(fp, "\\\""); | |
593 | break; | |
594 | default : | |
595 | cupsFilePutChar(fp, *ptr); | |
596 | break; | |
597 | } | |
598 | cupsFilePuts(fp, "\"\n"); | |
599 | ||
600 | cupsFilePutChar(fp, '\n'); | |
601 | } | |
ac884b6a MS |
602 | } |
603 | ||
604 | cupsFileClose(fp); | |
605 | ||
606 | return (0); | |
607 | } | |
608 | ||
609 | ||
839a51c8 MS |
//
// 'get_utf8()' - Get a UTF-8 character.
//
// Decodes one character at "ptr" and advances "ptr" past the bytes that
// were consumed.  At the end of the string 0 is returned and "ptr" is left
// on the terminator, so calling the function again is safe.  0 is also
// returned when a continuation byte of a multi-byte sequence is missing; in
// that case "ptr" points at the offending byte.  Bytes below 0xc0 and bytes
// from 0xf8 up are returned as-is.
//

static int				// O  - Unicode character or 0 on EOF
get_utf8(char *&ptr)			// IO - Pointer to character
{
  int	ch = *ptr & 255;		// Current character
  int	extra;				// Number of continuation bytes expected


  // Never step over the terminating nul...
  if (!ch)
    return (0);

  ptr ++;

  if (ch < 0xc0)
    return (ch);

  // Work out the sequence length and keep the payload bits of the lead
  // byte...
  if ((ch & 0xe0) == 0xc0)
  {
    // Two-byte UTF-8...
    ch    &= 0x1f;
    extra = 1;
  }
  else if ((ch & 0xf0) == 0xe0)
  {
    // Three-byte UTF-8...
    ch    &= 0x0f;
    extra = 2;
  }
  else if ((ch & 0xf8) == 0xf0)
  {
    // Four-byte UTF-8...
    ch    &= 0x07;
    extra = 3;
  }
  else
    return (ch);			// Not a valid lead byte, pass it through

  // Fold in six bits from every continuation byte...
  while (extra > 0)
  {
    if ((*ptr & 0xc0) != 0x80)
      return (0);

    ch = (ch << 6) | (*ptr++ & 0x3f);
    extra --;
  }

  return (ch);
}
665 | ||
666 | ||
667 | // | |
668 | // 'get_utf16()' - Get a UTF-16 character... | |
669 | // | |
670 | ||
671 | static int // O - Unicode character or 0 on EOF | |
672 | get_utf16(cups_file_t *fp, // I - File to read from | |
673 | ppdc_cs_t &cs) // IO - Character set of file | |
674 | { | |
675 | int ch; // Current character | |
676 | unsigned char buffer[3]; // Bytes | |
677 | ||
678 | ||
679 | if (cs == PPDC_CS_AUTO) | |
680 | { | |
681 | // Get byte-order-mark, if present... | |
682 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
683 | return (0); | |
684 | ||
685 | if (buffer[0] == 0xfe && buffer[1] == 0xff) | |
686 | { | |
687 | // Big-endian UTF-16... | |
688 | cs = PPDC_CS_UTF16BE; | |
689 | ||
690 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
691 | return (0); | |
692 | } | |
693 | else if (buffer[0] == 0xff && buffer[1] == 0xfe) | |
694 | { | |
695 | // Little-endian UTF-16... | |
696 | cs = PPDC_CS_UTF16LE; | |
697 | ||
698 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
699 | return (0); | |
700 | } | |
701 | else if (buffer[0] == 0x00 && buffer[1] != 0x00) | |
702 | { | |
703 | // No BOM, assume big-endian UTF-16... | |
704 | cs = PPDC_CS_UTF16BE; | |
705 | } | |
706 | else if (buffer[0] != 0x00 && buffer[1] == 0x00) | |
707 | { | |
708 | // No BOM, assume little-endian UTF-16... | |
709 | cs = PPDC_CS_UTF16LE; | |
710 | } | |
711 | else | |
712 | { | |
713 | // No BOM, assume UTF-8... | |
714 | cs = PPDC_CS_UTF8; | |
715 | ||
716 | cupsFileRewind(fp); | |
717 | } | |
718 | } | |
719 | else if (cs != PPDC_CS_UTF8) | |
720 | { | |
721 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
722 | return (0); | |
723 | } | |
724 | ||
725 | if (cs == PPDC_CS_UTF8) | |
726 | { | |
727 | // UTF-8 character... | |
4509bb49 MS |
728 | if ((ch = cupsFileGetChar(fp)) < 0) |
729 | return (0); | |
839a51c8 MS |
730 | |
731 | if ((ch & 0xe0) == 0xc0) | |
732 | { | |
733 | // Two-byte UTF-8... | |
734 | if (cupsFileRead(fp, (char *)buffer, 1) != 1) | |
735 | return (0); | |
736 | ||
737 | if ((buffer[0] & 0xc0) != 0x80) | |
738 | return (0); | |
739 | ||
740 | ch = ((ch & 0x1f) << 6) | (buffer[0] & 0x3f); | |
741 | } | |
742 | else if ((ch & 0xf0) == 0xe0) | |
743 | { | |
744 | // Three-byte UTF-8... | |
745 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
746 | return (0); | |
747 | ||
748 | if ((buffer[0] & 0xc0) != 0x80 || | |
749 | (buffer[1] & 0xc0) != 0x80) | |
750 | return (0); | |
751 | ||
752 | ch = ((((ch & 0x0f) << 6) | (buffer[0] & 0x3f)) << 6) | | |
753 | (buffer[1] & 0x3f); | |
754 | } | |
755 | else if ((ch & 0xf8) == 0xf0) | |
756 | { | |
757 | // Four-byte UTF-8... | |
758 | if (cupsFileRead(fp, (char *)buffer, 3) != 3) | |
759 | return (0); | |
760 | ||
761 | if ((buffer[0] & 0xc0) != 0x80 || | |
762 | (buffer[1] & 0xc0) != 0x80 || | |
763 | (buffer[2] & 0xc0) != 0x80) | |
764 | return (0); | |
765 | ||
766 | ch = ((((((ch & 0x07) << 6) | (buffer[0] & 0x3f)) << 6) | | |
767 | (buffer[1] & 0x3f)) << 6) | (buffer[2] & 0x3f); | |
768 | } | |
769 | } | |
770 | else | |
771 | { | |
772 | // UTF-16 character... | |
773 | if (cs == PPDC_CS_UTF16BE) | |
774 | ch = (buffer[0] << 8) | buffer[1]; | |
775 | else | |
776 | ch = (buffer[1] << 8) | buffer[0]; | |
777 | ||
778 | if (ch >= 0xd800 && ch <= 0xdbff) | |
779 | { | |
780 | // Handle multi-word encoding... | |
781 | int lch; | |
782 | ||
783 | if (cupsFileRead(fp, (char *)buffer, 2) != 2) | |
784 | return (0); | |
785 | ||
786 | if (cs == PPDC_CS_UTF16BE) | |
787 | lch = (buffer[0] << 8) | buffer[1]; | |
788 | else | |
789 | lch = (buffer[1] << 8) | buffer[0]; | |
790 | ||
791 | if (lch < 0xdc00 || lch >= 0xdfff) | |
792 | return (0); | |
793 | ||
794 | ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; | |
795 | } | |
796 | } | |
797 | ||
798 | return (ch); | |
799 | } | |
800 | ||
801 | ||
802 | // | |
803 | // 'put_utf8()' - Add a UTF-8 character to a string. | |
804 | // | |
805 | ||
806 | static int // O - 0 on success, -1 on failure | |
807 | put_utf8(int ch, // I - Unicode character | |
808 | char *&ptr, // IO - String pointer | |
809 | char *end) // I - End of buffer | |
810 | { | |
811 | if (ch < 0x80) | |
812 | { | |
813 | // One-byte ASCII... | |
814 | if (ptr >= end) | |
815 | return (-1); | |
816 | ||
817 | *ptr++ = ch; | |
818 | } | |
819 | else if (ch < 0x800) | |
820 | { | |
821 | // Two-byte UTF-8... | |
822 | if ((ptr + 1) >= end) | |
823 | return (-1); | |
824 | ||
825 | *ptr++ = 0xc0 | (ch >> 6); | |
826 | *ptr++ = 0x80 | (ch & 0x3f); | |
827 | } | |
828 | else if (ch < 0x10000) | |
829 | { | |
830 | // Three-byte UTF-8... | |
831 | if ((ptr + 2) >= end) | |
832 | return (-1); | |
833 | ||
834 | *ptr++ = 0xe0 | (ch >> 12); | |
835 | *ptr++ = 0x80 | ((ch >> 6) & 0x3f); | |
836 | *ptr++ = 0x80 | (ch & 0x3f); | |
837 | } | |
838 | else | |
839 | { | |
840 | // Four-byte UTF-8... | |
841 | if ((ptr + 3) >= end) | |
842 | return (-1); | |
843 | ||
844 | *ptr++ = 0xf0 | (ch >> 18); | |
845 | *ptr++ = 0x80 | ((ch >> 12) & 0x3f); | |
846 | *ptr++ = 0x80 | ((ch >> 6) & 0x3f); | |
847 | *ptr++ = 0x80 | (ch & 0x3f); | |
848 | } | |
849 | ||
850 | return (0); | |
851 | } | |
852 | ||
853 | ||
854 | // | |
855 | // 'put_utf16()' - Write a UTF-16 character to a file. | |
856 | // | |
857 | ||
858 | static int // O - 0 on success, -1 on failure | |
859 | put_utf16(cups_file_t *fp, // I - File to write to | |
860 | int ch) // I - Unicode character | |
861 | { | |
862 | unsigned char buffer[4]; // Output buffer | |
863 | ||
864 | ||
865 | if (ch < 0x10000) | |
866 | { | |
867 | // One-word UTF-16 big-endian... | |
868 | buffer[0] = ch >> 8; | |
869 | buffer[1] = ch; | |
870 | ||
871 | if (cupsFileWrite(fp, (char *)buffer, 2) == 2) | |
872 | return (0); | |
873 | } | |
874 | else | |
875 | { | |
876 | // Two-word UTF-16 big-endian... | |
877 | ch -= 0x10000; | |
878 | ||
879 | buffer[0] = 0xd8 | (ch >> 18); | |
880 | buffer[1] = ch >> 10; | |
881 | buffer[2] = 0xdc | ((ch >> 8) & 0x03); | |
882 | buffer[3] = ch; | |
883 | ||
884 | if (cupsFileWrite(fp, (char *)buffer, 4) == 4) | |
885 | return (0); | |
886 | } | |
887 | ||
888 | return (-1); | |
889 | } | |
890 | ||
891 | ||
ac884b6a MS |
892 | // |
893 | // End of "$Id$". | |
894 | // |