]> git.ipfire.org Git - thirdparty/cups.git/blob - ppdc/ppdc-catalog.cxx
Merge changes from CUPS 1.5svn-r8916.
[thirdparty/cups.git] / ppdc / ppdc-catalog.cxx
1 //
2 // "$Id$"
3 //
4 // Shared message catalog class for the CUPS PPD Compiler.
5 //
6 // Copyright 2007-2009 by Apple Inc.
7 // Copyright 2002-2006 by Easy Software Products.
8 //
9 // These coded instructions, statements, and computer programs are the
10 // property of Apple Inc. and are protected by Federal copyright
11 // law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 // which should have been included with this file. If this
13 // file is missing or damaged, see the license at "http://www.cups.org/".
14 //
15 // Contents:
16 //
17 // ppdcCatalog::ppdcCatalog() - Create a shared message catalog.
18 // ppdcCatalog::~ppdcCatalog() - Destroy a shared message catalog.
19 // ppdcCatalog::add_message() - Add a new message.
20 // ppdcCatalog::find_message() - Find a message in a catalog...
21 // ppdcCatalog::load_messages() - Load messages from a .po file.
22 // ppdcCatalog::save_messages() - Save the messages to a .po file.
23 // get_utf8() - Get a UTF-8 character.
24 // get_utf16() - Get a UTF-16 character...
25 // put_utf8() - Add a UTF-8 character to a string.
26 // put_utf16() - Write a UTF-16 character to a file.
27 //
28
29 //
30 // Include necessary headers...
31 //
32
33 #include "ppdc-private.h"
34
35
//
// Character encodings understood by the catalog reader...
//

enum ppdc_cs_t
{
  PPDC_CS_AUTO = 0,                     // Not determined yet
  PPDC_CS_UTF8 = 1,                     // UTF-8
  PPDC_CS_UTF16BE = 2,                  // UTF-16, big-endian
  PPDC_CS_UTF16LE = 3                   // UTF-16, little-endian
};
47
48
49 //
50 // Local functions...
51 //
52
53 static int get_utf8(char *&ptr);
54 static int get_utf16(cups_file_t *fp, ppdc_cs_t &cs);
55 static int put_utf8(int ch, char *&ptr, char *end);
56 static int put_utf16(cups_file_t *fp, int ch);
57
58
59 //
60 // 'ppdcCatalog::ppdcCatalog()' - Create a shared message catalog.
61 //
62
63 ppdcCatalog::ppdcCatalog(const char *l, // I - Locale
64 const char *f) // I - Message catalog file
65 : ppdcShared()
66 {
67 _cups_globals_t *cg = _cupsGlobals();
68 // Global information
69
70
71 PPDC_NEW;
72
73 locale = new ppdcString(l);
74 filename = new ppdcString(f);
75 messages = new ppdcArray();
76
77 if (l)
78 {
79 // Try loading the base messages for this locale...
80 char pofile[1024]; // Message catalog file
81
82
83 snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, l, l);
84
85 if (load_messages(pofile) && strchr(l, '_'))
86 {
87 // Try the base locale...
88 char baseloc[3]; // Base locale...
89
90
91 strlcpy(baseloc, l, sizeof(baseloc));
92 snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir,
93 baseloc, baseloc);
94
95 load_messages(pofile);
96 }
97 }
98
99 if (f)
100 load_messages(f);
101 }
102
103
104 //
105 // 'ppdcCatalog::~ppdcCatalog()' - Destroy a shared message catalog.
106 //
107
108 ppdcCatalog::~ppdcCatalog()
109 {
110 PPDC_DELETE;
111
112 locale->release();
113 filename->release();
114 messages->release();
115 }
116
117
118 //
119 // 'ppdcCatalog::add_message()' - Add a new message.
120 //
121
122 void
123 ppdcCatalog::add_message(
124 const char *id, // I - Message ID to add
125 const char *string) // I - Translation string
126 {
127 ppdcMessage *m; // Current message
128 char text[1024]; // Text to translate
129
130
131 // Range check input...
132 if (!id)
133 return;
134
135 // Verify that we don't already have the message ID...
136 for (m = (ppdcMessage *)messages->first();
137 m;
138 m = (ppdcMessage *)messages->next())
139 if (!strcmp(m->id->value, id))
140 {
141 if (string)
142 {
143 m->string->release();
144 m->string = new ppdcString(string);
145 }
146 return;
147 }
148
149 // Add the message...
150 if (!string)
151 {
152 snprintf(text, sizeof(text), "TRANSLATE %s", id);
153 string = text;
154 }
155
156 messages->add(new ppdcMessage(id, string));
157 }
158
159
160 //
161 // 'ppdcCatalog::find_message()' - Find a message in a catalog...
162 //
163
164 const char * // O - Message text
165 ppdcCatalog::find_message(
166 const char *id) // I - Message ID
167 {
168 ppdcMessage *m; // Current message
169
170
171 if (!*id)
172 return (id);
173
174 for (m = (ppdcMessage *)messages->first();
175 m;
176 m = (ppdcMessage *)messages->next())
177 if (!strcmp(m->id->value, id))
178 return (m->string->value);
179
180 return (id);
181 }
182
183
184 //
185 // 'ppdcCatalog::load_messages()' - Load messages from a .po file.
186 //
187
188 int // O - 0 on success, -1 on failure
189 ppdcCatalog::load_messages(
190 const char *f) // I - Message catalog file
191 {
192 cups_file_t *fp; // Message file
193 char line[4096], // Line buffer
194 *ptr, // Pointer into buffer
195 id[4096], // Translation ID
196 str[4096]; // Translation string
197 int linenum; // Line number
198
199
200 // Open the message catalog file...
201 if ((fp = cupsFileOpen(f, "r")) == NULL)
202 return (-1);
203
204 if ((ptr = (char *)strrchr(f, '.')) == NULL)
205 goto unknown_load_format;
206 else if (!strcmp(ptr, ".strings"))
207 {
208 /*
209 * Read messages in Mac OS X ".strings" format, which are UTF-16 text
210 * files of the format:
211 *
212 * "id" = "str";
213 *
214 * Strings files can also contain C-style comments.
215 */
216
217 ppdc_cs_t cs = PPDC_CS_AUTO; // Character set for file
218 int ch; // Current character from file
219 char *end; // End of buffer
220
221
222 id[0] = '\0';
223 str[0] = '\0';
224 ptr = NULL;
225 end = NULL;
226
227 while ((ch = get_utf16(fp, cs)) != 0)
228 {
229 if (ptr)
230 {
231 if (ch == '\\')
232 {
233 if ((ch = get_utf16(fp, cs)) == 0)
234 break;
235
236 if (ch == 'n')
237 ch = '\n';
238 else if (ch == 't')
239 ch = '\t';
240 }
241 else if (ch == '\"')
242 {
243 *ptr = '\0';
244 ptr = NULL;
245 }
246
247 if (ptr)
248 put_utf8(ch, ptr, end);
249 }
250 else if (ch == '/')
251 {
252 // Start of a comment?
253 if ((ch = get_utf16(fp, cs)) == 0)
254 break;
255
256 if (ch == '*')
257 {
258 // Skip C comment...
259 int lastch = 0;
260
261 while ((ch = get_utf16(fp, cs)) != 0)
262 {
263 if (ch == '/' && lastch == '*')
264 break;
265
266 lastch = ch;
267 }
268 }
269 else if (ch == '/')
270 {
271 // Skip C++ comment...
272 while ((ch = get_utf16(fp, cs)) != 0)
273 if (ch == '\n')
274 break;
275 }
276 }
277 else if (ch == '\"')
278 {
279 // Start quoted string...
280 if (id[0])
281 {
282 ptr = str;
283 end = str + sizeof(str) - 1;
284 }
285 else
286 {
287 ptr = id;
288 end = id + sizeof(id) - 1;
289 }
290 }
291 else if (ch == ';')
292 {
293 // Add string...
294 add_message(id, str);
295 id[0] = '\0';
296 }
297 }
298 }
299 else if (!strcmp(ptr, ".po") || !strcmp(ptr, ".gz"))
300 {
301 /*
302 * Read messages from the catalog file until EOF...
303 *
304 * The format is the GNU gettext .po format, which is fairly simple:
305 *
306 * msgid "some text"
307 * msgstr "localized text"
308 *
309 * The ID and localized text can span multiple lines using the form:
310 *
311 * msgid ""
312 * "some long text"
313 * msgstr ""
314 * "localized text spanning "
315 * "multiple lines"
316 */
317
318 int which, // In msgid?
319 haveid, // Did we get a msgid string?
320 havestr; // Did we get a msgstr string?
321
322 linenum = 0;
323 id[0] = '\0';
324 str[0] = '\0';
325 haveid = 0;
326 havestr = 0;
327 which = 0;
328
329 while (cupsFileGets(fp, line, sizeof(line)))
330 {
331 linenum ++;
332
333 // Skip blank and comment lines...
334 if (line[0] == '#' || !line[0])
335 continue;
336
337 // Strip the trailing quote...
338 if ((ptr = (char *)strrchr(line, '\"')) == NULL)
339 {
340 _cupsLangPrintf(stderr,
341 _("ERROR: Expected quoted string on line %d of %s\n"),
342 linenum, f);
343 cupsFileClose(fp);
344 return (-1);
345 }
346
347 *ptr = '\0';
348
349 // Find start of value...
350 if ((ptr = strchr(line, '\"')) == NULL)
351 {
352 _cupsLangPrintf(stderr,
353 _("ERROR: Expected quoted string on line %d of %s\n"),
354 linenum, f);
355 cupsFileClose(fp);
356 return (-1);
357 }
358
359 ptr ++;
360
361 // Unquote the text...
362 char *sptr, *dptr; // Source/destination pointers
363
364 for (sptr = ptr, dptr = ptr; *sptr;)
365 {
366 if (*sptr == '\\')
367 {
368 sptr ++;
369 if (isdigit(*sptr))
370 {
371 *dptr = 0;
372
373 while (isdigit(*sptr))
374 {
375 *dptr = *dptr * 8 + *sptr - '0';
376 sptr ++;
377 }
378
379 dptr ++;
380 }
381 else
382 {
383 if (*sptr == 'n')
384 *dptr++ = '\n';
385 else if (*sptr == 'r')
386 *dptr++ = '\r';
387 else if (*sptr == 't')
388 *dptr++ = '\t';
389 else
390 *dptr++ = *sptr;
391
392 sptr ++;
393 }
394 }
395 else
396 *dptr++ = *sptr++;
397 }
398
399 *dptr = '\0';
400
401 // Create or add to a message...
402 if (!strncmp(line, "msgid", 5))
403 {
404 if (haveid && havestr)
405 add_message(id, str);
406
407 strlcpy(id, ptr, sizeof(id));
408 str[0] = '\0';
409 haveid = 1;
410 havestr = 0;
411 which = 1;
412 }
413 else if (!strncmp(line, "msgstr", 6))
414 {
415 if (!haveid)
416 {
417 _cupsLangPrintf(stderr,
418 _("ERROR: Need a msgid line before any "
419 "translation strings on line %d of %s\n"),
420 linenum, f);
421 cupsFileClose(fp);
422 return (-1);
423 }
424
425 strlcpy(str, ptr, sizeof(str));
426 havestr = 1;
427 which = 2;
428 }
429 else if (line[0] == '\"' && which == 2)
430 strlcat(str, ptr, sizeof(str));
431 else if (line[0] == '\"' && which == 1)
432 strlcat(id, ptr, sizeof(id));
433 else
434 {
435 _cupsLangPrintf(stderr, _("ERROR: Unexpected text on line %d of %s\n"),
436 linenum, f);
437 cupsFileClose(fp);
438 return (-1);
439 }
440 }
441
442 if (haveid && havestr)
443 add_message(id, str);
444 }
445 else
446 goto unknown_load_format;
447
448 /*
449 * Close the file and return...
450 */
451
452 cupsFileClose(fp);
453
454 return (0);
455
456 /*
457 * Unknown format error...
458 */
459
460 unknown_load_format:
461
462 _cupsLangPrintf(stderr,
463 _("ERROR: Unknown message catalog format for \"%s\"\n"), f);
464 cupsFileClose(fp);
465 return (-1);
466 }
467
468
469 //
470 // 'ppdcCatalog::save_messages()' - Save the messages to a .po file.
471 //
472
473 int // O - 0 on success, -1 on error
474 ppdcCatalog::save_messages(
475 const char *f) // I - File to save to
476 {
477 cups_file_t *fp; // Message file
478 ppdcMessage *m; // Current message
479 char *ptr; // Pointer into string
480 int utf16; // Output UTF-16 .strings file?
481 int ch; // Current character
482
483
484 // Open the file...
485 if ((ptr = (char *)strrchr(f, '.')) == NULL)
486 return (-1);
487
488 if (!strcmp(ptr, ".gz"))
489 fp = cupsFileOpen(f, "w9");
490 else
491 fp = cupsFileOpen(f, "w");
492
493 if (!fp)
494 return (-1);
495
496 // For .strings files, write a BOM for big-endian output...
497 utf16 = !strcmp(ptr, ".strings");
498
499 if (utf16)
500 put_utf16(fp, 0xfeff);
501
502 // Loop through all of the messages...
503 for (m = (ppdcMessage *)messages->first();
504 m;
505 m = (ppdcMessage *)messages->next())
506 {
507 if (utf16)
508 {
509 put_utf16(fp, '\"');
510
511 ptr = m->id->value;
512 while ((ch = get_utf8(ptr)) != 0)
513 switch (ch)
514 {
515 case '\n' :
516 put_utf16(fp, '\\');
517 put_utf16(fp, 'n');
518 break;
519 case '\\' :
520 put_utf16(fp, '\\');
521 put_utf16(fp, '\\');
522 break;
523 case '\"' :
524 put_utf16(fp, '\\');
525 put_utf16(fp, '\"');
526 break;
527 default :
528 put_utf16(fp, ch);
529 break;
530 }
531
532 put_utf16(fp, '\"');
533 put_utf16(fp, ' ');
534 put_utf16(fp, '=');
535 put_utf16(fp, ' ');
536 put_utf16(fp, '\"');
537
538 ptr = m->string->value;
539 while ((ch = get_utf8(ptr)) != 0)
540 switch (ch)
541 {
542 case '\n' :
543 put_utf16(fp, '\\');
544 put_utf16(fp, 'n');
545 break;
546 case '\\' :
547 put_utf16(fp, '\\');
548 put_utf16(fp, '\\');
549 break;
550 case '\"' :
551 put_utf16(fp, '\\');
552 put_utf16(fp, '\"');
553 break;
554 default :
555 put_utf16(fp, ch);
556 break;
557 }
558
559 put_utf16(fp, '\"');
560 put_utf16(fp, ';');
561 put_utf16(fp, '\n');
562 }
563 else
564 {
565 cupsFilePuts(fp, "msgid \"");
566 for (ptr = m->id->value; *ptr; ptr ++)
567 switch (*ptr)
568 {
569 case '\n' :
570 cupsFilePuts(fp, "\\n");
571 break;
572 case '\\' :
573 cupsFilePuts(fp, "\\\\");
574 break;
575 case '\"' :
576 cupsFilePuts(fp, "\\\"");
577 break;
578 default :
579 cupsFilePutChar(fp, *ptr);
580 break;
581 }
582 cupsFilePuts(fp, "\"\n");
583
584 cupsFilePuts(fp, "msgstr \"");
585 for (ptr = m->string->value; *ptr; ptr ++)
586 switch (*ptr)
587 {
588 case '\n' :
589 cupsFilePuts(fp, "\\n");
590 break;
591 case '\\' :
592 cupsFilePuts(fp, "\\\\");
593 break;
594 case '\"' :
595 cupsFilePuts(fp, "\\\"");
596 break;
597 default :
598 cupsFilePutChar(fp, *ptr);
599 break;
600 }
601 cupsFilePuts(fp, "\"\n");
602
603 cupsFilePutChar(fp, '\n');
604 }
605 }
606
607 cupsFileClose(fp);
608
609 return (0);
610 }
611
612
//
// 'get_utf8()' - Get a UTF-8 character.
//
// Decodes one character starting at "ptr" and advances "ptr" past the bytes
// that were consumed.  Bytes below 0xc0 are returned unchanged, so the nul
// terminator yields 0.  A missing continuation byte yields 0 with "ptr" left
// on the offending byte; a lead byte that cannot start a sequence
// (0xf8-0xff) also yields 0.
//

static int                              // O  - Unicode character or 0 on EOF
get_utf8(char *&ptr)                    // IO - Pointer to character
{
  int   ch;                             // Current character
  int   extra;                          // Continuation bytes still expected


  // Single bytes (and stray continuation bytes) are passed through as-is...
  if ((ch = *ptr++ & 255) < 0xc0)
    return (ch);

  // The lead byte gives the sequence length and the topmost payload bits...
  if ((ch & 0xe0) == 0xc0)
  {
    // Two-byte UTF-8...
    ch   &= 0x1f;
    extra = 1;
  }
  else if ((ch & 0xf0) == 0xe0)
  {
    // Three-byte UTF-8...
    ch   &= 0x0f;
    extra = 2;
  }
  else if ((ch & 0xf8) == 0xf0)
  {
    // Four-byte UTF-8...
    ch   &= 0x07;
    extra = 3;
  }
  else
  {
    // 0xf8-0xff never start a UTF-8 sequence; reject them like any other
    // malformed input instead of returning the raw byte as a character...
    return (0);
  }

  // Each continuation byte must look like 10xxxxxx and adds six bits...
  for (; extra > 0; extra --)
  {
    if ((*ptr & 0xc0) != 0x80)
      return (0);

    ch = (ch << 6) | (*ptr++ & 0x3f);
  }

  return (ch);
}
668
669
670 //
671 // 'get_utf16()' - Get a UTF-16 character...
672 //
673
674 static int // O - Unicode character or 0 on EOF
675 get_utf16(cups_file_t *fp, // I - File to read from
676 ppdc_cs_t &cs) // IO - Character set of file
677 {
678 int ch; // Current character
679 unsigned char buffer[3]; // Bytes
680
681
682 if (cs == PPDC_CS_AUTO)
683 {
684 // Get byte-order-mark, if present...
685 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
686 return (0);
687
688 if (buffer[0] == 0xfe && buffer[1] == 0xff)
689 {
690 // Big-endian UTF-16...
691 cs = PPDC_CS_UTF16BE;
692
693 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
694 return (0);
695 }
696 else if (buffer[0] == 0xff && buffer[1] == 0xfe)
697 {
698 // Little-endian UTF-16...
699 cs = PPDC_CS_UTF16LE;
700
701 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
702 return (0);
703 }
704 else if (buffer[0] == 0x00 && buffer[1] != 0x00)
705 {
706 // No BOM, assume big-endian UTF-16...
707 cs = PPDC_CS_UTF16BE;
708 }
709 else if (buffer[0] != 0x00 && buffer[1] == 0x00)
710 {
711 // No BOM, assume little-endian UTF-16...
712 cs = PPDC_CS_UTF16LE;
713 }
714 else
715 {
716 // No BOM, assume UTF-8...
717 cs = PPDC_CS_UTF8;
718
719 cupsFileRewind(fp);
720 }
721 }
722 else if (cs != PPDC_CS_UTF8)
723 {
724 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
725 return (0);
726 }
727
728 if (cs == PPDC_CS_UTF8)
729 {
730 // UTF-8 character...
731 if ((ch = cupsFileGetChar(fp)) < 0)
732 return (0);
733
734 if ((ch & 0xe0) == 0xc0)
735 {
736 // Two-byte UTF-8...
737 if (cupsFileRead(fp, (char *)buffer, 1) != 1)
738 return (0);
739
740 if ((buffer[0] & 0xc0) != 0x80)
741 return (0);
742
743 ch = ((ch & 0x1f) << 6) | (buffer[0] & 0x3f);
744 }
745 else if ((ch & 0xf0) == 0xe0)
746 {
747 // Three-byte UTF-8...
748 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
749 return (0);
750
751 if ((buffer[0] & 0xc0) != 0x80 ||
752 (buffer[1] & 0xc0) != 0x80)
753 return (0);
754
755 ch = ((((ch & 0x0f) << 6) | (buffer[0] & 0x3f)) << 6) |
756 (buffer[1] & 0x3f);
757 }
758 else if ((ch & 0xf8) == 0xf0)
759 {
760 // Four-byte UTF-8...
761 if (cupsFileRead(fp, (char *)buffer, 3) != 3)
762 return (0);
763
764 if ((buffer[0] & 0xc0) != 0x80 ||
765 (buffer[1] & 0xc0) != 0x80 ||
766 (buffer[2] & 0xc0) != 0x80)
767 return (0);
768
769 ch = ((((((ch & 0x07) << 6) | (buffer[0] & 0x3f)) << 6) |
770 (buffer[1] & 0x3f)) << 6) | (buffer[2] & 0x3f);
771 }
772 }
773 else
774 {
775 // UTF-16 character...
776 if (cs == PPDC_CS_UTF16BE)
777 ch = (buffer[0] << 8) | buffer[1];
778 else
779 ch = (buffer[1] << 8) | buffer[0];
780
781 if (ch >= 0xd800 && ch <= 0xdbff)
782 {
783 // Handle multi-word encoding...
784 int lch;
785
786 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
787 return (0);
788
789 if (cs == PPDC_CS_UTF16BE)
790 lch = (buffer[0] << 8) | buffer[1];
791 else
792 lch = (buffer[1] << 8) | buffer[0];
793
794 if (lch < 0xdc00 || lch >= 0xdfff)
795 return (0);
796
797 ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
798 }
799 }
800
801 return (ch);
802 }
803
804
//
// 'put_utf8()' - Add a UTF-8 character to a string.
//
// Appends the encoding of "ch" at "ptr" and advances "ptr" past it.  Nothing
// is written, and "ptr" is left alone, unless the whole encoding fits in
// front of "end".
//

static int                              // O  - 0 on success, -1 on failure
put_utf8(int  ch,                       // I  - Unicode character
         char *&ptr,                    // IO - String pointer
         char *end)                     // I  - End of buffer
{
  int   nbytes;                         // Length of the encoded character


  // How many bytes does this character take?
  if (ch < 0x80)
    nbytes = 1;
  else if (ch < 0x800)
    nbytes = 2;
  else if (ch < 0x10000)
    nbytes = 3;
  else
    nbytes = 4;

  // All or nothing - never write a partial sequence...
  if ((end - ptr) < nbytes)
    return (-1);

  switch (nbytes)
  {
    case 1 : // One-byte ASCII...
        *ptr++ = ch;
        break;

    case 2 : // Two-byte UTF-8...
        *ptr++ = 0xc0 | (ch >> 6);
        *ptr++ = 0x80 | (ch & 0x3f);
        break;

    case 3 : // Three-byte UTF-8...
        *ptr++ = 0xe0 | (ch >> 12);
        *ptr++ = 0x80 | ((ch >> 6) & 0x3f);
        *ptr++ = 0x80 | (ch & 0x3f);
        break;

    default : // Four-byte UTF-8...
        *ptr++ = 0xf0 | (ch >> 18);
        *ptr++ = 0x80 | ((ch >> 12) & 0x3f);
        *ptr++ = 0x80 | ((ch >> 6) & 0x3f);
        *ptr++ = 0x80 | (ch & 0x3f);
        break;
  }

  return (0);
}
855
856
857 //
858 // 'put_utf16()' - Write a UTF-16 character to a file.
859 //
860
861 static int // O - 0 on success, -1 on failure
862 put_utf16(cups_file_t *fp, // I - File to write to
863 int ch) // I - Unicode character
864 {
865 unsigned char buffer[4]; // Output buffer
866
867
868 if (ch < 0x10000)
869 {
870 // One-word UTF-16 big-endian...
871 buffer[0] = ch >> 8;
872 buffer[1] = ch;
873
874 if (cupsFileWrite(fp, (char *)buffer, 2) == 2)
875 return (0);
876 }
877 else
878 {
879 // Two-word UTF-16 big-endian...
880 ch -= 0x10000;
881
882 buffer[0] = 0xd8 | (ch >> 18);
883 buffer[1] = ch >> 10;
884 buffer[2] = 0xdc | ((ch >> 8) & 0x03);
885 buffer[3] = ch;
886
887 if (cupsFileWrite(fp, (char *)buffer, 4) == 4)
888 return (0);
889 }
890
891 return (-1);
892 }
893
894
895 //
896 // End of "$Id$".
897 //