]> git.ipfire.org Git - thirdparty/cups.git/blob - ppdc/ppdc-catalog.cxx
Merge CUPS 1.4svn-r8052 (tentative 1.4b1)
[thirdparty/cups.git] / ppdc / ppdc-catalog.cxx
1 //
2 // "$Id$"
3 //
4 // Shared message catalog class for the CUPS PPD Compiler.
5 //
6 // Copyright 2007-2008 by Apple Inc.
7 // Copyright 2002-2006 by Easy Software Products.
8 //
9 // These coded instructions, statements, and computer programs are the
10 // property of Apple Inc. and are protected by Federal copyright
11 // law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 // which should have been included with this file. If this
13 // file is missing or damaged, see the license at "http://www.cups.org/".
14 //
15 // Contents:
16 //
17 //
18
19 //
20 // Include necessary headers...
21 //
22
23 #include "ppdc.h"
24 #include <cups/globals.h>
25
26
//
// Character encodings...
//
// Tells get_utf16() how to decode the bytes of a message file.  A reader
// starts in PPDC_CS_AUTO and get_utf16() replaces it with one of the
// concrete encodings after looking at the first two bytes of the file.
//

typedef enum
{
  PPDC_CS_AUTO    = 0,                  // Not yet determined
  PPDC_CS_UTF8    = 1,                  // UTF-8
  PPDC_CS_UTF16BE = 2,                  // UTF-16, big-endian
  PPDC_CS_UTF16LE = 3                   // UTF-16, little-endian
} ppdc_cs_t;
38
39
40 //
41 // Local functions...
42 //
43
44 static int get_utf8(char *&ptr);
45 static int get_utf16(cups_file_t *fp, ppdc_cs_t &cs);
46 static int put_utf8(int ch, char *&ptr, char *end);
47 static int put_utf16(cups_file_t *fp, int ch);
48
49
50 //
51 // 'ppdcCatalog::ppdcCatalog()' - Create a shared message catalog.
52 //
53
54 ppdcCatalog::ppdcCatalog(const char *l, // I - Locale
55 const char *f) // I - Message catalog file
56 : ppdcShared()
57 {
58 _cups_globals_t *cg = _cupsGlobals();
59 // Global information
60
61
62 locale = new ppdcString(l);
63 filename = new ppdcString(f);
64 messages = new ppdcArray();
65
66 if (l)
67 {
68 // Try loading the base messages for this locale...
69 char pofile[1024]; // Message catalog file
70
71
72 snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, l, l);
73
74 if (load_messages(pofile) && strchr(l, '_'))
75 {
76 // Try the base locale...
77 char baseloc[3]; // Base locale...
78
79
80 strlcpy(baseloc, l, sizeof(baseloc));
81 snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir,
82 baseloc, baseloc);
83
84 load_messages(pofile);
85 }
86 }
87
88 if (f)
89 load_messages(f);
90 }
91
92
93 //
94 // 'ppdcCatalog::~ppdcCatalog()' - Destroy a shared message catalog.
95 //
96
97 ppdcCatalog::~ppdcCatalog()
98 {
99 delete locale;
100 delete filename;
101 delete messages;
102 }
103
104
105 //
106 // 'ppdcCatalog::add_message()' - Add a new message.
107 //
108
109 void
110 ppdcCatalog::add_message(
111 const char *id, // I - Message ID to add
112 const char *string) // I - Translation string
113 {
114 ppdcMessage *m; // Current message
115 char text[1024]; // Text to translate
116
117
118 // Range check input...
119 if (!id)
120 return;
121
122 // Verify that we don't already have the message ID...
123 for (m = (ppdcMessage *)messages->first();
124 m;
125 m = (ppdcMessage *)messages->next())
126 if (!strcmp(m->id->value, id))
127 {
128 if (string)
129 {
130 m->string->release();
131 m->string = new ppdcString(string);
132 }
133 return;
134 }
135
136 // Add the message...
137 if (!string)
138 {
139 snprintf(text, sizeof(text), "TRANSLATE %s", id);
140 string = text;
141 }
142
143 messages->add(new ppdcMessage(id, text));
144 }
145
146
147 //
148 // 'ppdcCatalog::find_message()' - Find a message in a catalog...
149 //
150
151 const char * // O - Message text
152 ppdcCatalog::find_message(
153 const char *id) // I - Message ID
154 {
155 ppdcMessage *m; // Current message
156
157
158 for (m = (ppdcMessage *)messages->first();
159 m;
160 m = (ppdcMessage *)messages->next())
161 if (!strcmp(m->id->value, id))
162 return (m->string->value);
163
164 return (id);
165 }
166
167
168 //
169 // 'ppdcCatalog::load_messages()' - Load messages from a .po file.
170 //
171
172 int // O - 0 on success, -1 on failure
173 ppdcCatalog::load_messages(
174 const char *f) // I - Message catalog file
175 {
176 cups_file_t *fp; // Message file
177 char line[4096], // Line buffer
178 *ptr, // Pointer into buffer
179 id[4096], // Translation ID
180 str[4096]; // Translation string
181 int linenum; // Line number
182
183
184 // Open the message catalog file...
185 if ((fp = cupsFileOpen(f, "r")) == NULL)
186 return (-1);
187
188 if ((ptr = (char *)strrchr(f, '.')) == NULL)
189 goto unknown_load_format;
190 else if (!strcmp(ptr, ".strings"))
191 {
192 /*
193 * Read messages in Mac OS X ".strings" format, which are UTF-16 text
194 * files of the format:
195 *
196 * "id" = "str";
197 *
198 * Strings files can also contain C-style comments.
199 */
200
201 ppdc_cs_t cs = PPDC_CS_AUTO; // Character set for file
202 int ch; // Current character from file
203 char *end; // End of buffer
204
205
206 id[0] = '\0';
207 str[0] = '\0';
208 ptr = NULL;
209 end = NULL;
210
211 while ((ch = get_utf16(fp, cs)) != 0)
212 {
213 if (ptr)
214 {
215 if (ch == '\\')
216 {
217 if ((ch = get_utf16(fp, cs)) == 0)
218 break;
219
220 if (ch == 'n')
221 ch = '\n';
222 else if (ch == 't')
223 ch = '\t';
224 }
225 else if (ch == '\"')
226 {
227 *ptr = '\0';
228 ptr = NULL;
229 }
230
231 if (ptr)
232 put_utf8(ch, ptr, end);
233 }
234 else if (ch == '/')
235 {
236 // Start of a comment?
237 if ((ch = get_utf16(fp, cs)) == 0)
238 break;
239
240 if (ch == '*')
241 {
242 // Skip C comment...
243 int lastch = 0;
244
245 while ((ch = get_utf16(fp, cs)) != 0)
246 {
247 if (ch == '/' && lastch == '*')
248 break;
249
250 lastch = ch;
251 }
252 }
253 else if (ch == '/')
254 {
255 // Skip C++ comment...
256 while ((ch = get_utf16(fp, cs)) != 0)
257 if (ch == '\n')
258 break;
259 }
260 }
261 else if (ch == '\"')
262 {
263 // Start quoted string...
264 if (id[0])
265 {
266 ptr = str;
267 end = str + sizeof(str) - 1;
268 }
269 else
270 {
271 ptr = id;
272 end = id + sizeof(id) - 1;
273 }
274 }
275 else if (ch == ';')
276 {
277 // Add string...
278 add_message(id, str);
279 }
280 }
281 }
282 else if (!strcmp(ptr, ".po") || !strcmp(ptr, ".gz"))
283 {
284 /*
285 * Read messages from the catalog file until EOF...
286 *
287 * The format is the GNU gettext .po format, which is fairly simple:
288 *
289 * msgid "some text"
290 * msgstr "localized text"
291 *
292 * The ID and localized text can span multiple lines using the form:
293 *
294 * msgid ""
295 * "some long text"
296 * msgstr ""
297 * "localized text spanning "
298 * "multiple lines"
299 */
300
301 int which, // In msgid?
302 haveid, // Did we get a msgid string?
303 havestr; // Did we get a msgstr string?
304
305 linenum = 0;
306 id[0] = '\0';
307 str[0] = '\0';
308 haveid = 0;
309 havestr = 0;
310 which = 0;
311
312 while (cupsFileGets(fp, line, sizeof(line)))
313 {
314 linenum ++;
315
316 // Skip blank and comment lines...
317 if (line[0] == '#' || !line[0])
318 continue;
319
320 // Strip the trailing quote...
321 if ((ptr = (char *)strrchr(line, '\"')) == NULL)
322 {
323 _cupsLangPrintf(stderr,
324 _("ERROR: Expected quoted string on line %d of %s!\n"),
325 linenum, f);
326 cupsFileClose(fp);
327 return (-1);
328 }
329
330 *ptr = '\0';
331
332 // Find start of value...
333 if ((ptr = strchr(line, '\"')) == NULL)
334 {
335 _cupsLangPrintf(stderr,
336 _("ERROR: Expected quoted string on line %d of %s!\n"),
337 linenum, f);
338 cupsFileClose(fp);
339 return (-1);
340 }
341
342 ptr ++;
343
344 // Unquote the text...
345 char *sptr, *dptr; // Source/destination pointers
346
347 for (sptr = ptr, dptr = ptr; *sptr;)
348 {
349 if (*sptr == '\\')
350 {
351 sptr ++;
352 if (isdigit(*sptr))
353 {
354 *dptr = 0;
355
356 while (isdigit(*sptr))
357 {
358 *dptr = *dptr * 8 + *sptr - '0';
359 sptr ++;
360 }
361
362 dptr ++;
363 }
364 else
365 {
366 if (*sptr == 'n')
367 *dptr++ = '\n';
368 else if (*sptr == 'r')
369 *dptr++ = '\r';
370 else if (*sptr == 't')
371 *dptr++ = '\t';
372 else
373 *dptr++ = *sptr;
374
375 sptr ++;
376 }
377 }
378 else
379 *dptr++ = *sptr++;
380 }
381
382 *dptr = '\0';
383
384 // Create or add to a message...
385 if (!strncmp(line, "msgid", 5))
386 {
387 if (haveid && havestr)
388 add_message(id, str);
389
390 strlcpy(id, ptr, sizeof(id));
391 str[0] = '\0';
392 haveid = 1;
393 havestr = 0;
394 which = 1;
395 }
396 else if (!strncmp(line, "msgstr", 6))
397 {
398 if (!haveid)
399 {
400 _cupsLangPrintf(stderr,
401 _("ERROR: Need a msgid line before any "
402 "translation strings on line %d of %s!\n"),
403 linenum, f);
404 cupsFileClose(fp);
405 return (-1);
406 }
407
408 strlcpy(str, ptr, sizeof(str));
409 havestr = 1;
410 which = 2;
411 }
412 else if (line[0] == '\"' && which == 2)
413 strlcat(str, ptr, sizeof(str));
414 else if (line[0] == '\"' && which == 1)
415 strlcat(id, ptr, sizeof(id));
416 else
417 {
418 _cupsLangPrintf(stderr, _("ERROR: Unexpected text on line %d of %s!\n"),
419 linenum, f);
420 cupsFileClose(fp);
421 return (-1);
422 }
423 }
424
425 if (haveid && havestr)
426 add_message(id, str);
427 }
428 else
429 goto unknown_load_format;
430
431 /*
432 * Close the file and return...
433 */
434
435 cupsFileClose(fp);
436
437 return (0);
438
439 /*
440 * Unknown format error...
441 */
442
443 unknown_load_format:
444
445 _cupsLangPrintf(stderr,
446 _("ERROR: Unknown message catalog format for \"%s\"!\n"), f);
447 cupsFileClose(fp);
448 return (-1);
449 }
450
451
452 //
453 // 'ppdcCatalog::save_messages()' - Save the messages to a .po file.
454 //
455
456 int // O - 0 on success, -1 on error
457 ppdcCatalog::save_messages(
458 const char *f) // I - File to save to
459 {
460 cups_file_t *fp; // Message file
461 ppdcMessage *m; // Current message
462 char *ptr; // Pointer into string
463 int utf16; // Output UTF-16 .strings file?
464 int ch; // Current character
465
466
467 // Open the file...
468 if ((ptr = (char *)strrchr(f, '.')) == NULL)
469 return (-1);
470
471 if (!strcmp(ptr, ".gz"))
472 fp = cupsFileOpen(f, "w9");
473 else
474 fp = cupsFileOpen(f, "w");
475
476 if (!fp)
477 return (-1);
478
479 // For .strings files, write a BOM for big-endian output...
480 utf16 = !strcmp(ptr, ".strings");
481
482 if (utf16)
483 put_utf16(fp, 0xfeff);
484
485 // Loop through all of the messages...
486 for (m = (ppdcMessage *)messages->first();
487 m;
488 m = (ppdcMessage *)messages->next())
489 {
490 if (utf16)
491 {
492 put_utf16(fp, '\"');
493
494 ptr = m->id->value;
495 while ((ch = get_utf8(ptr)) != 0)
496 switch (ch)
497 {
498 case '\n' :
499 put_utf16(fp, '\\');
500 put_utf16(fp, 'n');
501 break;
502 case '\\' :
503 put_utf16(fp, '\\');
504 put_utf16(fp, '\\');
505 break;
506 case '\"' :
507 put_utf16(fp, '\\');
508 put_utf16(fp, '\"');
509 break;
510 default :
511 put_utf16(fp, ch);
512 break;
513 }
514
515 put_utf16(fp, '\"');
516 put_utf16(fp, ' ');
517 put_utf16(fp, '=');
518 put_utf16(fp, ' ');
519 put_utf16(fp, '\"');
520
521 ptr = m->string->value;
522 while ((ch = get_utf8(ptr)) != 0)
523 switch (ch)
524 {
525 case '\n' :
526 put_utf16(fp, '\\');
527 put_utf16(fp, 'n');
528 break;
529 case '\\' :
530 put_utf16(fp, '\\');
531 put_utf16(fp, '\\');
532 break;
533 case '\"' :
534 put_utf16(fp, '\\');
535 put_utf16(fp, '\"');
536 break;
537 default :
538 put_utf16(fp, ch);
539 break;
540 }
541
542 put_utf16(fp, '\"');
543 put_utf16(fp, ';');
544 put_utf16(fp, '\n');
545 }
546 else
547 {
548 cupsFilePuts(fp, "msgid \"");
549 for (ptr = m->id->value; *ptr; ptr ++)
550 switch (*ptr)
551 {
552 case '\n' :
553 cupsFilePuts(fp, "\\n");
554 break;
555 case '\\' :
556 cupsFilePuts(fp, "\\\\");
557 break;
558 case '\"' :
559 cupsFilePuts(fp, "\\\"");
560 break;
561 default :
562 cupsFilePutChar(fp, *ptr);
563 break;
564 }
565 cupsFilePuts(fp, "\"\n");
566
567 cupsFilePuts(fp, "msgstr \"");
568 for (ptr = m->string->value; *ptr; ptr ++)
569 switch (*ptr)
570 {
571 case '\n' :
572 cupsFilePuts(fp, "\\n");
573 break;
574 case '\\' :
575 cupsFilePuts(fp, "\\\\");
576 break;
577 case '\"' :
578 cupsFilePuts(fp, "\\\"");
579 break;
580 default :
581 cupsFilePutChar(fp, *ptr);
582 break;
583 }
584 cupsFilePuts(fp, "\"\n");
585
586 cupsFilePutChar(fp, '\n');
587 }
588 }
589
590 cupsFileClose(fp);
591
592 return (0);
593 }
594
595
//
// 'get_utf8()' - Get a UTF-8 character.
//
// Decodes one character starting at "ptr" and advances "ptr" past the
// bytes consumed.  Bytes below 0xc0 and bytes from 0xf8 up are returned
// unchanged.  When a continuation byte is missing, 0 is returned with
// "ptr" left on the offending byte.
//

static int                              // O  - Unicode character or 0 on EOF
get_utf8(char *&ptr)                    // IO - Pointer to character
{
  int   lead = *ptr++ & 255;            // Leading byte
  int   more;                           // Continuation bytes still expected
  int   ch;                             // Character being assembled


  // ASCII (including the terminating nul) and stray continuation bytes...
  if (lead < 0xc0)
    return (lead);

  // Work out the sequence length and payload bits from the leading byte...
  if ((lead & 0xe0) == 0xc0)
  {
    more = 1;
    ch   = lead & 0x1f;
  }
  else if ((lead & 0xf0) == 0xe0)
  {
    more = 2;
    ch   = lead & 0x0f;
  }
  else if ((lead & 0xf8) == 0xf0)
  {
    more = 3;
    ch   = lead & 0x07;
  }
  else
    return (lead);

  // Fold in six bits from each continuation byte...
  for (; more > 0; more --)
  {
    if ((*ptr & 0xc0) != 0x80)
      return (0);

    ch = (ch << 6) | (*ptr++ & 0x3f);
  }

  return (ch);
}
651
652
653 //
654 // 'get_utf16()' - Get a UTF-16 character...
655 //
656
657 static int // O - Unicode character or 0 on EOF
658 get_utf16(cups_file_t *fp, // I - File to read from
659 ppdc_cs_t &cs) // IO - Character set of file
660 {
661 int ch; // Current character
662 unsigned char buffer[3]; // Bytes
663
664
665 if (cs == PPDC_CS_AUTO)
666 {
667 // Get byte-order-mark, if present...
668 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
669 return (0);
670
671 if (buffer[0] == 0xfe && buffer[1] == 0xff)
672 {
673 // Big-endian UTF-16...
674 cs = PPDC_CS_UTF16BE;
675
676 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
677 return (0);
678 }
679 else if (buffer[0] == 0xff && buffer[1] == 0xfe)
680 {
681 // Little-endian UTF-16...
682 cs = PPDC_CS_UTF16LE;
683
684 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
685 return (0);
686 }
687 else if (buffer[0] == 0x00 && buffer[1] != 0x00)
688 {
689 // No BOM, assume big-endian UTF-16...
690 cs = PPDC_CS_UTF16BE;
691 }
692 else if (buffer[0] != 0x00 && buffer[1] == 0x00)
693 {
694 // No BOM, assume little-endian UTF-16...
695 cs = PPDC_CS_UTF16LE;
696 }
697 else
698 {
699 // No BOM, assume UTF-8...
700 cs = PPDC_CS_UTF8;
701
702 cupsFileRewind(fp);
703 }
704 }
705 else if (cs != PPDC_CS_UTF8)
706 {
707 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
708 return (0);
709 }
710
711 if (cs == PPDC_CS_UTF8)
712 {
713 // UTF-8 character...
714 if ((ch = cupsFileGetChar(fp)) < 0)
715 return (0);
716
717 if ((ch & 0xe0) == 0xc0)
718 {
719 // Two-byte UTF-8...
720 if (cupsFileRead(fp, (char *)buffer, 1) != 1)
721 return (0);
722
723 if ((buffer[0] & 0xc0) != 0x80)
724 return (0);
725
726 ch = ((ch & 0x1f) << 6) | (buffer[0] & 0x3f);
727 }
728 else if ((ch & 0xf0) == 0xe0)
729 {
730 // Three-byte UTF-8...
731 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
732 return (0);
733
734 if ((buffer[0] & 0xc0) != 0x80 ||
735 (buffer[1] & 0xc0) != 0x80)
736 return (0);
737
738 ch = ((((ch & 0x0f) << 6) | (buffer[0] & 0x3f)) << 6) |
739 (buffer[1] & 0x3f);
740 }
741 else if ((ch & 0xf8) == 0xf0)
742 {
743 // Four-byte UTF-8...
744 if (cupsFileRead(fp, (char *)buffer, 3) != 3)
745 return (0);
746
747 if ((buffer[0] & 0xc0) != 0x80 ||
748 (buffer[1] & 0xc0) != 0x80 ||
749 (buffer[2] & 0xc0) != 0x80)
750 return (0);
751
752 ch = ((((((ch & 0x07) << 6) | (buffer[0] & 0x3f)) << 6) |
753 (buffer[1] & 0x3f)) << 6) | (buffer[2] & 0x3f);
754 }
755 }
756 else
757 {
758 // UTF-16 character...
759 if (cs == PPDC_CS_UTF16BE)
760 ch = (buffer[0] << 8) | buffer[1];
761 else
762 ch = (buffer[1] << 8) | buffer[0];
763
764 if (ch >= 0xd800 && ch <= 0xdbff)
765 {
766 // Handle multi-word encoding...
767 int lch;
768
769 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
770 return (0);
771
772 if (cs == PPDC_CS_UTF16BE)
773 lch = (buffer[0] << 8) | buffer[1];
774 else
775 lch = (buffer[1] << 8) | buffer[0];
776
777 if (lch < 0xdc00 || lch >= 0xdfff)
778 return (0);
779
780 ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
781 }
782 }
783
784 return (ch);
785 }
786
787
//
// 'put_utf8()' - Add a UTF-8 character to a string.
//
// Writes "ch" at "ptr" using one to four bytes and advances "ptr" past
// them.  Nothing is written, and "ptr" is left alone, unless the whole
// sequence fits before "end".
//

static int                              // O  - 0 on success, -1 on failure
put_utf8(int  ch,                       // I  - Unicode character
         char *&ptr,                    // IO - String pointer
         char *end)                     // I  - End of buffer
{
  int   len;                            // Number of bytes needed


  // How long is the encoded form?
  if (ch < 0x80)
    len = 1;
  else if (ch < 0x800)
    len = 2;
  else if (ch < 0x10000)
    len = 3;
  else
    len = 4;

  // The last byte written must fall before "end"...
  if ((ptr + (len - 1)) >= end)
    return (-1);

  switch (len)
  {
    case 1 :                            // One-byte ASCII...
        *ptr++ = ch;
        break;

    case 2 :                            // Two-byte UTF-8...
        *ptr++ = 0xc0 | (ch >> 6);
        *ptr++ = 0x80 | (ch & 0x3f);
        break;

    case 3 :                            // Three-byte UTF-8...
        *ptr++ = 0xe0 | (ch >> 12);
        *ptr++ = 0x80 | ((ch >> 6) & 0x3f);
        *ptr++ = 0x80 | (ch & 0x3f);
        break;

    default :                           // Four-byte UTF-8...
        *ptr++ = 0xf0 | (ch >> 18);
        *ptr++ = 0x80 | ((ch >> 12) & 0x3f);
        *ptr++ = 0x80 | ((ch >> 6) & 0x3f);
        *ptr++ = 0x80 | (ch & 0x3f);
        break;
  }

  return (0);
}
838
839
840 //
841 // 'put_utf16()' - Write a UTF-16 character to a file.
842 //
843
844 static int // O - 0 on success, -1 on failure
845 put_utf16(cups_file_t *fp, // I - File to write to
846 int ch) // I - Unicode character
847 {
848 unsigned char buffer[4]; // Output buffer
849
850
851 if (ch < 0x10000)
852 {
853 // One-word UTF-16 big-endian...
854 buffer[0] = ch >> 8;
855 buffer[1] = ch;
856
857 if (cupsFileWrite(fp, (char *)buffer, 2) == 2)
858 return (0);
859 }
860 else
861 {
862 // Two-word UTF-16 big-endian...
863 ch -= 0x10000;
864
865 buffer[0] = 0xd8 | (ch >> 18);
866 buffer[1] = ch >> 10;
867 buffer[2] = 0xdc | ((ch >> 8) & 0x03);
868 buffer[3] = ch;
869
870 if (cupsFileWrite(fp, (char *)buffer, 4) == 4)
871 return (0);
872 }
873
874 return (-1);
875 }
876
877
878 //
879 // End of "$Id$".
880 //