]> git.ipfire.org Git - thirdparty/cups.git/blob - ppdc/ppdc-catalog.cxx
Merge changes from CUPS 1.4svn-r8469.
[thirdparty/cups.git] / ppdc / ppdc-catalog.cxx
1 //
2 // "$Id$"
3 //
4 // Shared message catalog class for the CUPS PPD Compiler.
5 //
6 // Copyright 2007-2009 by Apple Inc.
7 // Copyright 2002-2006 by Easy Software Products.
8 //
9 // These coded instructions, statements, and computer programs are the
10 // property of Apple Inc. and are protected by Federal copyright
11 // law. Distribution and use rights are outlined in the file "LICENSE.txt"
// which should have been included with this file.  If this file is
// missing or damaged, see the license at "http://www.cups.org/".
14 //
15 // Contents:
16 //
17 // ppdcCatalog::ppdcCatalog() - Create a shared message catalog.
18 // ppdcCatalog::~ppdcCatalog() - Destroy a shared message catalog.
19 // ppdcCatalog::add_message() - Add a new message.
20 // ppdcCatalog::find_message() - Find a message in a catalog...
21 // ppdcCatalog::load_messages() - Load messages from a .po file.
22 // ppdcCatalog::save_messages() - Save the messages to a .po file.
23 // get_utf8() - Get a UTF-8 character.
24 // get_utf16() - Get a UTF-16 character...
25 // put_utf8() - Add a UTF-8 character to a string.
26 // put_utf16() - Write a UTF-16 character to a file.
27 //
28
29 //
30 // Include necessary headers...
31 //
32
33 #include "ppdc.h"
34 #include <cups/globals.h>
35
36
//
// Character encodings...
//
// PPDC_CS_AUTO asks get_utf16() to detect the encoding from the first
// bytes of the file; the other values name the encoding in use.
//

enum ppdc_cs_t
{
  PPDC_CS_AUTO = 0,			// Detect from byte-order-mark/content
  PPDC_CS_UTF8 = 1,			// UTF-8
  PPDC_CS_UTF16BE = 2,			// Big-endian UTF-16
  PPDC_CS_UTF16LE = 3			// Little-endian UTF-16
};
48
49
50 //
51 // Local functions...
52 //
53
54 static int get_utf8(char *&ptr);
55 static int get_utf16(cups_file_t *fp, ppdc_cs_t &cs);
56 static int put_utf8(int ch, char *&ptr, char *end);
57 static int put_utf16(cups_file_t *fp, int ch);
58
59
60 //
61 // 'ppdcCatalog::ppdcCatalog()' - Create a shared message catalog.
62 //
63
64 ppdcCatalog::ppdcCatalog(const char *l, // I - Locale
65 const char *f) // I - Message catalog file
66 : ppdcShared()
67 {
68 _cups_globals_t *cg = _cupsGlobals();
69 // Global information
70
71
72 PPDC_NEW;
73
74 locale = new ppdcString(l);
75 filename = new ppdcString(f);
76 messages = new ppdcArray();
77
78 if (l)
79 {
80 // Try loading the base messages for this locale...
81 char pofile[1024]; // Message catalog file
82
83
84 snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir, l, l);
85
86 if (load_messages(pofile) && strchr(l, '_'))
87 {
88 // Try the base locale...
89 char baseloc[3]; // Base locale...
90
91
92 strlcpy(baseloc, l, sizeof(baseloc));
93 snprintf(pofile, sizeof(pofile), "%s/%s/cups_%s.po", cg->localedir,
94 baseloc, baseloc);
95
96 load_messages(pofile);
97 }
98 }
99
100 if (f)
101 load_messages(f);
102 }
103
104
105 //
106 // 'ppdcCatalog::~ppdcCatalog()' - Destroy a shared message catalog.
107 //
108
109 ppdcCatalog::~ppdcCatalog()
110 {
111 PPDC_DELETE;
112
113 locale->release();
114 filename->release();
115 messages->release();
116 }
117
118
119 //
120 // 'ppdcCatalog::add_message()' - Add a new message.
121 //
122
123 void
124 ppdcCatalog::add_message(
125 const char *id, // I - Message ID to add
126 const char *string) // I - Translation string
127 {
128 ppdcMessage *m; // Current message
129 char text[1024]; // Text to translate
130
131
132 // Range check input...
133 if (!id)
134 return;
135
136 // Verify that we don't already have the message ID...
137 for (m = (ppdcMessage *)messages->first();
138 m;
139 m = (ppdcMessage *)messages->next())
140 if (!strcmp(m->id->value, id))
141 {
142 if (string)
143 {
144 m->string->release();
145 m->string = new ppdcString(string);
146 }
147 return;
148 }
149
150 // Add the message...
151 if (!string)
152 {
153 snprintf(text, sizeof(text), "TRANSLATE %s", id);
154 string = text;
155 }
156
157 messages->add(new ppdcMessage(id, string));
158 }
159
160
161 //
162 // 'ppdcCatalog::find_message()' - Find a message in a catalog...
163 //
164
165 const char * // O - Message text
166 ppdcCatalog::find_message(
167 const char *id) // I - Message ID
168 {
169 ppdcMessage *m; // Current message
170
171
172 for (m = (ppdcMessage *)messages->first();
173 m;
174 m = (ppdcMessage *)messages->next())
175 if (!strcmp(m->id->value, id))
176 return (m->string->value);
177
178 return (id);
179 }
180
181
182 //
183 // 'ppdcCatalog::load_messages()' - Load messages from a .po file.
184 //
185
186 int // O - 0 on success, -1 on failure
187 ppdcCatalog::load_messages(
188 const char *f) // I - Message catalog file
189 {
190 cups_file_t *fp; // Message file
191 char line[4096], // Line buffer
192 *ptr, // Pointer into buffer
193 id[4096], // Translation ID
194 str[4096]; // Translation string
195 int linenum; // Line number
196
197
198 // Open the message catalog file...
199 if ((fp = cupsFileOpen(f, "r")) == NULL)
200 return (-1);
201
202 if ((ptr = (char *)strrchr(f, '.')) == NULL)
203 goto unknown_load_format;
204 else if (!strcmp(ptr, ".strings"))
205 {
206 /*
207 * Read messages in Mac OS X ".strings" format, which are UTF-16 text
208 * files of the format:
209 *
210 * "id" = "str";
211 *
212 * Strings files can also contain C-style comments.
213 */
214
215 ppdc_cs_t cs = PPDC_CS_AUTO; // Character set for file
216 int ch; // Current character from file
217 char *end; // End of buffer
218
219
220 id[0] = '\0';
221 str[0] = '\0';
222 ptr = NULL;
223 end = NULL;
224
225 while ((ch = get_utf16(fp, cs)) != 0)
226 {
227 if (ptr)
228 {
229 if (ch == '\\')
230 {
231 if ((ch = get_utf16(fp, cs)) == 0)
232 break;
233
234 if (ch == 'n')
235 ch = '\n';
236 else if (ch == 't')
237 ch = '\t';
238 }
239 else if (ch == '\"')
240 {
241 *ptr = '\0';
242 ptr = NULL;
243 }
244
245 if (ptr)
246 put_utf8(ch, ptr, end);
247 }
248 else if (ch == '/')
249 {
250 // Start of a comment?
251 if ((ch = get_utf16(fp, cs)) == 0)
252 break;
253
254 if (ch == '*')
255 {
256 // Skip C comment...
257 int lastch = 0;
258
259 while ((ch = get_utf16(fp, cs)) != 0)
260 {
261 if (ch == '/' && lastch == '*')
262 break;
263
264 lastch = ch;
265 }
266 }
267 else if (ch == '/')
268 {
269 // Skip C++ comment...
270 while ((ch = get_utf16(fp, cs)) != 0)
271 if (ch == '\n')
272 break;
273 }
274 }
275 else if (ch == '\"')
276 {
277 // Start quoted string...
278 if (id[0])
279 {
280 ptr = str;
281 end = str + sizeof(str) - 1;
282 }
283 else
284 {
285 ptr = id;
286 end = id + sizeof(id) - 1;
287 }
288 }
289 else if (ch == ';')
290 {
291 // Add string...
292 add_message(id, str);
293 id[0] = '\0';
294 }
295 }
296 }
297 else if (!strcmp(ptr, ".po") || !strcmp(ptr, ".gz"))
298 {
299 /*
300 * Read messages from the catalog file until EOF...
301 *
302 * The format is the GNU gettext .po format, which is fairly simple:
303 *
304 * msgid "some text"
305 * msgstr "localized text"
306 *
307 * The ID and localized text can span multiple lines using the form:
308 *
309 * msgid ""
310 * "some long text"
311 * msgstr ""
312 * "localized text spanning "
313 * "multiple lines"
314 */
315
316 int which, // In msgid?
317 haveid, // Did we get a msgid string?
318 havestr; // Did we get a msgstr string?
319
320 linenum = 0;
321 id[0] = '\0';
322 str[0] = '\0';
323 haveid = 0;
324 havestr = 0;
325 which = 0;
326
327 while (cupsFileGets(fp, line, sizeof(line)))
328 {
329 linenum ++;
330
331 // Skip blank and comment lines...
332 if (line[0] == '#' || !line[0])
333 continue;
334
335 // Strip the trailing quote...
336 if ((ptr = (char *)strrchr(line, '\"')) == NULL)
337 {
338 _cupsLangPrintf(stderr,
339 _("ERROR: Expected quoted string on line %d of %s!\n"),
340 linenum, f);
341 cupsFileClose(fp);
342 return (-1);
343 }
344
345 *ptr = '\0';
346
347 // Find start of value...
348 if ((ptr = strchr(line, '\"')) == NULL)
349 {
350 _cupsLangPrintf(stderr,
351 _("ERROR: Expected quoted string on line %d of %s!\n"),
352 linenum, f);
353 cupsFileClose(fp);
354 return (-1);
355 }
356
357 ptr ++;
358
359 // Unquote the text...
360 char *sptr, *dptr; // Source/destination pointers
361
362 for (sptr = ptr, dptr = ptr; *sptr;)
363 {
364 if (*sptr == '\\')
365 {
366 sptr ++;
367 if (isdigit(*sptr))
368 {
369 *dptr = 0;
370
371 while (isdigit(*sptr))
372 {
373 *dptr = *dptr * 8 + *sptr - '0';
374 sptr ++;
375 }
376
377 dptr ++;
378 }
379 else
380 {
381 if (*sptr == 'n')
382 *dptr++ = '\n';
383 else if (*sptr == 'r')
384 *dptr++ = '\r';
385 else if (*sptr == 't')
386 *dptr++ = '\t';
387 else
388 *dptr++ = *sptr;
389
390 sptr ++;
391 }
392 }
393 else
394 *dptr++ = *sptr++;
395 }
396
397 *dptr = '\0';
398
399 // Create or add to a message...
400 if (!strncmp(line, "msgid", 5))
401 {
402 if (haveid && havestr)
403 add_message(id, str);
404
405 strlcpy(id, ptr, sizeof(id));
406 str[0] = '\0';
407 haveid = 1;
408 havestr = 0;
409 which = 1;
410 }
411 else if (!strncmp(line, "msgstr", 6))
412 {
413 if (!haveid)
414 {
415 _cupsLangPrintf(stderr,
416 _("ERROR: Need a msgid line before any "
417 "translation strings on line %d of %s!\n"),
418 linenum, f);
419 cupsFileClose(fp);
420 return (-1);
421 }
422
423 strlcpy(str, ptr, sizeof(str));
424 havestr = 1;
425 which = 2;
426 }
427 else if (line[0] == '\"' && which == 2)
428 strlcat(str, ptr, sizeof(str));
429 else if (line[0] == '\"' && which == 1)
430 strlcat(id, ptr, sizeof(id));
431 else
432 {
433 _cupsLangPrintf(stderr, _("ERROR: Unexpected text on line %d of %s!\n"),
434 linenum, f);
435 cupsFileClose(fp);
436 return (-1);
437 }
438 }
439
440 if (haveid && havestr)
441 add_message(id, str);
442 }
443 else
444 goto unknown_load_format;
445
446 /*
447 * Close the file and return...
448 */
449
450 cupsFileClose(fp);
451
452 return (0);
453
454 /*
455 * Unknown format error...
456 */
457
458 unknown_load_format:
459
460 _cupsLangPrintf(stderr,
461 _("ERROR: Unknown message catalog format for \"%s\"!\n"), f);
462 cupsFileClose(fp);
463 return (-1);
464 }
465
466
467 //
468 // 'ppdcCatalog::save_messages()' - Save the messages to a .po file.
469 //
470
471 int // O - 0 on success, -1 on error
472 ppdcCatalog::save_messages(
473 const char *f) // I - File to save to
474 {
475 cups_file_t *fp; // Message file
476 ppdcMessage *m; // Current message
477 char *ptr; // Pointer into string
478 int utf16; // Output UTF-16 .strings file?
479 int ch; // Current character
480
481
482 // Open the file...
483 if ((ptr = (char *)strrchr(f, '.')) == NULL)
484 return (-1);
485
486 if (!strcmp(ptr, ".gz"))
487 fp = cupsFileOpen(f, "w9");
488 else
489 fp = cupsFileOpen(f, "w");
490
491 if (!fp)
492 return (-1);
493
494 // For .strings files, write a BOM for big-endian output...
495 utf16 = !strcmp(ptr, ".strings");
496
497 if (utf16)
498 put_utf16(fp, 0xfeff);
499
500 // Loop through all of the messages...
501 for (m = (ppdcMessage *)messages->first();
502 m;
503 m = (ppdcMessage *)messages->next())
504 {
505 if (utf16)
506 {
507 put_utf16(fp, '\"');
508
509 ptr = m->id->value;
510 while ((ch = get_utf8(ptr)) != 0)
511 switch (ch)
512 {
513 case '\n' :
514 put_utf16(fp, '\\');
515 put_utf16(fp, 'n');
516 break;
517 case '\\' :
518 put_utf16(fp, '\\');
519 put_utf16(fp, '\\');
520 break;
521 case '\"' :
522 put_utf16(fp, '\\');
523 put_utf16(fp, '\"');
524 break;
525 default :
526 put_utf16(fp, ch);
527 break;
528 }
529
530 put_utf16(fp, '\"');
531 put_utf16(fp, ' ');
532 put_utf16(fp, '=');
533 put_utf16(fp, ' ');
534 put_utf16(fp, '\"');
535
536 ptr = m->string->value;
537 while ((ch = get_utf8(ptr)) != 0)
538 switch (ch)
539 {
540 case '\n' :
541 put_utf16(fp, '\\');
542 put_utf16(fp, 'n');
543 break;
544 case '\\' :
545 put_utf16(fp, '\\');
546 put_utf16(fp, '\\');
547 break;
548 case '\"' :
549 put_utf16(fp, '\\');
550 put_utf16(fp, '\"');
551 break;
552 default :
553 put_utf16(fp, ch);
554 break;
555 }
556
557 put_utf16(fp, '\"');
558 put_utf16(fp, ';');
559 put_utf16(fp, '\n');
560 }
561 else
562 {
563 cupsFilePuts(fp, "msgid \"");
564 for (ptr = m->id->value; *ptr; ptr ++)
565 switch (*ptr)
566 {
567 case '\n' :
568 cupsFilePuts(fp, "\\n");
569 break;
570 case '\\' :
571 cupsFilePuts(fp, "\\\\");
572 break;
573 case '\"' :
574 cupsFilePuts(fp, "\\\"");
575 break;
576 default :
577 cupsFilePutChar(fp, *ptr);
578 break;
579 }
580 cupsFilePuts(fp, "\"\n");
581
582 cupsFilePuts(fp, "msgstr \"");
583 for (ptr = m->string->value; *ptr; ptr ++)
584 switch (*ptr)
585 {
586 case '\n' :
587 cupsFilePuts(fp, "\\n");
588 break;
589 case '\\' :
590 cupsFilePuts(fp, "\\\\");
591 break;
592 case '\"' :
593 cupsFilePuts(fp, "\\\"");
594 break;
595 default :
596 cupsFilePutChar(fp, *ptr);
597 break;
598 }
599 cupsFilePuts(fp, "\"\n");
600
601 cupsFilePutChar(fp, '\n');
602 }
603 }
604
605 cupsFileClose(fp);
606
607 return (0);
608 }
609
610
//
// 'get_utf8()' - Get a UTF-8 character.
//
// Decodes one character starting at "ptr" and advances "ptr" past the
// bytes that were used.  Bytes below 0xc0 and lead bytes that do not start
// a two-, three-, or four-byte sequence are returned unchanged.  When a
// continuation byte is missing, 0 is returned and "ptr" is left pointing
// at the offending byte.
//

static int				// O  - Unicode character or 0 on EOF
get_utf8(char *&ptr)			// IO - Pointer to character
{
  const int	lead = *ptr++ & 255;	// First byte of the sequence
  int		ch,			// Character being assembled
		remaining;		// Continuation bytes still expected


  // Work out the sequence length and payload bits from the lead byte...
  if (lead < 0xc0)
    return (lead);
  else if ((lead & 0xe0) == 0xc0)
  {
    remaining = 1;
    ch        = lead & 0x1f;
  }
  else if ((lead & 0xf0) == 0xe0)
  {
    remaining = 2;
    ch        = lead & 0x0f;
  }
  else if ((lead & 0xf8) == 0xf0)
  {
    remaining = 3;
    ch        = lead & 0x07;
  }
  else
    return (lead);

  // Fold in six bits from each continuation byte...
  for (; remaining > 0; remaining --)
  {
    if ((*ptr & 0xc0) != 0x80)
      return (0);

    ch = (ch << 6) | (*ptr++ & 0x3f);
  }

  return (ch);
}
666
667
668 //
669 // 'get_utf16()' - Get a UTF-16 character...
670 //
671
672 static int // O - Unicode character or 0 on EOF
673 get_utf16(cups_file_t *fp, // I - File to read from
674 ppdc_cs_t &cs) // IO - Character set of file
675 {
676 int ch; // Current character
677 unsigned char buffer[3]; // Bytes
678
679
680 if (cs == PPDC_CS_AUTO)
681 {
682 // Get byte-order-mark, if present...
683 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
684 return (0);
685
686 if (buffer[0] == 0xfe && buffer[1] == 0xff)
687 {
688 // Big-endian UTF-16...
689 cs = PPDC_CS_UTF16BE;
690
691 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
692 return (0);
693 }
694 else if (buffer[0] == 0xff && buffer[1] == 0xfe)
695 {
696 // Little-endian UTF-16...
697 cs = PPDC_CS_UTF16LE;
698
699 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
700 return (0);
701 }
702 else if (buffer[0] == 0x00 && buffer[1] != 0x00)
703 {
704 // No BOM, assume big-endian UTF-16...
705 cs = PPDC_CS_UTF16BE;
706 }
707 else if (buffer[0] != 0x00 && buffer[1] == 0x00)
708 {
709 // No BOM, assume little-endian UTF-16...
710 cs = PPDC_CS_UTF16LE;
711 }
712 else
713 {
714 // No BOM, assume UTF-8...
715 cs = PPDC_CS_UTF8;
716
717 cupsFileRewind(fp);
718 }
719 }
720 else if (cs != PPDC_CS_UTF8)
721 {
722 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
723 return (0);
724 }
725
726 if (cs == PPDC_CS_UTF8)
727 {
728 // UTF-8 character...
729 if ((ch = cupsFileGetChar(fp)) < 0)
730 return (0);
731
732 if ((ch & 0xe0) == 0xc0)
733 {
734 // Two-byte UTF-8...
735 if (cupsFileRead(fp, (char *)buffer, 1) != 1)
736 return (0);
737
738 if ((buffer[0] & 0xc0) != 0x80)
739 return (0);
740
741 ch = ((ch & 0x1f) << 6) | (buffer[0] & 0x3f);
742 }
743 else if ((ch & 0xf0) == 0xe0)
744 {
745 // Three-byte UTF-8...
746 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
747 return (0);
748
749 if ((buffer[0] & 0xc0) != 0x80 ||
750 (buffer[1] & 0xc0) != 0x80)
751 return (0);
752
753 ch = ((((ch & 0x0f) << 6) | (buffer[0] & 0x3f)) << 6) |
754 (buffer[1] & 0x3f);
755 }
756 else if ((ch & 0xf8) == 0xf0)
757 {
758 // Four-byte UTF-8...
759 if (cupsFileRead(fp, (char *)buffer, 3) != 3)
760 return (0);
761
762 if ((buffer[0] & 0xc0) != 0x80 ||
763 (buffer[1] & 0xc0) != 0x80 ||
764 (buffer[2] & 0xc0) != 0x80)
765 return (0);
766
767 ch = ((((((ch & 0x07) << 6) | (buffer[0] & 0x3f)) << 6) |
768 (buffer[1] & 0x3f)) << 6) | (buffer[2] & 0x3f);
769 }
770 }
771 else
772 {
773 // UTF-16 character...
774 if (cs == PPDC_CS_UTF16BE)
775 ch = (buffer[0] << 8) | buffer[1];
776 else
777 ch = (buffer[1] << 8) | buffer[0];
778
779 if (ch >= 0xd800 && ch <= 0xdbff)
780 {
781 // Handle multi-word encoding...
782 int lch;
783
784 if (cupsFileRead(fp, (char *)buffer, 2) != 2)
785 return (0);
786
787 if (cs == PPDC_CS_UTF16BE)
788 lch = (buffer[0] << 8) | buffer[1];
789 else
790 lch = (buffer[1] << 8) | buffer[0];
791
792 if (lch < 0xdc00 || lch >= 0xdfff)
793 return (0);
794
795 ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
796 }
797 }
798
799 return (ch);
800 }
801
802
//
// 'put_utf8()' - Add a UTF-8 character to a string.
//
// Encodes "ch" at "ptr" and advances "ptr" past the bytes written.  The
// character is only written when every byte of it lands before "end";
// otherwise nothing is written and -1 is returned.
//

static int				// O  - 0 on success, -1 on failure
put_utf8(int  ch,			// I  - Unicode character
         char *&ptr,			// IO - String pointer
         char *end)			// I  - End of buffer
{
  int	nbytes;				// Length of the encoded character


  // How many bytes does this character need?
  if (ch < 0x80)
    nbytes = 1;
  else if (ch < 0x800)
    nbytes = 2;
  else if (ch < 0x10000)
    nbytes = 3;
  else
    nbytes = 4;

  // The last byte must still fall before the end of the buffer...
  if ((ptr + (nbytes - 1)) >= end)
    return (-1);

  switch (nbytes)
  {
    case 1 :
        // One-byte ASCII...
        *ptr++ = ch;
        break;

    case 2 :
        // Two-byte UTF-8...
        *ptr++ = 0xc0 | (ch >> 6);
        *ptr++ = 0x80 | (ch & 0x3f);
        break;

    case 3 :
        // Three-byte UTF-8...
        *ptr++ = 0xe0 | (ch >> 12);
        *ptr++ = 0x80 | ((ch >> 6) & 0x3f);
        *ptr++ = 0x80 | (ch & 0x3f);
        break;

    default :
        // Four-byte UTF-8...
        *ptr++ = 0xf0 | (ch >> 18);
        *ptr++ = 0x80 | ((ch >> 12) & 0x3f);
        *ptr++ = 0x80 | ((ch >> 6) & 0x3f);
        *ptr++ = 0x80 | (ch & 0x3f);
        break;
  }

  return (0);
}
853
854
855 //
856 // 'put_utf16()' - Write a UTF-16 character to a file.
857 //
858
859 static int // O - 0 on success, -1 on failure
860 put_utf16(cups_file_t *fp, // I - File to write to
861 int ch) // I - Unicode character
862 {
863 unsigned char buffer[4]; // Output buffer
864
865
866 if (ch < 0x10000)
867 {
868 // One-word UTF-16 big-endian...
869 buffer[0] = ch >> 8;
870 buffer[1] = ch;
871
872 if (cupsFileWrite(fp, (char *)buffer, 2) == 2)
873 return (0);
874 }
875 else
876 {
877 // Two-word UTF-16 big-endian...
878 ch -= 0x10000;
879
880 buffer[0] = 0xd8 | (ch >> 18);
881 buffer[1] = ch >> 10;
882 buffer[2] = 0xdc | ((ch >> 8) & 0x03);
883 buffer[3] = ch;
884
885 if (cupsFileWrite(fp, (char *)buffer, 4) == 4)
886 return (0);
887 }
888
889 return (-1);
890 }
891
892
893 //
894 // End of "$Id$".
895 //