From: Bruno Haible Date: Tue, 9 Sep 2003 10:54:47 +0000 (+0000) Subject: Fixes from Guido Flohr. X-Git-Tag: v0.13~267 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a1bbb95481bc8c216cb27310351451408e962c70;p=thirdparty%2Fgettext.git Fixes from Guido Flohr. --- diff --git a/gettext-tools/doc/ChangeLog b/gettext-tools/doc/ChangeLog index 1ba3adf7b..47a1d5fac 100644 --- a/gettext-tools/doc/ChangeLog +++ b/gettext-tools/doc/ChangeLog @@ -1,3 +1,7 @@ +2003-09-09 Guido Flohr + + * gettext.texi (Perl, Interpolation I): Tweaks. + 2003-08-15 Bruno Haible * ISO_639: Update. Add an, ht, ii, li. diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index fac344d47..59d2a8255 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -8383,14 +8383,14 @@ Use @code{setlocale (LC_ALL, "");} @item Prerequisite @code{use POSIX;} @*@code{use Locale::TextDomain;} (included in the package libintl-perl -which is available on the Comprehensive Perl Authors Network CPAN, +which is available on the Comprehensive Perl Archive Network CPAN, http://www.cpan.org/). @item Use or emulate GNU gettext emulate @item Extractor -@code{xgettext -k__ -k\$__ -k%__ -k__x -k__n -k__nx -k__xn -kN__ -k} +@code{xgettext -k__ -k\$__ -k%__ -k__x -k__n:1,2 -k__nx:1,2 -k__xn:1,2 -kN__ -k} @item Formatting with positions Both kinds of format strings support formatting with positions. @@ -8770,7 +8770,7 @@ UTF-8 representation, regardless of the presence of the @code{use utf8} pragma. @item hex chars, like @code{\x1b} -@item wide hex chars, like @code{\x263a} +@item wide hex chars, like @code{\x@{263a@}} @* Note that this escape is translated into a UTF-8 representation, regardless of the presence of the @code{use utf8} pragma. @@ -8803,8 +8803,9 @@ The following escapes are considered partially safe: These escapes are only considered safe if the string consists of ASCII characters only. 
Translation of characters outside the range -defined by ASCII is locale-dependent and can only be performed at -runtime. +defined by ASCII is locale-dependent and can actually only be performed +at runtime; @code{xgettext} doesn't do these locale-dependent translations +at extraction time. Except for the modifier @code{\Q}, these translations, albeit valid, are generally useless and only obfuscate your sources. If a diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index f43accc16..a4045d81b 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,13 @@ +2003-09-09 Guido Flohr + + * x-perl.c (extract_quotelike_pass3): Change \l handling to no longer + eat up non-uppercase characters; likewise for \u. Also make \l and \u + work on characters written using hex or octal escape sequence. Fix \Q + handling. With option --extract-all, avoid "invalid variable + interpolation" warnings. + (x_perl_prelex): With option --extract-all, keep variables inside + double-quoted strings untouched. + 2003-09-03 Bruno Haible * x-sh.h: New file. diff --git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c index 57f22b65e..ca2e807c5 100644 --- a/gettext-tools/src/x-perl.c +++ b/gettext-tools/src/x-perl.c @@ -439,8 +439,6 @@ is_whitespace (int c) /* ========================== Reading of tokens. ========================== */ -/* FIXME: All known Perl operators should be listed here. It does not - cost that much and it may improve the stability of the parser. */ enum token_type_ty { token_type_eof, @@ -856,8 +854,9 @@ extract_quotelike_pass3 (token_ty *tp, int error_level) { bool backslashed; - /* Ensure room for 6 bytes. */ - if (bufpos + 6 > bufmax) + /* Ensure room for 7 bytes, 6 (multi-)bytes plus a leading backslash + if \Q modifier is present. 
*/ + if (bufpos + 7 > bufmax) { bufmax = 2 * bufmax + 10; buffer = xrealloc (buffer, bufmax); @@ -923,6 +922,22 @@ extract_quotelike_pass3 (token_ty *tp, int error_level) int length; crs = extract_oct (crs + 1, 3, &oct_number); + + /* FIXME: If one of the variables UPPERCASE or LOWERCASE is + true, the character should be converted to its uppercase + resp. lowercase equivalent. I don't know if the necessary + facilities are already included in gettext. For US-Ascii + the conversion can already be done, however. */ + if (uppercase && oct_number >= 'a' && oct_number <= 'z') + { + oct_number = oct_number - 'a' + 'A'; + } + else if (lowercase && oct_number >= 'A' && oct_number <= 'Z') + { + oct_number = oct_number - 'A' + 'a'; + } + + /* Yes, octal escape sequences in the range 0x100..0x1ff are valid. */ length = u8_uctomb ((unsigned char *) (buffer + bufpos), @@ -961,8 +976,23 @@ extract_quotelike_pass3 (token_ty *tp, int error_level) crs = extract_hex (crs, 2, &hex_number); } + /* FIXME: If one of the variables UPPERCASE or LOWERCASE is + true, the character should be converted to its uppercase + resp. lowercase equivalent. I don't know if the necessary + facilities are already included in gettext. For US-Ascii + the conversion can already be done, however. */ + if (uppercase && hex_number >= 'a' && hex_number <= 'z') + { + hex_number = hex_number - 'a' + 'A'; + } + else if (lowercase && hex_number >= 'A' && hex_number <= 'Z') + { + hex_number = hex_number - 'A' + 'a'; + } + length = u8_uctomb ((unsigned char *) (buffer + bufpos), hex_number, 6); + if (length > 0) bufpos += length; } @@ -995,6 +1025,8 @@ extract_quotelike_pass3 (token_ty *tp, int error_level) unicode = unicode_name_character (name); if (unicode != UNINAME_INVALID) { + /* FIXME: Convert to upper/lowercase if the + corresponding flag is set to true. 
*/ int length = u8_uctomb ((unsigned char *) (buffer + bufpos), unicode, 6); @@ -1026,18 +1058,14 @@ extract_quotelike_pass3 (token_ty *tp, int error_level) case 'L': uppercase = false; lowercase = true; - quotemeta = false; ++crs; continue; case 'U': uppercase = true; lowercase = false; - quotemeta = false; ++crs; continue; case 'Q': - uppercase = false; - lowercase = false; quotemeta = true; ++crs; continue; @@ -1055,6 +1083,10 @@ extract_quotelike_pass3 (token_ty *tp, int error_level) real_file_name, line_number, *crs); error_with_progname = true; } + else + { + buffer[bufpos++] = *crs; + } ++crs; continue; case 'u': @@ -1071,6 +1103,10 @@ extract_quotelike_pass3 (token_ty *tp, int error_level) real_file_name, line_number, *crs); error_with_progname = true; } + else + { + buffer[bufpos++] = *crs; + } ++crs; continue; case '\\': @@ -1085,7 +1121,15 @@ extract_quotelike_pass3 (token_ty *tp, int error_level) else backslashed = false; - if (!backslashed && (*crs == '$' || *crs == '@')) + if (quotemeta + && !((*crs >= 'A' && *crs <= 'Z') || (*crs >= 'A' && *crs <= 'z') + || (*crs >= '0' && *crs <= '9') || *crs == '_')) + { + buffer[bufpos++] = '\\'; + backslashed = true; + } + + if (!backslashed && !extract_all && (*crs == '$' || *crs == '@')) { error_with_progname = false; error (error_level, 0, _("\ @@ -1128,10 +1172,6 @@ extract_quotelike_pass3 (token_ty *tp, int error_level) buffer[bufpos++] = *crs; ++crs; } - else if (quotemeta) - { - buffer[bufpos++] = *crs++; - } else { buffer[bufpos++] = *crs++; @@ -1833,9 +1873,13 @@ x_perl_prelex (message_list_ty *mlp, token_ty *tp) case '@': case '*': case '$': - extract_variable (mlp, tp, c); - prefer_division_over_regexp = true; - return; + if (!extract_all) + { + extract_variable (mlp, tp, c); + prefer_division_over_regexp = true; + return; + } + break; } last_non_comment_line = tp->line_number; diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index 81da08cfa..2c95467a8 100644 --- 
a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,8 @@ +2003-09-09 Guido Flohr + + * xgettext-29: Add more tests. + * lang-perl-1: Use gettext instead of _. + 2003-09-08 Bruno Haible * lang-sh: Avoid test failure with Solaris /bin/sh. diff --git a/gettext-tools/tests/lang-perl-1 b/gettext-tools/tests/lang-perl-1 index 12cb94a3a..36c93f8dc 100755 --- a/gettext-tools/tests/lang-perl-1 +++ b/gettext-tools/tests/lang-perl-1 @@ -10,21 +10,23 @@ trap 'rm -fr $tmpfiles' 1 2 3 15 tmpfiles="$tmpfiles program.pl" cat <<\EOF > program.pl -use Locale::Messages qw (textdomain bindtextdomain ngettext); +use strict; + +use Locale::Messages qw (textdomain bindtextdomain gettext ngettext); textdomain "prog"; bindtextdomain "prog", "./"; my $n = 2; -print _"'Your command, please?', asked the waiter."; +print gettext "'Your command, please?', asked the waiter."; print "\n"; printf ngettext ("a piece of cake", "%d pieces of cake", $n), $n; print "\n"; -printf _"%s is replaced by %s.", "FF", "EUR"; +printf gettext "%s is replaced by %s.", "FF", "EUR"; print "\n"; EOF tmpfiles="$tmpfiles prog.pot" : ${XGETTEXT=xgettext} -${XGETTEXT} -k_ -o prog.pot --omit-header --no-location program.pl +${XGETTEXT} -k__ -o prog.pot --omit-header --no-location program.pl tmpfiles="$tmpfiles prog.ok" cat < prog.ok diff --git a/gettext-tools/tests/xgettext-29 b/gettext-tools/tests/xgettext-29 index 652307e27..f75c196b3 100755 --- a/gettext-tools/tests/xgettext-29 +++ b/gettext-tools/tests/xgettext-29 @@ -1,37 +1,106 @@ #! /bin/sh -# Test of Perl support with --extract-all and printf format strings. - +# Tests for the general string extraction facilities of the Perl backend +# (with option --extract-all). 
+ tmpfiles="" trap 'rm -fr $tmpfiles' 1 2 3 15 tmpfiles="$tmpfiles xg-test29.pl" -cat <<\EOF > xg-test29.pl -my $n = 2; -print "'Your $command, please?', asked the waiter."; -print "\n"; -printf ("$$$$$$$$$$$$$var pieces of cake", $n); -print "\n"; -printf "@s is replaced by $s->[-1]."; -print "\n"; +cat <<\EOPERL > xg-test29.pl +use strict; + +# A double quoted string. +print "'Your command, please?', asked the waiter.\n"; +# A double quoted string with interpolations. +my $polite = 'please'; +print "'Your recommendation, $polite?', answered the guest.\n"; +# A reference. +my $ref1 = \$polite; +my $ref2 = \$ref1; +my $ref3 = \$ref2; +print "Yes, $$$$ref3!\n"; +# The qq operator and some of the more esoteric string interpolation +# features of Perl. +print (qq {\uU\lp \LaNd\E \ldo\lWn, \Uoh\E, yeah\Q!!!\E\\!\n}); +# The q operator. +print q. >; +# Should not be found. +{ $polite =~ qr?le? } + +# List interpolation. +print "Your Perl include path starts with '$INC[0]' and it " . + "ends with '$INC[-1]'. $#INC directories are searched.\n"; +# Here documents. +print <&1 | grep -v "invalid variable interpolation" +${XGETTEXT} -a --omit-header --no-location -o xg-test29.pot xg-test29.pl test $? = 0 || { rm -fr $tmpfiles; exit 1; } tmpfiles="$tmpfiles xg-test29.ok" cat <<\EOF > xg-test29.ok -msgid "'Your $command, please?', asked the waiter." +msgid "'Your command, please?', asked the waiter.\n" +msgstr "" + +msgid "please" +msgstr "" + +msgid "'Your recommendation, $polite?', answered the guest.\n" +msgstr "" + +msgid "Yes, $$$$ref3!\n" +msgstr "" + +msgid "Up and down, OH, yeah\\!\\!\\!\\!\n" +msgstr "" + +msgid "E-Mail: . " +msgstr "" + +msgid "" +"Your Perl include path starts with '$INC[0]' and it ends with '$INC[-1]'. 
" +"$#INC directories are searched.\n" +msgstr "" + +msgid "" +"Line 1\n" +"Line 2\n" +msgstr "" + +msgid "Line 1\\nStill line 1\n" +msgstr "" + +msgid "Hello" +msgstr "" + +msgid "@{[hello_func]} world!\n" msgstr "" -msgid "$$$$$$$$$$$$$var pieces of cake" +msgid "ls $0" msgstr "" -msgid "@s is replaced by $s->[-1]." +msgid "Polina rules!\n" msgstr "" EOF