+2003-09-09 Guido Flohr <guido@imperia.net>
+
+ * gettext.texi (Perl, Interpolation I): Tweaks.
+
2003-08-15 Bruno Haible <bruno@clisp.org>
* ISO_639: Update. Add an, ht, ii, li.
@item Prerequisite
@code{use POSIX;}
@*@code{use Locale::TextDomain;} (included in the package libintl-perl
-which is available on the Comprehensive Perl Authors Network CPAN,
+which is available on the Comprehensive Perl Archive Network CPAN,
http://www.cpan.org/).
@item Use or emulate GNU gettext
emulate
@item Extractor
-@code{xgettext -k__ -k\$__ -k%__ -k__x -k__n -k__nx -k__xn -kN__ -k}
+@code{xgettext -k__ -k\$__ -k%__ -k__x -k__n:1,2 -k__nx:1,2 -k__xn:1,2 -kN__ -k}
@item Formatting with positions
Both kinds of format strings support formatting with positions.
@item hex chars, like @code{\x1b}
-@item wide hex chars, like @code{\x263a}
+@item wide hex chars, like @code{\x@{263a@}}
@*
Note that this escape is translated into a UTF-8 representation,
regardless of the presence of the @code{use utf8} pragma.
These escapes are only considered safe if the string consists of
ASCII characters only. Translation of characters outside the range
-defined by ASCII is locale-dependent and can only be performed at
-runtime.
+defined by ASCII is locale-dependent and can actually only be performed
+at runtime; @code{xgettext} doesn't do these locale-dependent translations
+at extraction time.
Except for the modifier @code{\Q}, these translations, albeit valid,
are generally useless and only obfuscate your sources. If a
+2003-09-09 Guido Flohr <guido@imperia.net>
+
+ * x-perl.c (extract_quotelike_pass3): Change \l handling to no longer
+ eat up non-uppercase characters; likewise for \u. Also make \l and \u
+ work on characters written using hex or octal escape sequences. Fix \Q
+ handling. With option --extract-all, avoid "invalid variable
+ interpolation" warnings.
+ (x_perl_prelex): With option --extract-all, keep variables inside
+ double-quoted strings untouched.
+
2003-09-03 Bruno Haible <bruno@clisp.org>
* x-sh.h: New file.
/* ========================== Reading of tokens. ========================== */
-/* FIXME: All known Perl operators should be listed here. It does not
- cost that much and it may improve the stability of the parser. */
enum token_type_ty
{
token_type_eof,
{
bool backslashed;
- /* Ensure room for 6 bytes. */
- if (bufpos + 6 > bufmax)
+ /* Ensure room for 7 bytes, 6 (multi-)bytes plus a leading backslash
+ if \Q modifier is present. */
+ if (bufpos + 7 > bufmax)
{
bufmax = 2 * bufmax + 10;
buffer = xrealloc (buffer, bufmax);
int length;
crs = extract_oct (crs + 1, 3, &oct_number);
+
+ /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
+ true, the character should be converted to its uppercase
+ or lowercase equivalent, respectively. I don't know if the necessary
+ facilities are already included in gettext. For US-ASCII
+ the conversion can already be done, however. */
+ if (uppercase && oct_number >= 'a' && oct_number <= 'z')
+ {
+ oct_number = oct_number - 'a' + 'A';
+ }
+ else if (lowercase && oct_number >= 'A' && oct_number <= 'Z')
+ {
+ oct_number = oct_number - 'A' + 'a';
+ }
+
+
/* Yes, octal escape sequences in the range 0x100..0x1ff are
valid. */
length = u8_uctomb ((unsigned char *) (buffer + bufpos),
crs = extract_hex (crs, 2, &hex_number);
}
+ /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
+ true, the character should be converted to its uppercase
+ or lowercase equivalent, respectively. I don't know if the necessary
+ facilities are already included in gettext. For US-ASCII
+ the conversion can already be done, however. */
+ if (uppercase && hex_number >= 'a' && hex_number <= 'z')
+ {
+ hex_number = hex_number - 'a' + 'A';
+ }
+ else if (lowercase && hex_number >= 'A' && hex_number <= 'Z')
+ {
+ hex_number = hex_number - 'A' + 'a';
+ }
+
length = u8_uctomb ((unsigned char *) (buffer + bufpos),
hex_number, 6);
+
if (length > 0)
bufpos += length;
}
unicode = unicode_name_character (name);
if (unicode != UNINAME_INVALID)
{
+ /* FIXME: Convert to upper/lowercase if the
+ corresponding flag is set to true. */
int length =
u8_uctomb ((unsigned char *) (buffer + bufpos),
unicode, 6);
case 'L':
uppercase = false;
lowercase = true;
- quotemeta = false;
++crs;
continue;
case 'U':
uppercase = true;
lowercase = false;
- quotemeta = false;
++crs;
continue;
case 'Q':
- uppercase = false;
- lowercase = false;
quotemeta = true;
++crs;
continue;
real_file_name, line_number, *crs);
error_with_progname = true;
}
+ else
+ {
+ buffer[bufpos++] = *crs;
+ }
++crs;
continue;
case 'u':
real_file_name, line_number, *crs);
error_with_progname = true;
}
+ else
+ {
+ buffer[bufpos++] = *crs;
+ }
++crs;
continue;
case '\\':
else
backslashed = false;
- if (!backslashed && (*crs == '$' || *crs == '@'))
+ if (quotemeta
+ && !((*crs >= 'A' && *crs <= 'Z') || (*crs >= 'a' && *crs <= 'z')
+ || (*crs >= '0' && *crs <= '9') || *crs == '_'))
+ { /* \Q is active: like Perl's quotemeta, escape all but [A-Za-z0-9_]. */
+ buffer[bufpos++] = '\\';
+ backslashed = true;
+ }
+
+ if (!backslashed && !extract_all && (*crs == '$' || *crs == '@'))
{
error_with_progname = false;
error (error_level, 0, _("\
buffer[bufpos++] = *crs;
++crs;
}
- else if (quotemeta)
- {
- buffer[bufpos++] = *crs++;
- }
else
{
buffer[bufpos++] = *crs++;
case '@':
case '*':
case '$':
- extract_variable (mlp, tp, c);
- prefer_division_over_regexp = true;
- return;
+ if (!extract_all)
+ {
+ extract_variable (mlp, tp, c);
+ prefer_division_over_regexp = true;
+ return;
+ }
+ break;
}
last_non_comment_line = tp->line_number;
+2003-09-09 Guido Flohr <guido@imperia.net>
+
+ * xgettext-29: Add more tests.
+ * lang-perl-1: Use gettext instead of _.
+
2003-09-08 Bruno Haible <bruno@clisp.org>
* lang-sh: Avoid test failure with Solaris /bin/sh.
tmpfiles="$tmpfiles program.pl"
cat <<\EOF > program.pl
-use Locale::Messages qw (textdomain bindtextdomain ngettext);
+use strict;
+
+use Locale::Messages qw (textdomain bindtextdomain gettext ngettext);
textdomain "prog";
bindtextdomain "prog", "./";
my $n = 2;
-print _"'Your command, please?', asked the waiter.";
+print gettext "'Your command, please?', asked the waiter.";
print "\n";
printf ngettext ("a piece of cake", "%d pieces of cake", $n), $n;
print "\n";
-printf _"%s is replaced by %s.", "FF", "EUR";
+printf gettext "%s is replaced by %s.", "FF", "EUR";
print "\n";
EOF
tmpfiles="$tmpfiles prog.pot"
: ${XGETTEXT=xgettext}
-${XGETTEXT} -k_ -o prog.pot --omit-header --no-location program.pl
+${XGETTEXT} -k__ -o prog.pot --omit-header --no-location program.pl
tmpfiles="$tmpfiles prog.ok"
cat <<EOF > prog.ok
#! /bin/sh
-# Test of Perl support with --extract-all and printf format strings.
-
+# Tests for the general string extraction facilities of the Perl backend
+# (with option --extract-all).
+
tmpfiles=""
trap 'rm -fr $tmpfiles' 1 2 3 15
tmpfiles="$tmpfiles xg-test29.pl"
-cat <<\EOF > xg-test29.pl
-my $n = 2;
-print "'Your $command, please?', asked the waiter.";
-print "\n";
-printf ("$$$$$$$$$$$$$var pieces of cake", $n);
-print "\n";
-printf "@s is replaced by $s->[-1].";
-print "\n";
+cat <<\EOPERL > xg-test29.pl
+use strict;
+
+# A double quoted string.
+print "'Your command, please?', asked the waiter.\n";
+# A double quoted string with interpolations.
+my $polite = 'please';
+print "'Your recommendation, $polite?', answered the guest.\n";
+# A reference.
+my $ref1 = \$polite;
+my $ref2 = \$ref1;
+my $ref3 = \$ref2;
+print "Yes, $$$$ref3!\n";
+# The qq operator and some of the more esoteric string interpolation
+# features of Perl.
+print (qq {\uU\lp \LaNd\E \ldo\lWn, \Uoh\E, yeah\Q!!!\E\\!\n});
+# The q operator.
+print q<E-Mail: <no@spam.org>. >;
+# Should not be found.
+{ $polite =~ qr?le? }
+
+# List interpolation.
+print "Your Perl include path starts with '$INC[0]' and it " .
+ "ends with '$INC[-1]'. $#INC directories are searched.\n";
+# Here documents.
+print <<EOF, <<'EOF';
+Line 1\nLine 2
+EOF
+Line 1\nStill line 1
EOF
+# Perl code inside strings.
+sub hello_func { return 'Hello' };
+print "@{[hello_func]} world!\n";
+# Backticks.
+print `ls $0`;
+print qx;ls $0;;
+
+# The rest requires a Unicode aware Perl.
+require 5.006;
+print "\U\x70\LO\154\x{69}\x{004E}a \Q\lRu\LLeS\E\041\n";
+# FIXME: The following should actually produce 'Polina4ka' in Cyrillic letters.
+#print "\u\x{43f}\L\x{41E}\x{43b}\x{418}\E\x{43d}" .
+# "\x{430}\x{447}\x{43a}\x{430}\n";
+EOPERL
tmpfiles="$tmpfiles xg-test29.pot"
: ${XGETTEXT=xgettext}
LC_MESSAGES=C LC_ALL= \
-${XGETTEXT} -a -k_ --omit-header --no-location -o xg-test29.pot xg-test29.pl \
- 2>&1 | grep -v "invalid variable interpolation"
+${XGETTEXT} -a --omit-header --no-location -o xg-test29.pot xg-test29.pl
test $? = 0 || { rm -fr $tmpfiles; exit 1; }
tmpfiles="$tmpfiles xg-test29.ok"
cat <<\EOF > xg-test29.ok
-msgid "'Your $command, please?', asked the waiter."
+msgid "'Your command, please?', asked the waiter.\n"
+msgstr ""
+
+msgid "please"
+msgstr ""
+
+msgid "'Your recommendation, $polite?', answered the guest.\n"
+msgstr ""
+
+msgid "Yes, $$$$ref3!\n"
+msgstr ""
+
+msgid "Up and down, OH, yeah\\!\\!\\!\\!\n"
+msgstr ""
+
+msgid "E-Mail: <no@spam.org>. "
+msgstr ""
+
+msgid ""
+"Your Perl include path starts with '$INC[0]' and it ends with '$INC[-1]'. "
+"$#INC directories are searched.\n"
+msgstr ""
+
+msgid ""
+"Line 1\n"
+"Line 2\n"
+msgstr ""
+
+msgid "Line 1\\nStill line 1\n"
+msgstr ""
+
+msgid "Hello"
+msgstr ""
+
+msgid "@{[hello_func]} world!\n"
msgstr ""
-msgid "$$$$$$$$$$$$$var pieces of cake"
+msgid "ls $0"
msgstr ""
-msgid "@s is replaced by $s->[-1]."
+msgid "Polina rules!\n"
msgstr ""
EOF