From: Bruno Haible <bruno@clisp.org>
Date: Tue, 9 Sep 2003 10:54:47 +0000 (+0000)
Subject: Fixes from Guido Flohr.
X-Git-Tag: v0.13~267
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a1bbb95481bc8c216cb27310351451408e962c70;p=thirdparty%2Fgettext.git

Fixes from Guido Flohr.
---

diff --git a/gettext-tools/doc/ChangeLog b/gettext-tools/doc/ChangeLog
index 1ba3adf7b..47a1d5fac 100644
--- a/gettext-tools/doc/ChangeLog
+++ b/gettext-tools/doc/ChangeLog
@@ -1,3 +1,7 @@
+2003-09-09  Guido Flohr  <guido@imperia.net>
+
+	* gettext.texi (Perl, Interpolation I): Tweaks.
+
 2003-08-15  Bruno Haible  <bruno@clisp.org>
 
 	* ISO_639: Update. Add an, ht, ii, li.
diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi
index fac344d47..59d2a8255 100644
--- a/gettext-tools/doc/gettext.texi
+++ b/gettext-tools/doc/gettext.texi
@@ -8383,14 +8383,14 @@ Use @code{setlocale (LC_ALL, "");}
 @item Prerequisite
 @code{use POSIX;}
 @*@code{use Locale::TextDomain;} (included in the package libintl-perl
-which is available on the Comprehensive Perl Authors Network CPAN,
+which is available on the Comprehensive Perl Archive Network CPAN,
 http://www.cpan.org/).
 
 @item Use or emulate GNU gettext
 emulate
 
 @item Extractor
-@code{xgettext -k__ -k\$__ -k%__ -k__x -k__n -k__nx -k__xn -kN__ -k}
+@code{xgettext -k__ -k\$__ -k%__ -k__x -k__n:1,2 -k__nx:1,2 -k__xn:1,2 -kN__ -k}
 
 @item Formatting with positions
 Both kinds of format strings support formatting with positions.
@@ -8770,7 +8770,7 @@ UTF-8 representation, regardless of the presence of the @code{use utf8} pragma.
 
 @item hex chars, like @code{\x1b}
 
-@item wide hex chars, like @code{\x263a}
+@item wide hex chars, like @code{\x@{263a@}}
 @*
 Note that this escape is translated into a UTF-8 representation,
 regardless of the presence of the @code{use utf8} pragma.
@@ -8803,8 +8803,9 @@ The following escapes are considered partially safe:
 
 These escapes are only considered safe if the string consists of
 ASCII characters only.  Translation of characters outside the range
-defined by ASCII is locale-dependent and can only be performed at
-runtime.
+defined by ASCII is locale-dependent and can actually only be performed 
+at runtime; @code{xgettext} doesn't do these locale-dependent translations
+at extraction time.
 
 Except for the modifier @code{\Q}, these translations, albeit valid,
 are generally useless and only obfuscate your sources.  If a
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index f43accc16..a4045d81b 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,13 @@
+2003-09-09  Guido Flohr  <guido@imperia.net>
+
+	* x-perl.c (extract_quotelike_pass3): Change \l handling to no longer
+	eat up non-uppercase characters; likewise for \u. Also make \l and \u
+	work on characters written using hex or octal escape sequences. Fix \Q
+	handling. With option --extract-all, avoid "invalid variable
+	interpolation" warnings.
+	(x_perl_prelex): With option --extract-all, keep variables inside
+	double-quoted strings untouched.
+
 2003-09-03  Bruno Haible  <bruno@clisp.org>
 
 	* x-sh.h: New file.
diff --git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c
index 57f22b65e..ca2e807c5 100644
--- a/gettext-tools/src/x-perl.c
+++ b/gettext-tools/src/x-perl.c
@@ -439,8 +439,6 @@ is_whitespace (int c)
 /* ========================== Reading of tokens.  ========================== */
 
 
-/* FIXME: All known Perl operators should be listed here.  It does not
-   cost that much and it may improve the stability of the parser.  */
 enum token_type_ty
 {
   token_type_eof,
@@ -856,8 +854,9 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
     {
       bool backslashed;
 
-      /* Ensure room for 6 bytes.  */
-      if (bufpos + 6 > bufmax)
+      /* Ensure room for 7 bytes, 6 (multi-)bytes plus a leading backslash
+	 if \Q modifier is present.  */
+      if (bufpos + 7 > bufmax)
 	{
 	  bufmax = 2 * bufmax + 10;
 	  buffer = xrealloc (buffer, bufmax);
@@ -923,6 +922,22 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 		int length;
 
 		crs = extract_oct (crs + 1, 3, &oct_number);
+
+		/* FIXME: If one of the variables UPPERCASE or LOWERCASE is
+		   true, the character should be converted to its uppercase
+		   resp. lowercase equivalent.  I don't know if the necessary
+		   facilities are already included in gettext.  For US-Ascii
+		   the conversion can already be done, however.  */
+		if (uppercase && oct_number >= 'a' && oct_number <= 'z')
+		  {
+		    oct_number = oct_number - 'a' + 'A';
+		  }
+		else if (lowercase && oct_number >= 'A' && oct_number <= 'Z')
+		  {
+		    oct_number = oct_number - 'A' + 'a';
+		  }
+
+
 		/* Yes, octal escape sequences in the range 0x100..0x1ff are
 		   valid.  */
 		length = u8_uctomb ((unsigned char *) (buffer + bufpos),
@@ -961,8 +976,23 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 		    crs = extract_hex (crs, 2, &hex_number);
 		  }
 
+		/* FIXME: If one of the variables UPPERCASE or LOWERCASE is
+		   true, the character should be converted to its uppercase
+		   resp. lowercase equivalent.  I don't know if the necessary
+		   facilities are already included in gettext.  For US-Ascii
+		   the conversion can already be done, however.  */
+		if (uppercase && hex_number >= 'a' && hex_number <= 'z')
+		  {
+		    hex_number = hex_number - 'a' + 'A';
+		  }
+		else if (lowercase && hex_number >= 'A' && hex_number <= 'Z')
+		  {
+		    hex_number = hex_number - 'A' + 'a';
+		  }
+
 		length = u8_uctomb ((unsigned char *) (buffer + bufpos),
 				    hex_number, 6);
+
 		if (length > 0)
 		  bufpos += length;
 	      }
@@ -995,6 +1025,8 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 		      unicode = unicode_name_character (name);
 		      if (unicode != UNINAME_INVALID)
 			{
+			  /* FIXME: Convert to upper/lowercase if the
+			     corresponding flag is set to true.  */
 			  int length =
 			    u8_uctomb ((unsigned char *) (buffer + bufpos),
 				       unicode, 6);
@@ -1026,18 +1058,14 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 	    case 'L':
 	      uppercase = false;
 	      lowercase = true;
-	      quotemeta = false;
 	      ++crs;
 	      continue;
 	    case 'U':
 	      uppercase = true;
 	      lowercase = false;
-	      quotemeta = false;
 	      ++crs;
 	      continue;
 	    case 'Q':
-	      uppercase = false;
-	      lowercase = false;
 	      quotemeta = true;
 	      ++crs;
 	      continue;
@@ -1055,6 +1083,10 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 			 real_file_name, line_number, *crs);
 		  error_with_progname = true;
 		}
+	      else
+	        {
+		  buffer[bufpos++] = *crs;
+		}
 	      ++crs;
 	      continue;
 	    case 'u':
@@ -1071,6 +1103,10 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 			 real_file_name, line_number, *crs);
 		  error_with_progname = true;
 		}
+	      else
+	        {
+		  buffer[bufpos++] = *crs;
+		}
 	      ++crs;
 	      continue;
 	    case '\\':
@@ -1085,7 +1121,15 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
       else
 	backslashed = false;
 
-      if (!backslashed && (*crs == '$' || *crs == '@'))
+      if (quotemeta
+	  && !((*crs >= 'A' && *crs <= 'Z') || (*crs >= 'A' && *crs <= 'z')
+	       || (*crs >= '0' && *crs <= '9') || *crs == '_'))
+	{
+	  buffer[bufpos++] = '\\';
+	  backslashed = true;
+	}
+
+      if (!backslashed && !extract_all && (*crs == '$' || *crs == '@'))
 	{
 	  error_with_progname = false;
 	  error (error_level, 0, _("\
@@ -1128,10 +1172,6 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 	    buffer[bufpos++] = *crs;
 	  ++crs;
 	}
-      else if (quotemeta)
-	{
-	  buffer[bufpos++] = *crs++;
-	}
       else
 	{
 	  buffer[bufpos++] = *crs++;
@@ -1833,9 +1873,13 @@ x_perl_prelex (message_list_ty *mlp, token_ty *tp)
 	case '@':
 	case '*':
 	case '$':
-	  extract_variable (mlp, tp, c);
-	  prefer_division_over_regexp = true;
-	  return;
+	  if (!extract_all)
+	    {
+	      extract_variable (mlp, tp, c);
+	      prefer_division_over_regexp = true;
+	      return;
+	    }
+	  break;
 	}
 
       last_non_comment_line = tp->line_number;
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog
index 81da08cfa..2c95467a8 100644
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,8 @@
+2003-09-09  Guido Flohr  <guido@imperia.net>
+
+	* xgettext-29: Add more tests.
+	* lang-perl-1: Use gettext instead of _.
+
 2003-09-08  Bruno Haible  <bruno@clisp.org>
 
 	* lang-sh: Avoid test failure with Solaris /bin/sh.
diff --git a/gettext-tools/tests/lang-perl-1 b/gettext-tools/tests/lang-perl-1
index 12cb94a3a..36c93f8dc 100755
--- a/gettext-tools/tests/lang-perl-1
+++ b/gettext-tools/tests/lang-perl-1
@@ -10,21 +10,23 @@ trap 'rm -fr $tmpfiles' 1 2 3 15
 
 tmpfiles="$tmpfiles program.pl"
 cat <<\EOF > program.pl
-use Locale::Messages qw (textdomain bindtextdomain ngettext);
+use strict;
+
+use Locale::Messages qw (textdomain bindtextdomain gettext ngettext);
 textdomain "prog";
 bindtextdomain "prog", "./";
 my $n = 2;
-print _"'Your command, please?', asked the waiter.";
+print gettext "'Your command, please?', asked the waiter.";
 print "\n";
 printf ngettext ("a piece of cake", "%d pieces of cake", $n), $n;
 print "\n";
-printf _"%s is replaced by %s.", "FF", "EUR";
+printf gettext "%s is replaced by %s.", "FF", "EUR";
 print "\n";
 EOF
 
 tmpfiles="$tmpfiles prog.pot"
 : ${XGETTEXT=xgettext}
-${XGETTEXT} -k_ -o prog.pot --omit-header --no-location program.pl
+${XGETTEXT} -k__ -o prog.pot --omit-header --no-location program.pl
 
 tmpfiles="$tmpfiles prog.ok"
 cat <<EOF > prog.ok
diff --git a/gettext-tools/tests/xgettext-29 b/gettext-tools/tests/xgettext-29
index 652307e27..f75c196b3 100755
--- a/gettext-tools/tests/xgettext-29
+++ b/gettext-tools/tests/xgettext-29
@@ -1,37 +1,106 @@
 #! /bin/sh
 
-# Test of Perl support with --extract-all and printf format strings.
-
+# Tests for the general string extraction facilities of the Perl backend
+# (with option --extract-all).
+                                                                                
 tmpfiles=""
 trap 'rm -fr $tmpfiles' 1 2 3 15
 
 tmpfiles="$tmpfiles xg-test29.pl"
-cat <<\EOF > xg-test29.pl
-my $n = 2;
-print "'Your $command, please?', asked the waiter.";
-print "\n";
-printf ("$$$$$$$$$$$$$var pieces of cake", $n);
-print "\n";
-printf "@s is replaced by $s->[-1].";
-print "\n";
+cat <<\EOPERL > xg-test29.pl
+use strict;
+
+# A double quoted string.
+print "'Your command, please?', asked the waiter.\n";
+# A double quoted string with interpolations.
+my $polite = 'please';
+print "'Your recommendation, $polite?', answered the guest.\n";
+# A reference.
+my $ref1 = \$polite;
+my $ref2 = \$ref1;
+my $ref3 = \$ref2;
+print "Yes, $$$$ref3!\n";
+# The qq operator and some of the more esoteric string interpolation
+# features of Perl.
+print (qq {\uU\lp \LaNd\E \ldo\lWn, \Uoh\E, yeah\Q!!!\E\\!\n});
+# The q operator.
+print q<E-Mail: <no@spam.org>.  >;
+# Should not be found.
+{ $polite =~ qr?le? }
+
+# List interpolation.
+print "Your Perl include path starts with '$INC[0]' and it " .
+      "ends with '$INC[-1]'.  $#INC directories are searched.\n";
+# Here documents.
+print <<EOF, <<'EOF';
+Line 1\nLine 2
+EOF
+Line 1\nStill line 1
 EOF
+# Perl code inside strings.
+sub hello_func { return 'Hello' };
+print "@{[hello_func]} world!\n";
+# Backticks.
+print `ls $0`;
+print qx;ls $0;;
+
+# The rest requires a Unicode aware Perl.
+require 5.006;
+print "\U\x70\LO\154\x{69}\x{004E}a \Q\lRu\LLeS\E\041\n";
+# FIXME: The following should actually produce 'Polina4ka' in cyrillic letters.
+#print "\u\x{43f}\L\x{41E}\x{43b}\x{418}\E\x{43d}" .
+#      "\x{430}\x{447}\x{43a}\x{430}\n";
+EOPERL
 
 tmpfiles="$tmpfiles xg-test29.pot"
 : ${XGETTEXT=xgettext}
 LC_MESSAGES=C LC_ALL= \
-${XGETTEXT} -a -k_ --omit-header --no-location -o xg-test29.pot xg-test29.pl \
-    2>&1 | grep -v "invalid variable interpolation"
+${XGETTEXT} -a --omit-header --no-location -o xg-test29.pot xg-test29.pl
 test $? = 0 || { rm -fr $tmpfiles; exit 1; }
 
 tmpfiles="$tmpfiles xg-test29.ok"
 cat <<\EOF > xg-test29.ok
-msgid "'Your $command, please?', asked the waiter."
+msgid "'Your command, please?', asked the waiter.\n"
+msgstr ""
+
+msgid "please"
+msgstr ""
+
+msgid "'Your recommendation, $polite?', answered the guest.\n"
+msgstr ""
+
+msgid "Yes, $$$$ref3!\n"
+msgstr ""
+
+msgid "Up and down, OH, yeah\\!\\!\\!\\!\n"
+msgstr ""
+
+msgid "E-Mail: <no@spam.org>.  "
+msgstr ""
+
+msgid ""
+"Your Perl include path starts with '$INC[0]' and it ends with '$INC[-1]'.  "
+"$#INC directories are searched.\n"
+msgstr ""
+
+msgid ""
+"Line 1\n"
+"Line 2\n"
+msgstr ""
+
+msgid "Line 1\\nStill line 1\n"
+msgstr ""
+
+msgid "Hello"
+msgstr ""
+
+msgid "@{[hello_func]} world!\n"
 msgstr ""
 
-msgid "$$$$$$$$$$$$$var pieces of cake"
+msgid "ls $0"
 msgstr ""
 
-msgid "@s is replaced by $s->[-1]."
+msgid "Polina rules!\n"
 msgstr ""
 EOF