Fixes from Guido Flohr.

author Bruno Haible <bruno@clisp.org>

Tue, 9 Sep 2003 10:54:47 +0000 (10:54 +0000)

committer Bruno Haible <bruno@clisp.org>

Tue, 23 Jun 2009 10:10:58 +0000 (12:10 +0200)
author Bruno Haible <bruno@clisp.org>
Tue, 9 Sep 2003 10:54:47 +0000 (10:54 +0000)
committer Bruno Haible <bruno@clisp.org>
Tue, 23 Jun 2009 10:10:58 +0000 (12:10 +0200)
diff --git a/gettext-tools/doc/ChangeLog b/gettext-tools/doc/ChangeLog

index 1ba3adf7b3e8f08a578232c23aad5f664f686bc3..47a1d5facfa3d2eed5a01079c2a8a46e5afab042 100644 (file)
--- a/gettext-tools/doc/ChangeLog
+++ b/gettext-tools/doc/ChangeLog
@@ -1,3 +1,7 @@
+2003-09-09  Guido Flohr  <guido@imperia.net>
+
+       * gettext.texi (Perl, Interpolation I): Tweaks.
+
  2003-08-15  Bruno Haible  <bruno@clisp.org>
  
         * ISO_639: Update. Add an, ht, ii, li.
diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi

index fac344d475a600ce93d294979183d13e6d463290..59d2a8255f02c9953cf0dcedc8c9bf9d04757386 100644 (file)
--- a/gettext-tools/doc/gettext.texi
+++ b/gettext-tools/doc/gettext.texi
@@ -8383,14 +8383,14 @@ Use @code{setlocale (LC_ALL, "");}
  @item Prerequisite
  @code{use POSIX;}
  @*@code{use Locale::TextDomain;} (included in the package libintl-perl
-which is available on the Comprehensive Perl Authors Network CPAN,
+which is available on the Comprehensive Perl Archive Network CPAN,
  http://www.cpan.org/).
  
  @item Use or emulate GNU gettext
  emulate
  
  @item Extractor
-@code{xgettext -k__ -k\$__ -k%__ -k__x -k__n -k__nx -k__xn -kN__ -k}
+@code{xgettext -k__ -k\$__ -k%__ -k__x -k__n:1,2 -k__nx:1,2 -k__xn:1,2 -kN__ -k}
  
  @item Formatting with positions
  Both kinds of format strings support formatting with positions.
@@ -8770,7 +8770,7 @@ UTF-8 representation, regardless of the presence of the @code{use utf8} pragma.
  
  @item hex chars, like @code{\x1b}
  
-@item wide hex chars, like @code{\x263a}
+@item wide hex chars, like @code{\x@{263a@}}
  @*
  Note that this escape is translated into a UTF-8 representation,
  regardless of the presence of the @code{use utf8} pragma.
@@ -8803,8 +8803,9 @@ The following escapes are considered partially safe:
  
  These escapes are only considered safe if the string consists of
  ASCII characters only.  Translation of characters outside the range
-defined by ASCII is locale-dependent and can only be performed at
-runtime.
+defined by ASCII is locale-dependent and can actually only be performed 
+at runtime; @code{xgettext} doesn't do these locale-dependent translations
+at extraction time.
  
  Except for the modifier @code{\Q}, these translations, albeit valid,
  are generally useless and only obfuscate your sources.  If a
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog

index f43accc167cd9c46dd1dd5c124fd9f7ead235165..a4045d81bb8f6c21d807b0a19031c53080f1c494 100644 (file)
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,13 @@
+2003-09-09  Guido Flohr  <guido@imperia.net>
+
+       * x-perl.c (extract_quotelike_pass3): Change \l handling to no longer
+       eat up non-uppercase characters; likewise for \u. Also make \l and \u
+       work on characters written using hex or octal escape sequence. Fix \Q
+       handling. With option --extract-all, avoid "invalid variable
+       interpolation" warnings.
+       (x_perl_prelex): With option --extract-all, keep variables inside
+       double-quoted strings untouched.
+
  2003-09-03  Bruno Haible  <bruno@clisp.org>
  
         * x-sh.h: New file.
diff --git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c

index 57f22b65ed60d5c1782249892918252497a0b825..ca2e807c5a79f5580ae117e882dc152d558318f4 100644 (file)
--- a/gettext-tools/src/x-perl.c
+++ b/gettext-tools/src/x-perl.c
@@ -439,8 +439,6 @@ is_whitespace (int c)
  /* ========================== Reading of tokens.  ========================== */
  
  
-/* FIXME: All known Perl operators should be listed here.  It does not
-   cost that much and it may improve the stability of the parser.  */
  enum token_type_ty
  {
    token_type_eof,
@@ -856,8 +854,9 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
      {
        bool backslashed;
  
-      /* Ensure room for 6 bytes.  */
-      if (bufpos + 6 > bufmax)
+      /* Ensure room for 7 bytes, 6 (multi-)bytes plus a leading backslash
+        if \Q modifier is present.  */
+      if (bufpos + 7 > bufmax)
         {
           bufmax = 2 * bufmax + 10;
           buffer = xrealloc (buffer, bufmax);
@@ -923,6 +922,22 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
                 int length;
  
                 crs = extract_oct (crs + 1, 3, &oct_number);
+
+               /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
+                  true, the character should be converted to its uppercase
+                  resp. lowercase equivalent.  I don't know if the necessary
+                  facilities are already included in gettext.  For US-Ascii
+                  the conversion can already be done, however.  */
+               if (uppercase && oct_number >= 'a' && oct_number <= 'z')
+                 {
+                   oct_number = oct_number - 'a' + 'A';
+                 }
+               else if (lowercase && oct_number >= 'A' && oct_number <= 'Z')
+                 {
+                   oct_number = oct_number - 'A' + 'a';
+                 }
+
+
                 /* Yes, octal escape sequences in the range 0x100..0x1ff are
                    valid.  */
                 length = u8_uctomb ((unsigned char *) (buffer + bufpos),
@@ -961,8 +976,23 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
                     crs = extract_hex (crs, 2, &hex_number);
                   }
  
+               /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
+                  true, the character should be converted to its uppercase
+                  resp. lowercase equivalent.  I don't know if the necessary
+                  facilities are already included in gettext.  For US-Ascii
+                  the conversion can already be done, however.  */
+               if (uppercase && hex_number >= 'a' && hex_number <= 'z')
+                 {
+                   hex_number = hex_number - 'a' + 'A';
+                 }
+               else if (lowercase && hex_number >= 'A' && hex_number <= 'Z')
+                 {
+                   hex_number = hex_number - 'A' + 'a';
+                 }
+
                 length = u8_uctomb ((unsigned char *) (buffer + bufpos),
                                     hex_number, 6);
+
                 if (length > 0)
                   bufpos += length;
               }
@@ -995,6 +1025,8 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
                       unicode = unicode_name_character (name);
                       if (unicode != UNINAME_INVALID)
                         {
+                         /* FIXME: Convert to upper/lowercase if the
+                            corresponding flag is set to true.  */
                           int length =
                             u8_uctomb ((unsigned char *) (buffer + bufpos),
                                        unicode, 6);
@@ -1026,18 +1058,14 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
             case 'L':
               uppercase = false;
               lowercase = true;
-             quotemeta = false;
               ++crs;
               continue;
             case 'U':
               uppercase = true;
               lowercase = false;
-             quotemeta = false;
               ++crs;
               continue;
             case 'Q':
-             uppercase = false;
-             lowercase = false;
               quotemeta = true;
               ++crs;
               continue;
@@ -1055,6 +1083,10 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
                          real_file_name, line_number, *crs);
                   error_with_progname = true;
                 }
+             else
+               {
+                 buffer[bufpos++] = *crs;
+               }
               ++crs;
               continue;
             case 'u':
@@ -1071,6 +1103,10 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
                          real_file_name, line_number, *crs);
                   error_with_progname = true;
                 }
+             else
+               {
+                 buffer[bufpos++] = *crs;
+               }
               ++crs;
               continue;
             case '\\':
@@ -1085,7 +1121,15 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
        else
         backslashed = false;
  
-      if (!backslashed && (*crs == '$' || *crs == '@'))
+      if (quotemeta
+         && !((*crs >= 'A' && *crs <= 'Z') || (*crs >= 'A' && *crs <= 'z')
+              || (*crs >= '0' && *crs <= '9') || *crs == '_'))
+       {
+         buffer[bufpos++] = '\\';
+         backslashed = true;
+       }
+
+      if (!backslashed && !extract_all && (*crs == '$' || *crs == '@'))
         {
           error_with_progname = false;
           error (error_level, 0, _("\
@@ -1128,10 +1172,6 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
             buffer[bufpos++] = *crs;
           ++crs;
         }
-      else if (quotemeta)
-       {
-         buffer[bufpos++] = *crs++;
-       }
        else
         {
           buffer[bufpos++] = *crs++;
@@ -1833,9 +1873,13 @@ x_perl_prelex (message_list_ty *mlp, token_ty *tp)
         case '@':
         case '*':
         case '$':
-         extract_variable (mlp, tp, c);
-         prefer_division_over_regexp = true;
-         return;
+         if (!extract_all)
+           {
+             extract_variable (mlp, tp, c);
+             prefer_division_over_regexp = true;
+             return;
+           }
+         break;
         }
  
        last_non_comment_line = tp->line_number;
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog

index 81da08cfa1fa90b19d1889e9f945a5f948c8dff6..2c95467a80d20f679246c47e69b0239b7f6ca879 100644 (file)
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,8 @@
+2003-09-09  Guido Flohr  <guido@imperia.net>
+
+       * xgettext-29: Add more tests.
+       * lang-perl-1: Use gettext instead of _.
+
  2003-09-08  Bruno Haible  <bruno@clisp.org>
  
         * lang-sh: Avoid test failure with Solaris /bin/sh.
diff --git a/gettext-tools/tests/lang-perl-1 b/gettext-tools/tests/lang-perl-1

index 12cb94a3a038e2adaf73071ec45680a23503faef..36c93f8dc63c5376767b0f065823d9e430777c9b 100755 (executable)
--- a/gettext-tools/tests/lang-perl-1
+++ b/gettext-tools/tests/lang-perl-1
@@ -10,21 +10,23 @@ trap 'rm -fr $tmpfiles' 1 2 3 15
  
  tmpfiles="$tmpfiles program.pl"
  cat <<\EOF > program.pl
-use Locale::Messages qw (textdomain bindtextdomain ngettext);
+use strict;
+
+use Locale::Messages qw (textdomain bindtextdomain gettext ngettext);
  textdomain "prog";
  bindtextdomain "prog", "./";
  my $n = 2;
-print _"'Your command, please?', asked the waiter.";
+print gettext "'Your command, please?', asked the waiter.";
  print "\n";
  printf ngettext ("a piece of cake", "%d pieces of cake", $n), $n;
  print "\n";
-printf _"%s is replaced by %s.", "FF", "EUR";
+printf gettext "%s is replaced by %s.", "FF", "EUR";
  print "\n";
  EOF
  
  tmpfiles="$tmpfiles prog.pot"
  : ${XGETTEXT=xgettext}
-${XGETTEXT} -k_ -o prog.pot --omit-header --no-location program.pl
+${XGETTEXT} -k__ -o prog.pot --omit-header --no-location program.pl
  
  tmpfiles="$tmpfiles prog.ok"
  cat <<EOF > prog.ok
diff --git a/gettext-tools/tests/xgettext-29 b/gettext-tools/tests/xgettext-29

index 652307e270aca49d8233e3e9a6d445441ce9dafa..f75c196b3e942a8bc0ba5c7ae8662df43db3f58a 100755 (executable)
--- a/gettext-tools/tests/xgettext-29
+++ b/gettext-tools/tests/xgettext-29
@@ -1,37 +1,106 @@
  #! /bin/sh
  
-# Test of Perl support with --extract-all and printf format strings.
-
+# Tests for the general string extraction facilities of the Perl backend
+# (with option --extract-all).
+                                                                                
  tmpfiles=""
  trap 'rm -fr $tmpfiles' 1 2 3 15
  
  tmpfiles="$tmpfiles xg-test29.pl"
-cat <<\EOF > xg-test29.pl
-my $n = 2;
-print "'Your $command, please?', asked the waiter.";
-print "\n";
-printf ("$$$$$$$$$$$$$var pieces of cake", $n);
-print "\n";
-printf "@s is replaced by $s->[-1].";
-print "\n";
+cat <<\EOPERL > xg-test29.pl
+use strict;
+
+# A double quoted string.
+print "'Your command, please?', asked the waiter.\n";
+# A double quoted string with interpolations.
+my $polite = 'please';
+print "'Your recommendation, $polite?', answered the guest.\n";
+# A reference.
+my $ref1 = \$polite;
+my $ref2 = \$ref1;
+my $ref3 = \$ref2;
+print "Yes, $$$$ref3!\n";
+# The qq operator and some of the more esoteric string interpolation
+# features of Perl.
+print (qq {\uU\lp \LaNd\E \ldo\lWn, \Uoh\E, yeah\Q!!!\E\\!\n});
+# The q operator.
+print q<E-Mail: <no@spam.org>.  >;
+# Should not be found.
+{ $polite =~ qr?le? }
+
+# List interpolation.
+print "Your Perl include path starts with '$INC[0]' and it " .
+      "ends with '$INC[-1]'.  $#INC directories are searched.\n";
+# Here documents.
+print <<EOF, <<'EOF';
+Line 1\nLine 2
+EOF
+Line 1\nStill line 1
  EOF
+# Perl code inside strings.
+sub hello_func { return 'Hello' };
+print "@{[hello_func]} world!\n";
+# Backticks.
+print `ls $0`;
+print qx;ls $0;;
+
+# The rest requires a Unicode aware Perl.
+require 5.006;
+print "\U\x70\LO\154\x{69}\x{004E}a \Q\lRu\LLeS\E\041\n";
+# FIXME: The following should actually produce 'Polina4ka' in cyrillic letters.
+#print "\u\x{43f}\L\x{41E}\x{43b}\x{418}\E\x{43d}" .
+#      "\x{430}\x{447}\x{43a}\x{430}\n";
+EOPERL
  
  tmpfiles="$tmpfiles xg-test29.pot"
  : ${XGETTEXT=xgettext}
  LC_MESSAGES=C LC_ALL= \
-${XGETTEXT} -a -k_ --omit-header --no-location -o xg-test29.pot xg-test29.pl \
-    2>&1 | grep -v "invalid variable interpolation"
+${XGETTEXT} -a --omit-header --no-location -o xg-test29.pot xg-test29.pl
  test $? = 0 || { rm -fr $tmpfiles; exit 1; }
  
  tmpfiles="$tmpfiles xg-test29.ok"
  cat <<\EOF > xg-test29.ok
-msgid "'Your $command, please?', asked the waiter."
+msgid "'Your command, please?', asked the waiter.\n"
+msgstr ""
+
+msgid "please"
+msgstr ""
+
+msgid "'Your recommendation, $polite?', answered the guest.\n"
+msgstr ""
+
+msgid "Yes, $$$$ref3!\n"
+msgstr ""
+
+msgid "Up and down, OH, yeah\\!\\!\\!\\!\n"
+msgstr ""
+
+msgid "E-Mail: <no@spam.org>.  "
+msgstr ""
+
+msgid ""
+"Your Perl include path starts with '$INC[0]' and it ends with '$INC[-1]'.  "
+"$#INC directories are searched.\n"
+msgstr ""
+
+msgid ""
+"Line 1\n"
+"Line 2\n"
+msgstr ""
+
+msgid "Line 1\\nStill line 1\n"
+msgstr ""
+
+msgid "Hello"
+msgstr ""
+
+msgid "@{[hello_func]} world!\n"
  msgstr ""
  
-msgid "$$$$$$$$$$$$$var pieces of cake"
+msgid "ls $0"
  msgstr ""
  
-msgid "@s is replaced by $s->[-1]."
+msgid "Polina rules!\n"
  msgstr ""
  EOF
author	Bruno Haible <bruno@clisp.org>
	Tue, 9 Sep 2003 10:54:47 +0000 (10:54 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Tue, 23 Jun 2009 10:10:58 +0000 (12:10 +0200)
gettext-tools/doc/ChangeLog		patch \| blob \| blame \| history
gettext-tools/doc/gettext.texi		patch \| blob \| blame \| history
gettext-tools/src/ChangeLog		patch \| blob \| blame \| history
gettext-tools/src/x-perl.c		patch \| blob \| blame \| history
gettext-tools/tests/ChangeLog		patch \| blob \| blame \| history
gettext-tools/tests/lang-perl-1		patch \| blob \| blame \| history
gettext-tools/tests/xgettext-29		patch \| blob \| blame \| history