]> git.ipfire.org Git - thirdparty/gettext.git/commitdiff
xgettext: Perl: Report warnings instead of fatal errors.
authorBruno Haible <bruno@clisp.org>
Thu, 19 Sep 2024 10:34:02 +0000 (12:34 +0200)
committerBruno Haible <bruno@clisp.org>
Thu, 19 Sep 2024 10:34:02 +0000 (12:34 +0200)
* gettext-tools/doc/lang-perl.texi (Interpolation I, Interpolation II): Say
"unsupported" instead of "invalid". Say "warning" instead of "fatal error".
* gettext-tools/doc/gettext.texi: Update menu.
* gettext-tools/src/x-perl.c (token_type_string_interpol): New enum item.
(token2string): Handle token_type_string_interpol.
(extract_quotelike_pass3): Remove error_level parameter; use IF_SEVERITY_WARNING
instead. Say "unsupported" instead of "invalid". When encountering an
unsupported variable interpolation, change the token's type to
token_type_string_interpol.
(interpolate_keywords): After calling extract_quotelike_pass3, test the token's
type.
(prefer_regexp_over_division): Handle token_type_string_interpol.
(collect_message): Remove error_level parameter; use IF_SEVERITY_WARNING
instead. After calling extract_quotelike_pass3, test the token's type. Possibly
return NULL.
(extract_balanced): Handle token_type_string_interpol. Test the return value of
collect_message.
* NEWS: Mention the change.

NEWS
gettext-tools/doc/gettext.texi
gettext-tools/doc/lang-perl.texi
gettext-tools/src/x-perl.c

diff --git a/NEWS b/NEWS
index 56b66c34ac7727ad74e3554ff600f4163820cb01..9f8acdb73d7812dc63d1235d37954813b829996c 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -26,6 +26,7 @@ Version 0.23 - September 2024
   - Tcl: With the forthcoming Tcl 9.0, characters outside the Unicode BMP
     in Tcl message catalogs (.msg files) will work regardless of the locale's
     encoding.
+  - Perl: xgettext now reports warnings instead of fatal errors.
   - PHP: Strings with embedded expressions are now recognized.
 
 * Runtime behaviour:
index 88891ecccd51afb0a422777f8790a6c370ee16a4..baf5ca610f4c6420c4ed7e835e55966505253355 100644 (file)
@@ -466,7 +466,7 @@ Perl
 * Default Keywords::            Which Keywords Will xgettext Look For?
 * Special Keywords::            How to Extract Hash Keys
 * Quote-like Expressions::      What are Strings And Quote-like Expressions?
-* Interpolation I::             Invalid String Interpolation
+* Interpolation I::             Unsupported String Interpolation
 * Interpolation II::            Valid String Interpolation
 * Parentheses::                 When To Use Parentheses
 * Long Lines::                  How To Grok with Long Lines
index 7f910481790c2ef5cb754b0c3629d47b8b5dd952..adde4b3d324f2f2c9fedc577ba02d769e46a3b32 100644 (file)
@@ -1,5 +1,5 @@
 @c This file is part of the GNU gettext manual.
-@c Copyright (C) 1995-2020 Free Software Foundation, Inc.
+@c Copyright (C) 1995-2024 Free Software Foundation, Inc.
 @c See the file gettext.texi for copying conditions.
 
 @node Perl
@@ -115,7 +115,7 @@ worst probably being its imperfectness.
 * Default Keywords::            Which Keywords Will xgettext Look For?
 * Special Keywords::            How to Extract Hash Keys
 * Quote-like Expressions::      What are Strings And Quote-like Expressions?
-* Interpolation I::             Invalid String Interpolation
+* Interpolation I::             Unsupported String Interpolation
 * Interpolation II::            Valid String Interpolation
 * Parentheses::                 When To Use Parentheses
 * Long Lines::                  How To Grok with Long Lines
@@ -479,8 +479,8 @@ Delimiters that start with a digit are not supported!
 @end itemize
 
 @node Interpolation I
-@subsubsection Invalid Uses Of String Interpolation
-@cindex Perl invalid string interpolation
+@subsubsection Unsupported Uses Of String Interpolation
+@cindex Perl unsupported string interpolation
 
 Perl is capable of interpolating variables into strings.  This offers
 some nice features in localized programs but can also lead to
@@ -503,9 +503,10 @@ Consequently, it is almost impossible that a translation can be looked
 up at runtime (except if, by accident, the interpolated string is found
 in the message catalog).
 
-The @code{xgettext} program will therefore terminate parsing with a fatal
-error if it encounters a variable inside of an extracted string.  In
-general, this will happen for all kinds of string interpolations that
+The @code{xgettext} program will therefore produce a warning
+if it encounters a variable inside of a string to be extracted,
+and not extract that string.
+In general, this will happen for all kinds of string interpolations that
 cannot be safely performed at compile time.  If you absolutely know
 what you are doing, you can always circumvent this behavior:
 
@@ -526,8 +527,8 @@ internationalization.
 
 Please see the manual page @samp{man perlop} for details of strings and
 quote-like expressions that are subject to interpolation and those
-that are not.  Safe interpolations (that will not lead to a fatal
-error) are:
+that are not.  Safe interpolations (that will not lead to a warning)
+are:
 
 @itemize @bullet
 
@@ -631,7 +632,7 @@ the resulting PO file will begrudgingly improve in terms of readability.
 
 You can interpolate hash lookups in all strings or quote-like
 expressions that are subject to interpolation (see the manual page
-@samp{man perlop} for details).  Double interpolation is invalid, however:
+@samp{man perlop} for details).  Double interpolation is unsupported, however:
 
 @example
 # TRANSLATORS: Replace "the earth" with the name of your planet.
@@ -639,9 +640,9 @@ print gettext qq@{Welcome to $gettext->@{"the earth"@}@};
 @end example
 
 The @code{qq}-quoted string is recognized as an argument to @code{xgettext} in
-the first place, and checked for invalid variable interpolation.  The
+the first place, and checked for unsupported variable interpolation.  The
 dollar sign of hash-dereferencing will therefore terminate the parser 
-with an ``invalid interpolation'' error.
+with an ``unsupported interpolation'' warning.
 
 It is valid to interpolate hash lookups in regular expressions:
 
index ba50f772c1b260c9b2d8f470474c79b781536dd0..ca49b6d8446beb99b6226e3318b7452c31904fa8 100644 (file)
    Also, the syntax after the 'sub' keyword is specified in perlsub.pod.
    Try the command "man perlsub" or "perldoc perlsub".
    Perl 5.10 has new operators '//' and '//=', see
-   <https://perldoc.perl.org/perldelta.html#Defined-or-operator>.  */
+   <https://perldoc.perl.org/perldelta.html#Defined-or-operator>.
+
+   The actual Perl lexer and parser are in
+     perl-5.40.0/toke.c
+     perl-5.40.0/perly.y
+   but, for your sanity, you better don't look at it :)
+ */
 
 #define DEBUG_PERL 0
 #define DEBUG_NESTING_DEPTH 0
@@ -544,6 +550,7 @@ enum token_type_ty
   token_type_lbracket,          /* [ */
   token_type_rbracket,          /* ] */
   token_type_string,            /* quote-like */
+  token_type_string_interpol,   /* quote-like with embedded expressions */
   token_type_number,            /* starting with a digit or dot */
   token_type_named_op,          /* if, unless, while, ... */
   token_type_variable,          /* $... */
@@ -626,6 +633,8 @@ token2string (const token_ty *token)
       return "token_type_rbracket";
     case token_type_string:
       return "token_type_string";
+    case token_type_string_interpol:
+      return "token_type_string_interpol";
     case token_type_number:
       return "token type number";
     case token_type_named_op:
@@ -927,11 +936,12 @@ extract_triple_quotelike (message_list_ty *mlp, token_ty *tp, int delim,
 
 /* Perform pass 3 of quotelike extraction (interpolation).
    *tp is a token of type token_type_string.
-   This function replaces tp->string.
+   This function may either replace tp->string, or change *tp's type to
+   token_type_string_interpol.
    This function does not access tp->comment.  */
 /* FIXME: Currently may writes null-bytes into the string.  */
 static void
-extract_quotelike_pass3 (token_ty *tp, int error_level)
+extract_quotelike_pass3 (token_ty *tp)
 {
   static char *buffer;
   static int bufmax = 0;
@@ -1077,7 +1087,7 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
                     const char *end = strchr (crs, '}');
                     if (end == NULL)
                       {
-                        if_error (error_level,
+                        if_error (IF_SEVERITY_WARNING,
                                   real_file_name, line_number, (size_t)(-1), false,
                                   _("missing right brace on \\x{HEXNUMBER}"));
                         ++crs;
@@ -1196,9 +1206,9 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
                 }
               else if ((unsigned char) *crs >= 0x80)
                 {
-                  if_error (error_level,
+                  if_error (IF_SEVERITY_WARNING,
                             real_file_name, line_number, (size_t)(-1), false,
-                            _("invalid interpolation (\"\\l\") of 8bit character \"%c\""),
+                            _("unsupported interpolation (\"\\l\") of 8bit character \"%c\""),
                             *crs);
                 }
               else
@@ -1215,9 +1225,9 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
                 }
               else if ((unsigned char) *crs >= 0x80)
                 {
-                  if_error (error_level,
+                  if_error (IF_SEVERITY_WARNING,
                             real_file_name, line_number, (size_t)(-1), false,
-                            _("invalid interpolation (\"\\u\") of 8bit character \"%c\""),
+                            _("unsupported interpolation (\"\\u\") of 8bit character \"%c\""),
                             *crs);
                 }
               else
@@ -1248,9 +1258,10 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 
       if (!backslashed && !extract_all && (*crs == '$' || *crs == '@'))
         {
-          if_error (error_level,
+          if_error (IF_SEVERITY_WARNING,
                     real_file_name, line_number, (size_t)(-1), false,
-                    _("invalid variable interpolation at \"%c\""), *crs);
+                    _("unsupported variable interpolation at \"%c\""), *crs);
+          tp->type = token_type_string_interpol;
           ++crs;
         }
       else if (lowercase)
@@ -1259,9 +1270,9 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
             buffer[bufpos++] = *crs - 'A' + 'a';
           else if ((unsigned char) *crs >= 0x80)
             {
-              if_error (error_level,
+              if_error (IF_SEVERITY_WARNING,
                         real_file_name, line_number, (size_t)(-1), false,
-                        _("invalid interpolation (\"\\L\") of 8bit character \"%c\""),
+                        _("unsupported interpolation (\"\\L\") of 8bit character \"%c\""),
                         *crs);
               buffer[bufpos++] = *crs;
             }
@@ -1275,9 +1286,9 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
             buffer[bufpos++] = *crs - 'a' + 'A';
           else if ((unsigned char) *crs >= 0x80)
             {
-              if_error (error_level,
+              if_error (IF_SEVERITY_WARNING,
                         real_file_name, line_number, (size_t)(-1), false,
-                        _("invalid interpolation (\"\\U\") of 8bit character \"%c\""),
+                        _("unsupported interpolation (\"\\U\") of 8bit character \"%c\""),
                         *crs);
               buffer[bufpos++] = *crs;
             }
@@ -1306,7 +1317,8 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 
   /* Replace tp->string.  */
   free (tp->string);
-  tp->string = xstrdup (buffer);
+  if (tp->type == token_type_string)
+    tp->string = xstrdup (buffer);
 }
 
 /* Parse a variable.  This is done in several steps:
@@ -1968,13 +1980,16 @@ interpolate_keywords (message_list_ty *mlp, string_desc_t string, int lineno)
               /* The resulting string has to be interpolated twice.  */
               buffer[bufpos] = '\0';
               token.string = xstrdup (buffer);
-              extract_quotelike_pass3 (&token, IF_SEVERITY_FATAL_ERROR);
-              /* The string can only shrink with interpolation (because
-                 we ignore \Q).  */
-              if (!(strlen (token.string) <= bufpos))
-                abort ();
-              strcpy (buffer, token.string);
-              free (token.string);
+              extract_quotelike_pass3 (&token);
+              if (token.type == token_type_string)
+                {
+                  /* The string can only shrink with interpolation (because
+                     we ignore \Q).  */
+                  if (!(strlen (token.string) <= bufpos))
+                    abort ();
+                  strcpy (buffer, token.string);
+                  free (token.string);
+                }
               state = wait_rbrace;
               break;
             case '\\':
@@ -2060,11 +2075,18 @@ interpolate_keywords (message_list_ty *mlp, string_desc_t string, int lineno)
             case_whitespace:
               break;
             case '}':
-              buffer[bufpos] = '\0';
-              token.string = xstrdup (buffer);
-              extract_quotelike_pass3 (&token, IF_SEVERITY_FATAL_ERROR);
-              remember_a_message (mlp, NULL, token.string, true, false, region,
-                                  &pos, NULL, savable_comment, true);
+              if (token.type == token_type_string)
+                {
+                  buffer[bufpos] = '\0';
+                  token.string = xstrdup (buffer);
+                  extract_quotelike_pass3 (&token);
+                  if (token.type == token_type_string)
+                    {
+                      remember_a_message (mlp, NULL, token.string, true, false,
+                                          region, &pos, NULL, savable_comment,
+                                          true);
+                    }
+                }
               FALLTHROUGH;
             default:
               region = null_context_region ();
@@ -2125,6 +2147,7 @@ prefer_regexp_over_division (token_type_ty type)
         retval = false;
         break;
       case token_type_string:
+      case token_type_string_interpol:
         retval = false;
         break;
       case token_type_number:
@@ -2961,17 +2984,27 @@ x_perl_unlex (token_ty *tp)
 /* ========================= Extracting strings.  ========================== */
 
 /* Assuming TP is a string token, this function accumulates all subsequent
-   . string2 . string3 ... to the string.  (String concatenation.)  */
+   . string2 . string3 ... to the string.  (String concatenation.)
+   If at least one of the tokens gets transformed into a token of type
+   token_type_string_interpol, it returns NULL instead.  */
 
 static char *
-collect_message (message_list_ty *mlp, token_ty *tp, int error_level)
+collect_message (message_list_ty *mlp, token_ty *tp)
 {
   char *string;
   size_t len;
 
-  extract_quotelike_pass3 (tp, error_level);
-  string = xstrdup (tp->string);
-  len = strlen (tp->string) + 1;
+  extract_quotelike_pass3 (tp);
+  if (tp->type == token_type_string)
+    {
+      string = xstrdup (tp->string);
+      len = strlen (tp->string) + 1;
+    }
+  else
+    {
+      string = NULL;
+      len = 0;
+    }
 
   for (;;)
     {
@@ -3006,11 +3039,17 @@ collect_message (message_list_ty *mlp, token_ty *tp, int error_level)
               return string;
             }
 
-          extract_quotelike_pass3 (qstring, error_level);
-          len += strlen (qstring->string);
-          string = xrealloc (string, len);
-          strcat (string, qstring->string);
-          free_token (qstring);
+          extract_quotelike_pass3 (qstring);
+          if (qstring->type == token_type_string)
+            {
+              if (string != NULL)
+                {
+                  len += strlen (qstring->string);
+                  string = xrealloc (string, len);
+                  strcat (string, qstring->string);
+                }
+              free_token (qstring);
+            }
         }
     }
 }
@@ -3436,21 +3475,30 @@ extract_balanced (message_list_ty *mlp,
               break;
 
             case token_type_string:
+            case token_type_string_interpol:
               #if DEBUG_PERL
-              fprintf (stderr, "%s:%d: type string (%d): \"%s\"\n",
-                       logical_file_name, tp->line_number, nesting_level,
-                       tp->string);
+              if (tp->type == token_type_string)
+                fprintf (stderr, "%s:%d: type string (%d): \"%s\"\n",
+                         logical_file_name, tp->line_number, nesting_level,
+                         tp->string);
+              else
+                fprintf (stderr, "%s:%d: type string_interpol (%d)\n",
+                         logical_file_name, tp->line_number, nesting_level);
               #endif
 
               if (extract_all)
                 {
-                  char *string = collect_message (mlp, tp, IF_SEVERITY_WARNING);
-                  lex_pos_ty pos;
+                  char *string = collect_message (mlp, tp);
+                  if (string != NULL)
+                    {
+                      lex_pos_ty pos;
 
-                  pos.file_name = logical_file_name;
-                  pos.line_number = tp->line_number;
-                  remember_a_message (mlp, NULL, string, true, false, inner_region,
-                                      &pos, NULL, tp->comment, true);
+                      pos.file_name = logical_file_name;
+                      pos.line_number = tp->line_number;
+                      remember_a_message (mlp, NULL, string, true, false,
+                                          inner_region, &pos, NULL, tp->comment,
+                                          true);
+                    }
                 }
               else if (!skip_until_comma)
                 {
@@ -3473,14 +3521,20 @@ extract_balanced (message_list_ty *mlp,
 
                   if (must_collect)
                     {
-                      char *string = collect_message (mlp, tp, IF_SEVERITY_FATAL_ERROR);
-                      mixed_string_ty *ms =
-                        mixed_string_alloc_utf8 (string, lc_string,
-                                                 logical_file_name, tp->line_number);
-                      free (string);
-                      arglist_parser_remember (argparser, arg, ms, inner_region,
-                                               logical_file_name, tp->line_number,
-                                               tp->comment, true);
+                      char *string = collect_message (mlp, tp);
+                      if (string != NULL)
+                        {
+                          mixed_string_ty *ms =
+                            mixed_string_alloc_utf8 (string, lc_string,
+                                                     logical_file_name,
+                                                     tp->line_number);
+                          free (string);
+                          arglist_parser_remember (argparser, arg, ms,
+                                                   inner_region,
+                                                   logical_file_name,
+                                                   tp->line_number,
+                                                   tp->comment, true);
+                        }
                     }
                 }