@c This file is part of the GNU gettext manual.
-@c Copyright (C) 1995-2020 Free Software Foundation, Inc.
+@c Copyright (C) 1995-2024 Free Software Foundation, Inc.
@c See the file gettext.texi for copying conditions.
@node Perl
* Default Keywords:: Which Keywords Will xgettext Look For?
* Special Keywords:: How to Extract Hash Keys
* Quote-like Expressions:: What are Strings And Quote-like Expressions?
-* Interpolation I:: Invalid String Interpolation
+* Interpolation I:: Unsupported String Interpolation
* Interpolation II:: Valid String Interpolation
* Parentheses:: When To Use Parentheses
* Long Lines:: How To Grok with Long Lines
@end itemize
@node Interpolation I
-@subsubsection Invalid Uses Of String Interpolation
-@cindex Perl invalid string interpolation
+@subsubsection Unsupported Uses Of String Interpolation
+@cindex Perl unsupported string interpolation
Perl is capable of interpolating variables into strings. This offers
some nice features in localized programs but can also lead to
up at runtime (except if, by accident, the interpolated string is found
in the message catalog).
-The @code{xgettext} program will therefore terminate parsing with a fatal
-error if it encounters a variable inside of an extracted string. In
-general, this will happen for all kinds of string interpolations that
+The @code{xgettext} program will therefore produce a warning
+if it encounters a variable inside of a string to be extracted,
+and not extract that string.
+In general, this will happen for all kinds of string interpolations that
cannot be safely performed at compile time. If you absolutely know
what you are doing, you can always circumvent this behavior:
Please see the manual page @samp{man perlop} for details of strings and
quote-like expressions that are subject to interpolation and those
-that are not. Safe interpolations (that will not lead to a fatal
-error) are:
+that are not. Safe interpolations (that will not lead to a warning)
+are:
@itemize @bullet
You can interpolate hash lookups in all strings or quote-like
expressions that are subject to interpolation (see the manual page
-@samp{man perlop} for details). Double interpolation is invalid, however:
+@samp{man perlop} for details). Double interpolation is unsupported, however:
@example
# TRANSLATORS: Replace "the earth" with the name of your planet.
@end example
The @code{qq}-quoted string is recognized as an argument to @code{xgettext} in
-the first place, and checked for invalid variable interpolation. The
+the first place, and checked for unsupported variable interpolation. The
-dollar sign of hash-dereferencing will therefore terminate the parser
-with an ``invalid interpolation'' error.
+dollar sign of hash-dereferencing will therefore trigger
+an ``unsupported interpolation'' warning, and the string is not extracted.
It is valid to interpolate hash lookups in regular expressions:
Also, the syntax after the 'sub' keyword is specified in perlsub.pod.
Try the command "man perlsub" or "perldoc perlsub".
Perl 5.10 has new operators '//' and '//=', see
- <https://perldoc.perl.org/perldelta.html#Defined-or-operator>. */
+ <https://perldoc.perl.org/perldelta.html#Defined-or-operator>.
+
+ The actual Perl lexer and parser are in
+ perl-5.40.0/toke.c
+ perl-5.40.0/perly.y
+ but, for your sanity, you had better not look at them :)
+ */
#define DEBUG_PERL 0
#define DEBUG_NESTING_DEPTH 0
token_type_lbracket, /* [ */
token_type_rbracket, /* ] */
token_type_string, /* quote-like */
+ token_type_string_interpol, /* quote-like with embedded expressions */
token_type_number, /* starting with a digit or dot */
token_type_named_op, /* if, unless, while, ... */
token_type_variable, /* $... */
return "token_type_rbracket";
case token_type_string:
return "token_type_string";
+ case token_type_string_interpol:
+ return "token_type_string_interpol";
case token_type_number:
return "token type number";
case token_type_named_op:
/* Perform pass 3 of quotelike extraction (interpolation).
*tp is a token of type token_type_string.
- This function replaces tp->string.
+ This function either replaces tp->string, or changes *tp's type to
+ token_type_string_interpol; in the latter case tp->string has been freed
+ and must not be used afterwards.
This function does not access tp->comment. */
-/* FIXME: Currently may writes null-bytes into the string. */
+/* FIXME: Currently may write null bytes into the string. */
static void
-extract_quotelike_pass3 (token_ty *tp, int error_level)
+extract_quotelike_pass3 (token_ty *tp)
{
static char *buffer;
static int bufmax = 0;
const char *end = strchr (crs, '}');
if (end == NULL)
{
- if_error (error_level,
+ if_error (IF_SEVERITY_WARNING,
real_file_name, line_number, (size_t)(-1), false,
_("missing right brace on \\x{HEXNUMBER}"));
++crs;
}
else if ((unsigned char) *crs >= 0x80)
{
- if_error (error_level,
+ if_error (IF_SEVERITY_WARNING,
real_file_name, line_number, (size_t)(-1), false,
- _("invalid interpolation (\"\\l\") of 8bit character \"%c\""),
+ _("unsupported interpolation (\"\\l\") of 8bit character \"%c\""),
*crs);
}
else
}
else if ((unsigned char) *crs >= 0x80)
{
- if_error (error_level,
+ if_error (IF_SEVERITY_WARNING,
real_file_name, line_number, (size_t)(-1), false,
- _("invalid interpolation (\"\\u\") of 8bit character \"%c\""),
+ _("unsupported interpolation (\"\\u\") of 8bit character \"%c\""),
*crs);
}
else
if (!backslashed && !extract_all && (*crs == '$' || *crs == '@'))
{
- if_error (error_level,
+ if_error (IF_SEVERITY_WARNING,
real_file_name, line_number, (size_t)(-1), false,
- _("invalid variable interpolation at \"%c\""), *crs);
+ _("unsupported variable interpolation at \"%c\""), *crs);
+ tp->type = token_type_string_interpol;
++crs;
}
else if (lowercase)
buffer[bufpos++] = *crs - 'A' + 'a';
else if ((unsigned char) *crs >= 0x80)
{
- if_error (error_level,
+ if_error (IF_SEVERITY_WARNING,
real_file_name, line_number, (size_t)(-1), false,
- _("invalid interpolation (\"\\L\") of 8bit character \"%c\""),
+ _("unsupported interpolation (\"\\L\") of 8bit character \"%c\""),
*crs);
buffer[bufpos++] = *crs;
}
buffer[bufpos++] = *crs - 'a' + 'A';
else if ((unsigned char) *crs >= 0x80)
{
- if_error (error_level,
+ if_error (IF_SEVERITY_WARNING,
real_file_name, line_number, (size_t)(-1), false,
- _("invalid interpolation (\"\\U\") of 8bit character \"%c\""),
+ _("unsupported interpolation (\"\\U\") of 8bit character \"%c\""),
*crs);
buffer[bufpos++] = *crs;
}
/* Replace tp->string. */
free (tp->string);
- tp->string = xstrdup (buffer);
+ if (tp->type == token_type_string)
+ tp->string = xstrdup (buffer);
}
/* Parse a variable. This is done in several steps:
/* The resulting string has to be interpolated twice. */
buffer[bufpos] = '\0';
token.string = xstrdup (buffer);
- extract_quotelike_pass3 (&token, IF_SEVERITY_FATAL_ERROR);
- /* The string can only shrink with interpolation (because
- we ignore \Q). */
- if (!(strlen (token.string) <= bufpos))
- abort ();
- strcpy (buffer, token.string);
- free (token.string);
+ extract_quotelike_pass3 (&token);
+ if (token.type == token_type_string)
+ {
+ /* The string can only shrink with interpolation (because
+ we ignore \Q). */
+ if (!(strlen (token.string) <= bufpos))
+ abort ();
+ strcpy (buffer, token.string);
+ free (token.string);
+ }
state = wait_rbrace;
break;
case '\\':
case_whitespace:
break;
case '}':
- buffer[bufpos] = '\0';
- token.string = xstrdup (buffer);
- extract_quotelike_pass3 (&token, IF_SEVERITY_FATAL_ERROR);
- remember_a_message (mlp, NULL, token.string, true, false, region,
- &pos, NULL, savable_comment, true);
+ if (token.type == token_type_string)
+ {
+ buffer[bufpos] = '\0';
+ token.string = xstrdup (buffer);
+ extract_quotelike_pass3 (&token);
+ if (token.type == token_type_string)
+ {
+ remember_a_message (mlp, NULL, token.string, true, false,
+ region, &pos, NULL, savable_comment,
+ true);
+ }
+ }
FALLTHROUGH;
default:
region = null_context_region ();
retval = false;
break;
case token_type_string:
+ case token_type_string_interpol:
retval = false;
break;
case token_type_number:
/* ========================= Extracting strings. ========================== */
/* Assuming TP is a string token, this function accumulates all subsequent
- . string2 . string3 ... to the string. (String concatenation.) */
+ . string2 . string3 ... to the string. (String concatenation.)
+ If at least one of the tokens gets transformed into a token of type
+ token_type_string_interpol, it returns NULL instead. */
static char *
-collect_message (message_list_ty *mlp, token_ty *tp, int error_level)
+collect_message (message_list_ty *mlp, token_ty *tp)
{
char *string;
size_t len;
- extract_quotelike_pass3 (tp, error_level);
- string = xstrdup (tp->string);
- len = strlen (tp->string) + 1;
+ extract_quotelike_pass3 (tp);
+ if (tp->type == token_type_string)
+ {
+ string = xstrdup (tp->string);
+ len = strlen (tp->string) + 1;
+ }
+ else
+ {
+ string = NULL;
+ len = 0;
+ }
for (;;)
{
return string;
}
- extract_quotelike_pass3 (qstring, error_level);
- len += strlen (qstring->string);
- string = xrealloc (string, len);
- strcat (string, qstring->string);
- free_token (qstring);
+ extract_quotelike_pass3 (qstring);
+ if (qstring->type == token_type_string)
+ {
+ if (string != NULL)
+ {
+ len += strlen (qstring->string);
+ string = xrealloc (string, len);
+ strcat (string, qstring->string);
+ }
+ free_token (qstring);
+ }
}
}
}
break;
case token_type_string:
+ case token_type_string_interpol:
#if DEBUG_PERL
- fprintf (stderr, "%s:%d: type string (%d): \"%s\"\n",
- logical_file_name, tp->line_number, nesting_level,
- tp->string);
+ if (tp->type == token_type_string)
+ fprintf (stderr, "%s:%d: type string (%d): \"%s\"\n",
+ logical_file_name, tp->line_number, nesting_level,
+ tp->string);
+ else
+ fprintf (stderr, "%s:%d: type string_interpol (%d)\n",
+ logical_file_name, tp->line_number, nesting_level);
#endif
if (extract_all)
{
- char *string = collect_message (mlp, tp, IF_SEVERITY_WARNING);
- lex_pos_ty pos;
+ char *string = collect_message (mlp, tp);
+ if (string != NULL)
+ {
+ lex_pos_ty pos;
- pos.file_name = logical_file_name;
- pos.line_number = tp->line_number;
- remember_a_message (mlp, NULL, string, true, false, inner_region,
- &pos, NULL, tp->comment, true);
+ pos.file_name = logical_file_name;
+ pos.line_number = tp->line_number;
+ remember_a_message (mlp, NULL, string, true, false,
+ inner_region, &pos, NULL, tp->comment,
+ true);
+ }
}
else if (!skip_until_comma)
{
if (must_collect)
{
- char *string = collect_message (mlp, tp, IF_SEVERITY_FATAL_ERROR);
- mixed_string_ty *ms =
- mixed_string_alloc_utf8 (string, lc_string,
- logical_file_name, tp->line_number);
- free (string);
- arglist_parser_remember (argparser, arg, ms, inner_region,
- logical_file_name, tp->line_number,
- tp->comment, true);
+ char *string = collect_message (mlp, tp);
+ if (string != NULL)
+ {
+ mixed_string_ty *ms =
+ mixed_string_alloc_utf8 (string, lc_string,
+ logical_file_name,
+ tp->line_number);
+ free (string);
+ arglist_parser_remember (argparser, arg, ms,
+ inner_region,
+ logical_file_name,
+ tp->line_number,
+ tp->comment, true);
+ }
}
}