From: Bruno Haible Date: Mon, 2 Oct 2006 12:16:36 +0000 (+0000) Subject: Fix Perl parsing bug introduced between 0.14.6 and 0.15. X-Git-Tag: 0.16.x-branchpoint~139 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=64c41e41ec814c99030a7eec9a410bde7be87a69;p=thirdparty%2Fgettext.git Fix Perl parsing bug introduced between 0.14.6 and 0.15. --- diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 4ee8be6ce..58ab778d8 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,5 +1,15 @@ 2006-10-01 Bruno Haible + Fix Perl parsing bug introduced between 0.14.6 and 0.15. + * x-perl.c (extract_balanced): Add eat_delim and comma_delim arguments. + Use recursion to handle the case of a keyword not followed by an + opening parenthesis. When a keyword is seen, set next_argparser + instead of modifying the argparser variable of the current recursion. + When seeing an opening parenthesis, distinguish the case of function + arguments and the case of a subexpression. Don't call collect_message + for unused argument positions. + Reported by Nicolas François . + * x-perl.c (extract_balanced): Remove unused variable. 2006-09-06 Bruno Haible diff --git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c index bc2055383..9ea03e919 100644 --- a/gettext-tools/src/x-perl.c +++ b/gettext-tools/src/x-perl.c @@ -760,7 +760,8 @@ static void interpolate_keywords (message_list_ty *mlp, const char *string, static token_ty *x_perl_lex (message_list_ty *mlp); static void x_perl_unlex (token_ty *tp); static bool extract_balanced (message_list_ty *mlp, int state, - token_type_ty delim, + token_type_ty delim, bool eat_delim, + bool comma_delim, flag_context_ty outer_context, flag_context_list_iterator_ty context_iter, int arg, struct arglist_parser *argparser); @@ -1369,7 +1370,7 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first) real_file_name, line_number); #endif - if (extract_balanced (mlp, 0, token_type_rbrace, + if (extract_balanced (mlp, 0, token_type_rbrace, true, false, null_context, null_context_list_iterator, 1, arglist_parser_alloc (mlp, NULL))) return; @@ -1560,7 +1561,7 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first) else { x_perl_unlex (t1); - if (extract_balanced (mlp, 1, token_type_rbrace, + if (extract_balanced (mlp, 1, token_type_rbrace, true, false, null_context, context_iter, 1, arglist_parser_alloc (mlp, &shapes))) return; @@ -1591,7 +1592,7 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first) fprintf (stderr, "%s:%d: extracting balanced '{' after varname\n", real_file_name, line_number); #endif - extract_balanced (mlp, 0, token_type_rbrace, + extract_balanced (mlp, 0, token_type_rbrace, true, false, null_context, null_context_list_iterator, 1, arglist_parser_alloc (mlp, NULL)); break; @@ -1601,7 +1602,7 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first) fprintf (stderr, "%s:%d: extracting balanced '[' after varname\n", real_file_name, line_number); #endif - extract_balanced (mlp, 0, token_type_rbracket, + extract_balanced (mlp, 0, token_type_rbracket, true, false, null_context, null_context_list_iterator, 1, arglist_parser_alloc (mlp, NULL)); break; @@ -2846,7 +2847,8 @@ collect_message (message_list_ty *mlp, token_ty *tp, int error_level) Extracted messages are added to MLP. DELIM can be either token_type_rbrace, token_type_rbracket, - token_type_rparen. + token_type_rparen. Additionally, if COMMA_DELIM is true, parsing + stops at the next comma outside parentheses. ARG is the current argument list position, starts with 1. ARGPARSER is the corresponding argument list parser. @@ -2857,23 +2859,17 @@ collect_message (message_list_ty *mlp, token_ty *tp, int error_level) 0 - initial state 1 - keyword has been seen - 2 - extractable string has been seen - 3 - a dot operator after an extractable string has been seen - States 2 and 3 are "fragile", the parser will remain in state 2 - as long as only opening parentheses are seen, a transition to - state 3 is done on appearance of a dot operator, all other tokens + States 1 is "fragile". The parser will remain in state 1 + as long as only opening parentheses are seen. All other tokens will cause the parser to fall back to state 1 or 0, eventually with an error message about invalid intermixing of constant and - non-constant strings. - - Likewise, state 3 is fragile. The parser will remain in state 3 - as long as only closing parentheses are seen, a transition to state - 2 is done on appearance of another (literal!) string, all other - tokens will cause a warning. */ + non-constant strings. */ static bool -extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, +extract_balanced (message_list_ty *mlp, + int state, + token_type_ty delim, bool eat_delim, bool comma_delim, flag_context_ty outer_context, flag_context_list_iterator_ty context_iter, int arg, struct arglist_parser *argparser) @@ -2881,6 +2877,13 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, /* Whether to implicitly assume the next tokens are arguments even without a '('. */ bool next_is_argument = false; + /* Parameters of the keyword just seen. Defined only when next_is_argument + is true. */ + const struct callshapes *next_shapes = NULL; + struct arglist_parser *next_argparser = NULL; + + /* Whether to not consider strings until the next comma. */ + bool skip_until_comma = false; /* Context iterator that will be used if the next token is a '('. */ flag_context_list_iterator_ty next_context_iter = @@ -2914,23 +2917,84 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, xgettext_current_source_encoding = po_charset_utf8; arglist_parser_done (argparser, arg); xgettext_current_source_encoding = xgettext_global_source_encoding; + if (next_argparser != NULL) + free (next_argparser); #if DEBUG_PERL fprintf (stderr, "%s:%d: extract_balanced finished (%d)\n", logical_file_name, tp->line_number, --nesting_level); #endif - free_token (tp); + if (eat_delim) + free_token (tp); + else + /* Preserve the delimiter for the caller. */ + x_perl_unlex (tp); return false; } + if (comma_delim && tp->type == token_type_comma) + { + xgettext_current_source_encoding = po_charset_utf8; + arglist_parser_done (argparser, arg); + xgettext_current_source_encoding = xgettext_global_source_encoding; + if (next_argparser != NULL) + free (next_argparser); +#if DEBUG_PERL + fprintf (stderr, "%s:%d: extract_balanced finished at comma (%d)\n", + logical_file_name, tp->line_number, --nesting_level); +#endif + x_perl_unlex (tp); + return false; + } + if (next_is_argument && tp->type != token_type_lparen) { /* An argument list starts, even though there is no '('. */ - context_iter = next_context_iter; - outer_context = inner_context; - inner_context = - inherited_context (outer_context, - flag_context_list_iterator_advance ( - &context_iter)); + bool next_comma_delim; + + x_perl_unlex (tp); + + if (next_shapes != NULL) + /* We know something about the function being called. Assume + that it consumes only one argument if no argument number or + total > 1 is specified. */ + { + size_t i; + + next_comma_delim = true; + for (i = 0; i < next_shapes->nshapes; i++) + { + const struct callshape *shape = &next_shapes->shapes[i]; + + if (shape->argnum1 > 1 + || shape->argnum2 > 1 + || shape->argnumc > 1 + || shape->argtotal > 1) + next_comma_delim = false; + } + } + else + /* We know nothing about the function being called. It could be + a function prototyped to take only one argument, or on the other + hand it could be prototyped to take more than one argument or an + arbitrary argument list or it could be unprototyped. Due to + the way the parser works, assuming the first case gives the + best results. */ + next_comma_delim = true; + + if (extract_balanced (mlp, state, delim, false, next_comma_delim, + inner_context, next_context_iter, + 1, next_argparser)) + { + xgettext_current_source_encoding = po_charset_utf8; + arglist_parser_done (argparser, arg); + xgettext_current_source_encoding = xgettext_global_source_encoding; + return true; + } + + next_is_argument = false; + next_argparser = NULL; + next_context_iter = null_context_list_iterator; + continue; } switch (tp->type) @@ -2951,15 +3015,16 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, const struct callshapes *shapes = (const struct callshapes *) keyword_value; - xgettext_current_source_encoding = po_charset_utf8; - arglist_parser_done (argparser, arg); - xgettext_current_source_encoding = xgettext_global_source_encoding; - argparser = arglist_parser_alloc (mlp, shapes); - arg = 1; - last_token = token_type_keyword_symbol; - - state = 2; + next_shapes = shapes; + next_argparser = arglist_parser_alloc (mlp, shapes); + state = 1; + } + else + { + next_shapes = NULL; + next_argparser = arglist_parser_alloc (mlp, NULL); + state = 0; } } next_is_argument = true; @@ -2977,41 +3042,74 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, #endif prefer_division_over_regexp = true; next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; break; case token_type_lparen: #if DEBUG_PERL - fprintf (stderr, "%s:%d: type left parentheses (%d)\n", + fprintf (stderr, "%s:%d: type left parenthesis (%d)\n", logical_file_name, tp->line_number, nesting_level); #endif - if (extract_balanced (mlp, state, token_type_rparen, - inner_context, next_context_iter, - arg, arglist_parser_clone (argparser))) + if (next_is_argument) { - xgettext_current_source_encoding = po_charset_utf8; - arglist_parser_done (argparser, arg); - xgettext_current_source_encoding = xgettext_global_source_encoding; - free_token (tp); - return true; + /* Parse the argument list of a function call. */ + if (extract_balanced (mlp, state, token_type_rparen, true, false, + inner_context, next_context_iter, + 1, next_argparser)) + { + xgettext_current_source_encoding = po_charset_utf8; + arglist_parser_done (argparser, arg); + xgettext_current_source_encoding = xgettext_global_source_encoding; + return true; + } + next_is_argument = false; + next_argparser = NULL; } - if (my_last_token == token_type_keyword_symbol) + else { - xgettext_current_source_encoding = po_charset_utf8; - arglist_parser_done (argparser, arg); - xgettext_current_source_encoding = xgettext_global_source_encoding; - argparser = arglist_parser_alloc (mlp, NULL); + /* Parse a parenthesized expression or comma expression. */ + if (extract_balanced (mlp, state, token_type_rparen, true, false, + inner_context, next_context_iter, + arg, arglist_parser_clone (argparser))) + { + xgettext_current_source_encoding = po_charset_utf8; + arglist_parser_done (argparser, arg); + xgettext_current_source_encoding = xgettext_global_source_encoding; + if (next_argparser != NULL) + free (next_argparser); + free_token (tp); + return true; + } + /* FIXME: Is this still needed? */ + if (my_last_token == token_type_keyword_symbol) + { + xgettext_current_source_encoding = po_charset_utf8; + arglist_parser_done (argparser, arg); + xgettext_current_source_encoding = xgettext_global_source_encoding; + argparser = arglist_parser_alloc (mlp, NULL); + } + next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; } - next_is_argument = false; + skip_until_comma = true; next_context_iter = null_context_list_iterator; break; case token_type_rparen: #if DEBUG_PERL - fprintf (stderr, "%s:%d: type right parentheses(%d)\n", + fprintf (stderr, "%s:%d: type right parenthesis (%d)\n", logical_file_name, tp->line_number, nesting_level); #endif next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; + skip_until_comma = true; next_context_iter = null_context_list_iterator; break; @@ -3040,6 +3138,10 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, flag_context_list_iterator_advance ( &context_iter)); next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; + skip_until_comma = false; next_context_iter = passthrough_context_list_iterator; break; @@ -3061,16 +3163,36 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, remember_a_message (mlp, NULL, string, inner_context, &pos, savable_comment); xgettext_current_source_encoding = xgettext_global_source_encoding; } - else if (state) + else if (/* state != 0 && */ !skip_until_comma) { - char *string = collect_message (mlp, tp, EXIT_FAILURE); + /* Need to collect the complete string, with error checking, + only if the argument ARG is used in ARGPARSER. */ + bool must_collect = false; + { + size_t nalternatives = argparser->nalternatives; + size_t i; - xgettext_current_source_encoding = po_charset_utf8; - arglist_parser_remember (argparser, arg, - string, inner_context, - logical_file_name, tp->line_number, - savable_comment); - xgettext_current_source_encoding = xgettext_global_source_encoding; + for (i = 0; i < nalternatives; i++) + { + struct partial_call *cp = &argparser->alternative[i]; + + if (arg == cp->argnumc + || arg == cp->argnum1 || arg == cp->argnum2) + must_collect = true; + } + } + + if (must_collect) + { + char *string = collect_message (mlp, tp, EXIT_FAILURE); + + xgettext_current_source_encoding = po_charset_utf8; + arglist_parser_remember (argparser, arg, + string, inner_context, + logical_file_name, tp->line_number, + savable_comment); + xgettext_current_source_encoding = xgettext_global_source_encoding; + } } if (arglist_parser_decidedp (argparser, arg)) @@ -3083,6 +3205,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, } next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; break; @@ -3094,6 +3219,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, xgettext_current_source_encoding = po_charset_utf8; arglist_parser_done (argparser, arg); xgettext_current_source_encoding = xgettext_global_source_encoding; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; free_token (tp); return true; @@ -3102,17 +3230,22 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, fprintf (stderr, "%s:%d: type lbrace (%d)\n", logical_file_name, tp->line_number, nesting_level); #endif - if (extract_balanced (mlp, 0, token_type_rbrace, + if (extract_balanced (mlp, 0, token_type_rbrace, true, false, null_context, null_context_list_iterator, 1, arglist_parser_alloc (mlp, NULL))) { xgettext_current_source_encoding = po_charset_utf8; arglist_parser_done (argparser, arg); xgettext_current_source_encoding = xgettext_global_source_encoding; + if (next_argparser != NULL) + free (next_argparser); free_token (tp); return true; } next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; break; @@ -3122,6 +3255,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, logical_file_name, tp->line_number, nesting_level); #endif next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; state = 0; break; @@ -3131,17 +3267,22 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, fprintf (stderr, "%s:%d: type lbracket (%d)\n", logical_file_name, tp->line_number, nesting_level); #endif - if (extract_balanced (mlp, 0, token_type_rbracket, + if (extract_balanced (mlp, 0, token_type_rbracket, true, false, null_context, null_context_list_iterator, 1, arglist_parser_alloc (mlp, NULL))) { xgettext_current_source_encoding = po_charset_utf8; arglist_parser_done (argparser, arg); xgettext_current_source_encoding = xgettext_global_source_encoding; + if (next_argparser != NULL) + free (next_argparser); free_token (tp); return true; } next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; break; @@ -3151,6 +3292,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, logical_file_name, tp->line_number, nesting_level); #endif next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; state = 0; break; @@ -3174,6 +3318,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, outer_context = null_context; context_iter = null_context_list_iterator; next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = passthrough_context_list_iterator; inner_context = inherited_context (outer_context, @@ -3187,6 +3334,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, logical_file_name, tp->line_number, nesting_level); #endif next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; break; @@ -3196,6 +3346,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, logical_file_name, tp->line_number, nesting_level); #endif next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; state = 0; break; @@ -3207,6 +3360,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, tp->string); #endif next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; state = 0; break; @@ -3217,6 +3373,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, logical_file_name, tp->line_number, nesting_level); #endif next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; break; @@ -3226,6 +3385,9 @@ extract_balanced (message_list_ty *mlp, int state, token_type_ty delim, logical_file_name, tp->line_number, nesting_level); #endif next_is_argument = false; + if (next_argparser != NULL) + free (next_argparser); + next_argparser = NULL; next_context_iter = null_context_list_iterator; state = 0; break; @@ -3269,7 +3431,7 @@ extract_perl (FILE *f, const char *real_filename, const char *logical_filename, /* Eat tokens until eof is seen. When extract_balanced returns due to an unbalanced closing brace, just restart it. */ - while (!extract_balanced (mlp, 0, token_type_rbrace, + while (!extract_balanced (mlp, 0, token_type_rbrace, true, false, null_context, null_context_list_iterator, 1, arglist_parser_alloc (mlp, NULL))) ; diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index 4de0f61eb..142399fdc 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,8 @@ +2006-10-01 Bruno Haible + + * xgettext-perl-6: New file. + * Makefile.am (TESTS): Add it. + 2006-09-06 Bruno Haible * msgfmt-12: Update expected output. diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index a851d96b4..c74903db8 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -79,7 +79,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ xgettext-lisp-1 \ xgettext-objc-1 xgettext-objc-2 \ xgettext-perl-1 xgettext-perl-2 xgettext-perl-3 xgettext-perl-4 \ - xgettext-perl-5 \ + xgettext-perl-5 xgettext-perl-6 \ xgettext-php-1 xgettext-php-2 \ xgettext-po-1 \ xgettext-properties-1 \