From d4661005c12f491eb66765ceb27be9b45aaf9aff Mon Sep 17 00:00:00 2001
From: Daiki Ueno
Date: Tue, 16 Apr 2013 13:03:07 +0900
Subject: [PATCH] Support explicit string concatenation with '+' in Python.

---
Note: in x_python_lex, token3 is declared at loop scope, next to token2,
because tp2 may point to it and is dereferenced after the switch statement
has been left.  Declaring it inside the 'case token_type_plus:' block would
leave tp2 dangling once that block is exited.

 gettext-tools/src/ChangeLog           | 10 ++++
 gettext-tools/src/x-python.c          | 76 ++++++++++++++++++++++-----
 gettext-tools/tests/ChangeLog         |  5 ++
 gettext-tools/tests/xgettext-python-1 |  8 +++
 4 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index f1aa644c7..4b92aeb87 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,13 @@
+2013-04-22  Daiki Ueno
+
+	Support explicit string concatenation in Python.
+	* x-python.c (enum token_type_ty): New enumeration item
+	token_type_plus.
+	(free_token): New function.
+	(phase5_get): Recognize token_type_plus.
+	(x_python_lex): Handle string concatenation with '+'.
+	(extract_balanced): Handle token_type_plus.
+
 2013-04-18  Daiki Ueno
 
 	* xgettext.c (usage): Wrap long lines in --help output.
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c
index aa6a7d64c..cdca2551c 100644
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -1,5 +1,5 @@
 /* xgettext Python backend.
-   Copyright (C) 2002-2003, 2005-2011 Free Software Foundation, Inc.
+   Copyright (C) 2002-2003, 2005-2013 Free Software Foundation, Inc.
 
    This file was written by Bruno Haible , 2002.
 
@@ -994,6 +994,7 @@ enum token_type_ty
   token_type_rbracket,          /* ] */
   token_type_string,            /* "abc", 'abc', """abc""", '''abc''' */
   token_type_symbol,            /* symbol, number */
+  token_type_plus,              /* + */
   token_type_other              /* misc. operator */
 };
 typedef enum token_type_ty token_type_ty;
@@ -1007,6 +1008,16 @@ struct token_ty
   int line_number;
 };
 
+/* Free the memory pointed to by a 'struct token_ty'.  */
+static inline void
+free_token (token_ty *tp)
+{
+  if (tp->type == token_type_string || tp->type == token_type_symbol)
+    free (tp->string);
+  if (tp->type == token_type_string)
+    drop_reference (tp->comment);
+}
+
 
 /* There are two different input syntaxes for strings, "abc" and r"abc",
    and two different input syntaxes for Unicode strings, u"abc" and ur"abc".
@@ -1594,6 +1605,10 @@ phase5_get (token_ty *tp)
           tp->type = (c == ']' ? token_type_rbracket : token_type_other);
           return;
 
+        case '+':
+          tp->type = token_type_plus;
+          return;
+
         default:
           /* We could carefully recognize each of the 2 and 3 character
              operators, but it is not necessary, as we only need to recognize
@@ -1625,23 +1640,55 @@ static void
 x_python_lex (token_ty *tp)
 {
   phase5_get (tp);
-  if (tp->type != token_type_string)
-    return;
-  for (;;)
+  if (tp->type == token_type_string)
     {
-      token_ty tmp;
-      size_t len;
+      char *sum = tp->string;
+      size_t sum_len = strlen (sum);
 
-      phase5_get (&tmp);
-      if (tmp.type != token_type_string)
+      for (;;)
         {
-          phase5_unget (&tmp);
-          return;
+          token_ty token2;
+          token_ty token3;
+          token_ty *tp2 = NULL;
+
+          phase5_get (&token2);
+          switch (token2.type)
+            {
+            case token_type_plus:
+              {
+                phase5_get (&token3);
+                if (token3.type == token_type_string)
+                  {
+                    free_token (&token2);
+                    tp2 = &token3;
+                  }
+                else
+                  phase5_unget (&token3);
+              }
+              break;
+            case token_type_string:
+              tp2 = &token2;
+              break;
+            default:
+              break;
+            }
+
+          if (tp2)
+            {
+              char *addend = tp2->string;
+              size_t addend_len = strlen (addend);
+
+              sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
+              memcpy (sum + sum_len, addend, addend_len + 1);
+              sum_len += addend_len;
+
+              free_token (tp2);
+              continue;
+            }
+          phase5_unget (&token2);
+          break;
         }
-      len = strlen (tp->string);
-      tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
-      strcpy (tp->string + len, tmp.string);
-      free (tmp.string);
+      tp->string = sum;
     }
 }
 
@@ -1817,6 +1864,7 @@ extract_balanced (message_list_ty *mlp,
           xgettext_current_source_encoding = xgettext_current_file_source_encoding;
           return true;
 
+        case token_type_plus:
         case token_type_other:
           next_context_iter = null_context_list_iterator;
           state = 0;
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog
index 72f46b1ae..c4e9a2782 100644
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,8 @@
+2013-04-22  Daiki Ueno
+
+	Support explicit string concatenation in Python.
+	* xgettext-python-1: Add test for explicit string concatenation.
+
 2013-04-17  Andreas Stricker
 
 	Support for JavaScript.
diff --git a/gettext-tools/tests/xgettext-python-1 b/gettext-tools/tests/xgettext-python-1
index 20ea53e48..a649b3d24 100755
--- a/gettext-tools/tests/xgettext-python-1
+++ b/gettext-tools/tests/xgettext-python-1
@@ -25,6 +25,10 @@ _(ur"abc\
 
 # This will not be extracted.
 _(CATEGORIES["default"]["name"]);
+
+# string concatenation
+_("abc" "def" + "ghi"
+"jkl")
 EOF
 
 tmpfiles="$tmpfiles xg-py-1.err xg-py-1.tmp xg-py-1.pot"
@@ -80,6 +84,10 @@ msgid ""
 "\\\\def\\'ghi\\\"jkl\\a\\b\\f\\n\\r\\t\\v x\\040x\\x7eył\\U00010123\\N{LATIN "
 "SMALL LETTER Z}"
 msgstr ""
+
+#. string concatenation
+msgid "abcdefghijkl"
+msgstr ""
 EOF
 
 : ${DIFF=diff}
-- 
2.47.2