Support explicit string concatenation with '+' in Python.

author Daiki Ueno <ueno@gnu.org>

Tue, 16 Apr 2013 04:03:07 +0000 (13:03 +0900)

committer Daiki Ueno <ueno@gnu.org>

Mon, 22 Apr 2013 03:07:39 +0000 (12:07 +0900)
author Daiki Ueno <ueno@gnu.org>
Tue, 16 Apr 2013 04:03:07 +0000 (13:03 +0900)
committer Daiki Ueno <ueno@gnu.org>
Mon, 22 Apr 2013 03:07:39 +0000 (12:07 +0900)
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog

index f1aa644c73fbe92db03f8a4e31eb914a304b0e6d..4b92aeb871236c122afb371e06402e5f74d146cf 100644 (file)
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,13 @@
+2013-04-22  Daiki Ueno  <ueno@gnu.org>
+
+       Support explicit string concatenation in Python.
+       * x-python.c (enum token_type_ty): New enumeration item
+       token_type_plus.
+       (free_token): New function.
+       (phase5_get): Recognize token_type_plus.
+       (x_python_lex): Handle string concatenation with '+'.
+       (extract_balanced): Handle token_type_plus.
+
  2013-04-18  Daiki Ueno  <ueno@gnu.org>
  
         * xgettext.c (usage): Wrap long lines in --help output.
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c

index aa6a7d64c3b5074ea6ec862112413480b15e18af..cdca2551c97122a9117fc183495ee46eec4e3d3d 100644 (file)
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -1,5 +1,5 @@
  /* xgettext Python backend.
-   Copyright (C) 2002-2003, 2005-2011 Free Software Foundation, Inc.
+   Copyright (C) 2002-2003, 2005-2013 Free Software Foundation, Inc.
  
     This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
  
@@ -994,6 +994,7 @@ enum token_type_ty
    token_type_rbracket,          /* ] */
    token_type_string,            /* "abc", 'abc', """abc""", '''abc''' */
    token_type_symbol,            /* symbol, number */
+  token_type_plus,              /* + */
    token_type_other              /* misc. operator */
  };
  typedef enum token_type_ty token_type_ty;
@@ -1007,6 +1008,16 @@ struct token_ty
    int line_number;
  };
  
+/* Free the memory pointed to by a 'struct token_ty'.  */
+static inline void
+free_token (token_ty *tp)
+{
+  if (tp->type == token_type_string || tp->type == token_type_symbol)
+    free (tp->string);
+  if (tp->type == token_type_string)
+    drop_reference (tp->comment);
+}
+
  
  /* There are two different input syntaxes for strings, "abc" and r"abc",
     and two different input syntaxes for Unicode strings, u"abc" and ur"abc".
@@ -1594,6 +1605,10 @@ phase5_get (token_ty *tp)
            tp->type = (c == ']' ? token_type_rbracket : token_type_other);
            return;
  
+        case '+':
+          tp->type = token_type_plus;
+          return;
+
          default:
            /* We could carefully recognize each of the 2 and 3 character
               operators, but it is not necessary, as we only need to recognize
@@ -1625,23 +1640,55 @@ static void
  x_python_lex (token_ty *tp)
  {
    phase5_get (tp);
-  if (tp->type != token_type_string)
-    return;
-  for (;;)
+  if (tp->type == token_type_string)
      {
-      token_ty tmp;
-      size_t len;
+      char *sum = tp->string;
+      size_t sum_len = strlen (sum);
  
-      phase5_get (&tmp);
-      if (tmp.type != token_type_string)
+      for (;;)
          {
-          phase5_unget (&tmp);
-          return;
+          token_ty token2, *tp2 = NULL;
+
+          phase5_get (&token2);
+          switch (token2.type)
+            {
+            case token_type_plus:
+              {
+                token_ty token3;
+
+                phase5_get (&token3);
+                if (token3.type == token_type_string)
+                  {
+                    free_token (&token2);
+                    tp2 = &token3;
+                  }
+                else
+                  phase5_unget (&token3);
+              }
+              break;
+            case token_type_string:
+              tp2 = &token2;
+              break;
+            default:
+              break;
+            }
+
+          if (tp2)
+            {
+              char *addend = tp2->string;
+              size_t addend_len = strlen (addend);
+
+              sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
+              memcpy (sum + sum_len, addend, addend_len + 1);
+              sum_len += addend_len;
+
+              free_token (tp2);
+              continue;
+            }
+          phase5_unget (&token2);
+          break;
          }
-      len = strlen (tp->string);
-      tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
-      strcpy (tp->string + len, tmp.string);
-      free (tmp.string);
+      tp->string = sum;
      }
  }
  
@@ -1817,6 +1864,7 @@ extract_balanced (message_list_ty *mlp,
            xgettext_current_source_encoding = xgettext_current_file_source_encoding;
            return true;
  
+        case token_type_plus:
          case token_type_other:
            next_context_iter = null_context_list_iterator;
            state = 0;
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog

index 72f46b1aeef0ba874b90e69f4a4c04def6b32c72..c4e9a278209f6e9e744e9aaf5d40a2885cefb1c0 100644 (file)
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,8 @@
+2013-04-22  Daiki Ueno  <ueno@gnu.org>
+
+       Support explicit string concatenation in Python.
+       * xgettext-python-1: Add test for explicit string concatenation.
+
  2013-04-17  Andreas Stricker  <astricker@futurelab.ch>
  
         Support for JavaScript.
diff --git a/gettext-tools/tests/xgettext-python-1 b/gettext-tools/tests/xgettext-python-1

index 20ea53e48a913ff940e615456b53c8668b45bafe..a649b3d244f5dcb3a05166a30c8048af2bcbd5d2 100755 (executable)
--- a/gettext-tools/tests/xgettext-python-1
+++ b/gettext-tools/tests/xgettext-python-1
@@ -25,6 +25,10 @@ _(ur"abc\
  
  # This will not be extracted.
  _(CATEGORIES["default"]["name"]);
+
+# string concatenation
+_("abc" "def" + "ghi"
+"jkl")
  EOF
  
  tmpfiles="$tmpfiles xg-py-1.err xg-py-1.tmp xg-py-1.pot"
@@ -80,6 +84,10 @@ msgid ""
  "\\\\def\\'ghi\\\"jkl\\a\\b\\f\\n\\r\\t\\v x\\040x\\x7eył\\U00010123\\N{LATIN "
  "SMALL LETTER Z}"
  msgstr ""
+
+#. string concatenation
+msgid "abcdefghijkl"
+msgstr ""
  EOF
  
  : ${DIFF=diff}
author	Daiki Ueno <ueno@gnu.org>
	Tue, 16 Apr 2013 04:03:07 +0000 (13:03 +0900)
committer	Daiki Ueno <ueno@gnu.org>
	Mon, 22 Apr 2013 03:07:39 +0000 (12:07 +0900)
gettext-tools/src/ChangeLog		patch | blob | blame | history
gettext-tools/src/x-python.c		patch | blob | blame | history
gettext-tools/tests/ChangeLog		patch | blob | blame | history
gettext-tools/tests/xgettext-python-1		patch | blob | blame | history