Support for Vala.

author Daiki Ueno <ueno@gnu.org>

Tue, 21 May 2013 01:39:06 +0000 (10:39 +0900)

committer Daiki Ueno <ueno@gnu.org>

Tue, 4 Jun 2013 05:44:23 +0000 (14:44 +0900)
author Daiki Ueno <ueno@gnu.org>
Tue, 21 May 2013 01:39:06 +0000 (10:39 +0900)
committer Daiki Ueno <ueno@gnu.org>
Tue, 4 Jun 2013 05:44:23 +0000 (14:44 +0900)
diff --git a/ChangeLog b/ChangeLog

index 9cbd5ce84fc32614f99338144a52df064f74d0f1..9625a7520866b31db2f770ce0153684c21496235 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2013-06-04  Daiki Ueno  <ueno@gnu.org>
+
+       Support for Vala.
+       * NEWS: Mention Vala support.
+
  2013-05-29  Stefano Lattarini  <stefano.lattarini@gmail.com>  (tiny change)
  
         * configure.ac (AM_INIT_AUTOMAKE): Add 'parallel-tests' option.
diff --git a/NEWS b/NEWS

index de2b61d4394719965c9a1799b12b8a43e9cd8fcf..84352a2b90b009d7e42afc30c7781fca759e3453 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,10 @@ Version 0.18.3 - unreleased
      xgettext and msgfmt's format string checking now recognize Python
      format string in braced syntax (PEP 3101).
  
+  - Vala
+
+    xgettext now supports Vala.
+
  * msgattrib now has --previous option to keep previous msgid when
    making messages fuzzy, similar to msgmerge --previous.
  
diff --git a/gettext-tools/doc/ChangeLog b/gettext-tools/doc/ChangeLog

index e5e75291697ae47d2ee8a48f2bb54f88609a19ec..c153dbbe9eacff78c76ee3ece8102b42be3de36d 100644 (file)
--- a/gettext-tools/doc/ChangeLog
+++ b/gettext-tools/doc/ChangeLog
@@ -1,3 +1,9 @@
+2013-06-04  Daiki Ueno  <ueno@gnu.org>
+
+       Support for Vala.
+       * xgettext.texi: Document Vala source language. Document
+       that it is applicable to --flag.
+
  2013-05-20  Pavel Kharitonov  <ineiev@gnu.org>  (tiny change)
  
         Add --previous option to msgattrib.
diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi

index ccee65794987fa0164f5d8d1327ec38afa2f23d7..83a6fa2893fa0247019b4c1e97b691ef1efba8a8 100644 (file)
--- a/gettext-tools/doc/xgettext.texi
+++ b/gettext-tools/doc/xgettext.texi
@@ -73,7 +73,8 @@ are @code{C}, @code{C++}, @code{ObjectiveC}, @code{PO}, @code{Shell},
  @code{Python}, @code{Lisp}, @code{EmacsLisp}, @code{librep}, @code{Scheme},
  @code{Smalltalk}, @code{Java}, @code{JavaProperties}, @code{C#}, @code{awk},
  @code{YCP}, @code{Tcl}, @code{Perl}, @code{PHP}, @code{GCC-source},
-@code{NXStringTable}, @code{RST}, @code{Glade}, @code{Lua}, @code{JavaScript}.
+@code{NXStringTable}, @code{RST}, @code{Glade}, @code{Lua}, @code{JavaScript},
+@code{Vala}.
  
  @item -C
  @itemx --c++
@@ -137,7 +138,7 @@ Extract all strings.
  
  This option has an effect with most languages, namely C, C++, ObjectiveC,
  Shell, Python, Lisp, EmacsLisp, librep, Java, C#, awk, Tcl, Perl, PHP,
-GCC-source, Glade, Lua, JavaScript.
+GCC-source, Glade, Lua, JavaScript, Vala.
  
  @item -k[@var{keywordspec}]
  @itemx --keyword[=@var{keywordspec}]
@@ -180,7 +181,7 @@ escaped.
  
  This option has an effect with most languages, namely C, C++, ObjectiveC,
  Shell, Python, Lisp, EmacsLisp, librep, Java, C#, awk, Tcl, Perl, PHP,
-GCC-source, Glade, Lua, JavaScript.
+GCC-source, Glade, Lua, JavaScript, Vala.
  
  The default keyword specifications, which are always looked for if not
  explicitly disabled, are language dependent.  They are:
@@ -255,6 +256,11 @@ For Lua: @code{_}, @code{gettext.gettext}, @code{gettext.dgettext:2},
  For JavaScript: @code{_}, @code{gettext}, @code{dgettext:2},
  @code{dcgettext:2}, @code{ngettext:1,2}, @code{dngettext:2,3},
  @code{pgettext:1c,2}, @code{dpgettext:2c,3}.
+
+@item
+For Vala: @code{_}, @code{Q_}, @code{N_}, @code{NC_:1c,2}, @code{dgettext:2},
+@code{dcgettext:2}, @code{ngettext:1,2}, @code{dngettext:2,3},
+@code{dpgettext:2g}, @code{dpgettext2:2c,3}.
  @end itemize
  
  To disable the default keyword specifications, the option @samp{-k} or
@@ -307,7 +313,7 @@ lead to a crash at runtime.
  @*
  This option has an effect with most languages, namely C, C++, ObjectiveC,
  Shell, Python, Lisp, EmacsLisp, librep, Scheme, Java, C#, awk, YCP, Tcl, Perl, PHP,
-GCC-source, Lua, JavaScript.
+GCC-source, Lua, JavaScript, Vala.
  
  @item -T
  @itemx --trigraphs
diff --git a/gettext-tools/po/ChangeLog b/gettext-tools/po/ChangeLog

index d6017a68bec3dbd19191b3ccbe3015f30b35ae14..2e54773a03837e8fc94e42de5c3efbed4bd8777c 100644 (file)
--- a/gettext-tools/po/ChangeLog
+++ b/gettext-tools/po/ChangeLog
@@ -1,3 +1,8 @@
+2013-06-04  Daiki Ueno  <ueno@gnu.org>
+
+       Support for Vala.
+       * POTFILES.in: Add src/x-vala.c.
+
  2013-04-30  Daiki Ueno  <ueno@gnu.org>
  
         * POTFILES.in: Add src/format-javascript.c, src/format-lua.c,
diff --git a/gettext-tools/po/POTFILES.in b/gettext-tools/po/POTFILES.in

index 3f2a52e0b36dce29a370ff28fdfb4cfc614a4c29..c9d28874864488fb97dd6e95dbb13dbba4106cd2 100644 (file)
--- a/gettext-tools/po/POTFILES.in
+++ b/gettext-tools/po/POTFILES.in
@@ -118,6 +118,7 @@ src/x-scheme.c
  src/x-sh.c
  src/x-smalltalk.c
  src/x-tcl.c
+src/x-vala.c
  src/x-ycp.c
  src/xgettext.c
  src/user-email.sh.in
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog

index 0608e15e3fca26784604c1608db1ca1b0e949379..171ad8d058663f3e7d1fe00a38943cfd70fea68b 100644 (file)
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,19 @@
+2013-06-04  Daiki Ueno  <ueno@gnu.org>
+
+       Support for Vala.
+       * x-vala.h: New file.
+       * x-vala.c: New file.
+       * xgettext.c: Include x-vala.h.
+       (flag_table_vala): New variable.
+       (main): Invoke init_flag_table_vala, x_vala_extract_all,
+       x_vala_keyword.
+       (usage): Mention Vala source language.
+       (language_to_extractor): Add Vala rule.
+       (extension_to_language): Add Vala rule.
+       * Makefile.am (noinst_HEADERS): Add x-vala.h.
+       (xgettext_SOURCES): Add x-vala.c.
+       * FILES: Update.
+
  2013-05-21  Daiki Ueno  <ueno@gnu.org>
  
         * x-javascript.c (phase7_getuc): Treat non-legitimate character
diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES

index 94d7f6482a567cdf7ee93c17f1d4c427c1ae3b8f..5fa8c6e8fccc35de3ff38fe469e765fb1501f6ba 100644 (file)
--- a/gettext-tools/src/FILES
+++ b/gettext-tools/src/FILES
@@ -342,6 +342,9 @@ msgl-check.c
  | x-javascript.h
  | x-javascript.c
  |               String extractor for JavaScript.
+| x-vala.h
+| x-vala.c
+|               String extractor for Vala.
  | xgettext.c
  |               Main source for the 'xgettext' program.
  |
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am

index d5fe699b5b7b1b8e4bbef65cab70a6fd92f25211..905ddfd5af601e144c5e6d9d9614e16dc6980cf0 100644 (file)
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -52,7 +52,7 @@ po-time.h plural-table.h lang-table.h format.h filters.h \
  xgettext.h x-c.h x-po.h x-sh.h x-python.h x-lisp.h x-elisp.h x-librep.h \
  x-scheme.h x-smalltalk.h x-java.h x-properties.h x-csharp.h x-awk.h x-ycp.h \
  x-tcl.h x-perl.h x-php.h x-stringtable.h x-rst.h x-glade.h x-lua.h \
-x-javascript.h
+x-javascript.h x-vala.h
  
  EXTRA_DIST += FILES project-id ChangeLog.0
  
@@ -177,7 +177,7 @@ endif
  xgettext_SOURCES += \
    x-c.c x-po.c x-sh.c x-python.c x-lisp.c x-elisp.c x-librep.c x-scheme.c \
    x-smalltalk.c x-java.c x-csharp.c x-awk.c x-ycp.c x-tcl.c x-perl.c x-php.c \
-  x-rst.c x-glade.c x-lua.c x-javascript.c
+  x-rst.c x-glade.c x-lua.c x-javascript.c x-vala.c
  if !WOE32DLL
  msgattrib_SOURCES = msgattrib.c
  else
diff --git a/gettext-tools/src/x-vala.c b/gettext-tools/src/x-vala.c

new file mode 100644 (file)

index 0000000..2faa513
--- /dev/null
+++ b/gettext-tools/src/x-vala.c
@@ -0,0 +1,1315 @@
+/* xgettext Vala backend.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+   This file was written by Daiki Ueno <ueno@gnu.org>, 2013.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* Specification.  */
+#include "x-vala.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "xgettext.h"
+#include "error.h"
+#include "error-progname.h"
+#include "xalloc.h"
+#include "xvasprintf.h"
+#include "hash.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
+
+/* The Vala syntax is defined in the Vala Reference Manual
+   http://www.vala-project.org/doc/vala/.
+   See also vala/valascanner.vala.  */
+
+/* ====================== Keyword set customization.  ====================== */
+
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+/* Request that all strings be extracted, by setting the file-scope
+   'extract_all' flag.  */
+void
+x_vala_extract_all ()
+{
+  extract_all = true;
+}
+
+
+/* Register the keyword specification NAME in the hash table KEYWORDS.
+   A NULL NAME registers nothing; it only turns off the built-in default
+   keywords.  */
+static void
+add_keyword (const char *name, hash_table *keywords)
+{
+  const char *spec_end;
+  const char *first_colon;
+  struct callshape shape;
+
+  if (name == NULL)
+    {
+      default_keywords = false;
+      return;
+    }
+
+  /* The table is created lazily, when the first keyword arrives.  */
+  if (keywords->table == NULL)
+    hash_init (keywords, 100);
+
+  split_keywordspec (name, &spec_end, &shape);
+
+  /* The characters between NAME and SPEC_END should form a valid C
+     identifier.  A colon inside that range means an invalid parse in
+     split_keywordspec(), and such a specification is dropped.  */
+  first_colon = strchr (name, ':');
+  if (first_colon != NULL && first_colon < spec_end)
+    return;
+
+  insert_keyword_callshape (keywords, name, spec_end - name, &shape);
+}
+
+/* Add the keyword specification NAME to the file-scope 'keywords' table.
+   As implemented by add_keyword, a NULL NAME disables the default
+   keywords instead of adding anything.  */
+void
+x_vala_keyword (const char *name)
+{
+  add_keyword (name, &keywords);
+}
+
+/* Install the built-in keyword specifications, unless they were disabled
+   or have been installed already.  */
+static void
+init_keywords ()
+{
+  /* When adding new keywords here, also update the documentation in
+     xgettext.texi!  */
+  static const char *const builtin_specs[] =
+    {
+      "dgettext:2",
+      "dcgettext:2",
+      "ngettext:1,2",
+      "dngettext:2,3",
+      "dpgettext:2g",
+      "dpgettext2:2c,3",
+      "_",
+      "Q_",
+      "N_",
+      "NC_:1c,2"
+    };
+  size_t i;
+
+  if (!default_keywords)
+    return;
+
+  for (i = 0; i < SIZEOF (builtin_specs); i++)
+    x_vala_keyword (builtin_specs[i]);
+
+  /* Clear the flag so that a second call adds nothing.  */
+  default_keywords = false;
+}
+
+/* Record the default format-string flags for Vala through
+   xgettext_record_flag, in the order in which they are listed below.  */
+void
+init_flag_table_vala ()
+{
+  static const char *const default_flags[] =
+    {
+      /* Message arguments of the gettext-like functions.  */
+      "dgettext:2:pass-c-format",
+      "dcgettext:2:pass-c-format",
+      "ngettext:1:pass-c-format",
+      "ngettext:2:pass-c-format",
+      "dngettext:2:pass-c-format",
+      "dngettext:3:pass-c-format",
+      "dpgettext:2:pass-c-format",
+      "dpgettext2:3:pass-c-format",
+      "_:1:pass-c-format",
+      "Q_:1:pass-c-format",
+      "N_:1:pass-c-format",
+      "NC_:2:pass-c-format",
+
+      /* Vala leaves string formatting to GLib functions, and thus the
+         format string is exactly the same as in C.  See also
+         vapi/glib-2.0.vapi.  */
+      "printf:1:c-format",
+      "vprintf:1:c-format"
+    };
+  size_t i;
+
+  for (i = 0; i < SIZEOF (default_flags); i++)
+    xgettext_record_flag (default_flags[i]);
+}
+
+
+/* ======================== Reading of characters.  ======================== */
+
+/* Real filename, used in error messages about the input file.  */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages.  */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream.  */
+static FILE *fp;
+
+
+/* 1. line_number handling.  */
+
+#define MAX_PHASE1_PUSHBACK 16
+static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
+static int phase1_pushback_length;
+
+
+/* Return the next character: a pushed-back one if any is pending,
+   otherwise the next one read from FP.  Returns EOF at end of input; a
+   read error is reported through error() with EXIT_FAILURE.  Each newline
+   handed out increments line_number.  */
+static int
+phase1_getc ()
+{
+  int ch;
+
+  if (phase1_pushback_length != 0)
+    {
+      /* The most recently pushed-back character comes out first.  */
+      phase1_pushback_length--;
+      ch = phase1_pushback[phase1_pushback_length];
+    }
+  else if ((ch = getc (fp)) == EOF)
+    {
+      if (ferror (fp))
+        error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+               real_file_name);
+      return EOF;
+    }
+
+  if (ch == '\n')
+    line_number++;
+  return ch;
+}
+
+
+/* Push C back, so that the next phase1_getc call returns it again.
+   EOF is ignored.  Up to MAX_PHASE1_PUSHBACK characters can be pending
+   at a time; pushing back more than that aborts.  */
+static void
+phase1_ungetc (int c)
+{
+  if (c == EOF)
+    return;
+
+  /* Undo the line count done by phase1_getc; it is redone on re-read.  */
+  if (c == '\n')
+    line_number--;
+
+  if (phase1_pushback_length == SIZEOF (phase1_pushback))
+    abort ();
+  phase1_pushback[phase1_pushback_length] = c;
+  phase1_pushback_length++;
+}
+
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+/* Accumulating comments.  */
+
+static char *buffer;
+static size_t bufmax;
+static size_t buflen;
+
+/* Start accumulating a new comment line.  Only the fill count is reset;
+   the buffer allocation is kept for reuse.  */
+static inline void
+comment_start ()
+{
+  buflen = 0;
+}
+
+/* Append the character C to the comment line being accumulated, enlarging
+   the buffer first when it is full.  */
+static inline void
+comment_add (int c)
+{
+  if (bufmax <= buflen)
+    {
+      bufmax = 2 * bufmax + 10;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen] = c;
+  buflen++;
+}
+
+/* Finish the comment line being accumulated: drop its last
+   CHARS_TO_REMOVE characters, strip trailing spaces and tabs,
+   NUL-terminate it, and pass it to savable_comment_add.  */
+static inline void
+comment_line_end (size_t chars_to_remove)
+{
+  size_t len = buflen - chars_to_remove;
+
+  while (len > 0)
+    {
+      char last = buffer[len - 1];
+
+      if (last != ' ' && last != '\t')
+        break;
+      len--;
+    }
+  buflen = len;
+
+  /* When nothing was removed, the buffer may be exactly full (or not
+     allocated yet), so make room for the terminating NUL.  */
+  if (chars_to_remove == 0 && buflen >= bufmax)
+    {
+      bufmax = 2 * bufmax + 10;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen] = '\0';
+  savable_comment_add (buffer);
+}
+
+
+/* 2. Replace each comment that is not inside a character constant or
+   string literal: a C style comment is returned as a single space
+   character, a "//" comment (including its terminating newline, if any)
+   as a single newline.  The comment text is handed, line by line, to
+   savable_comment_add via comment_line_end, and last_comment_line is
+   updated.  All other characters are passed through unchanged.  */
+
+static int
+phase2_getc ()
+{
+  int c;
+  bool last_was_star;
+
+  c = phase1_getc ();
+  if (c != '/')
+    return c;
+  c = phase1_getc ();
+  switch (c)
+    {
+    default:
+      /* A lone slash, not the start of a comment.  */
+      phase1_ungetc (c);
+      return '/';
+
+    case '*':
+      /* C comment.  */
+      comment_start ();
+      last_was_star = false;
+      for (;;)
+        {
+          c = phase1_getc ();
+          /* An unterminated comment simply ends at EOF; the partial last
+             line is not saved.  */
+          if (c == EOF)
+            break;
+          /* We skip all leading white space, but not EOLs.  */
+          if (!(buflen == 0 && (c == ' ' || c == '\t')))
+            comment_add (c);
+          switch (c)
+            {
+            case '\n':
+              /* Save the line without its newline and start the next.  */
+              comment_line_end (1);
+              comment_start ();
+              last_was_star = false;
+              continue;
+
+            case '*':
+              last_was_star = true;
+              continue;
+
+            case '/':
+              if (last_was_star)
+                {
+                  /* End of comment: save the line without the closing
+                     star and slash.  This 'break' leaves the switch only;
+                     the 'break' after the switch leaves the loop.  */
+                  comment_line_end (2);
+                  break;
+                }
+              /* FALLTHROUGH */
+
+            default:
+              last_was_star = false;
+              continue;
+            }
+          break;
+        }
+      last_comment_line = line_number;
+      return ' ';
+
+    case '/':
+      /* C++ or ISO C 99 comment.  */
+      comment_start ();
+      for (;;)
+        {
+          c = phase1_getc ();
+          if (c == '\n' || c == EOF)
+            break;
+          /* We skip all leading white space, but not EOLs.  */
+          if (!(buflen == 0 && (c == ' ' || c == '\t')))
+            comment_add (c);
+        }
+      comment_line_end (0);
+      last_comment_line = line_number;
+      return '\n';
+    }
+}
+
+
+/* Push back the character C at the phase 2 level.  This simply delegates
+   to phase1_ungetc.  */
+static void
+phase2_ungetc (int c)
+{
+  phase1_ungetc (c);
+}
+
+
+/* ========================== Reading of tokens.  ========================== */
+
+/* The kinds of token produced by the tokenizer.  Several of the operator
+   kinds are distinguished only so that phase3_get can decide, from the
+   type of the preceding token, whether a '/' starts a regular expression
+   literal.  */
+enum token_type_ty
+{
+  token_type_character_constant,        /* 'x' */
+  token_type_eof,
+  token_type_lparen,                    /* ( */
+  token_type_rparen,                    /* ) */
+  token_type_lbrace,                    /* { */
+  token_type_rbrace,                    /* } */
+  token_type_assign,                    /* = */
+  token_type_return,                    /* return */
+  token_type_plus,                      /* + */
+  token_type_minus,                     /* - */
+  token_type_equality_test_operator,    /* == < > >= <= != */
+  token_type_logic_operator,            /* ! && || */
+  token_type_comma,                     /* , */
+  token_type_colon,                     /* : */
+  token_type_number,                    /* 2.7 */
+  token_type_string_literal,            /* "abc" */
+  token_type_string_template,           /* @"abc" */
+  token_type_regex_literal,             /* /.../ */
+  token_type_symbol,                    /* if else etc. */
+  token_type_other
+};
+typedef enum token_type_ty token_type_ty;
+
+/* A token, as filled in by phase3_get.  */
+typedef struct token_ty token_ty;
+struct token_ty
+{
+  token_type_ty type;
+  char *string;         /* for token_type_symbol, token_type_string_literal,
+                           token_type_string_template; allocated with
+                           xstrdup */
+  refcounted_string_list_ty *comment;   /* for token_type_string_literal,
+                                           token_type_string_template */
+  int line_number;
+};
+
+/* Free the memory pointed to by a 'struct token_ty'.
+   Only the members owned by the token are released, not *TP itself.
+   String templates are treated like string literals, because phase3_get
+   fills in both 'string' and 'comment' for them as well; leaving them
+   out would leak that memory.  */
+static inline void
+free_token (token_ty *tp)
+{
+  bool is_string = (tp->type == token_type_string_literal
+                    || tp->type == token_type_string_template);
+
+  if (is_string || tp->type == token_type_symbol)
+    free (tp->string);
+  if (is_string)
+    drop_reference (tp->comment);
+}
+
+
+/* Replace escape sequences within character strings with their single
+   character equivalents.  */
+
+#define P7_QUOTES (1000 + '"')
+#define P7_QUOTE (1000 + '\'')
+#define P7_NEWLINE (1000 + '\n')
+
+/* Read the next character of a string literal or character constant,
+   decoding backslash escape sequences.
+   Return values:
+     - P7_NEWLINE, P7_QUOTES, P7_QUOTE for an unescaped newline, double
+       quote, or single quote, respectively.  Their escaped forms come
+       back as the plain characters instead, so the caller can tell a
+       delimiter from an escaped character;
+     - the decoded value for a recognized escape sequence;
+     - a backslash for an unrecognized escape sequence, with the
+       following character(s) pushed back;
+     - any other character, including EOF, unchanged.  */
+static int
+phase7_getc ()
+{
+  int c, n, j;
+
+  /* Use phase 1, because phase 2 elides comments.  */
+  c = phase1_getc ();
+
+  /* Return a magic newline indicator, so that we can distinguish
+     between the user requesting a newline in the string (e.g. using
+     "\n" or "\012") from the user failing to terminate the string or
+     character constant.  The ANSI C standard says: 3.1.3.4 Character
+     Constants contain "any character except single quote, backslash or
+     newline; or an escape sequence" and 3.1.4 String Literals contain
+     "any character except double quote, backslash or newline; or an
+     escape sequence".
+
+     Most compilers give a fatal error in this case, however gcc is
+     stupidly silent, even though this is a very common typo.  OK, so
+     "gcc --pedantic" will tell me, but that gripes about too much other
+     stuff.  Could I have a "gcc -Wnewline-in-string" option, or
+     better yet a "gcc -fno-newline-in-string" option, please?  Gcc is
+     also inconsistent between string literals and character constants:
+     you may not embed newlines in character constants; try it, you get
+     a useful diagnostic.  --PMiller  */
+  if (c == '\n')
+    return P7_NEWLINE;
+
+  if (c == '"')
+    return P7_QUOTES;
+  if (c == '\'')
+    return P7_QUOTE;
+  if (c != '\\')
+    return c;
+  /* A backslash: decode the escape sequence that follows.  */
+  c = phase1_getc ();
+  switch (c)
+    {
+    default:
+      /* Unknown escape sequences really should be an error, but just
+         ignore them, and let the real compiler complain.  */
+      phase1_ungetc (c);
+      return '\\';
+
+    case '"':
+    case '\'':
+    case '?':
+    case '\\':
+      return c;
+
+    case 'a':
+      return '\a';
+    case 'b':
+      return '\b';
+
+      /* The \e escape is peculiar to gcc, and assumes an ASCII
+         character set (or superset).  We don't provide support for it
+         here.  */
+
+    case 'f':
+      return '\f';
+    case 'n':
+      return '\n';
+    case 'r':
+      return '\r';
+    case 't':
+      return '\t';
+    case 'v':
+      return '\v';
+
+    case 'x':
+      /* Hexadecimal escape.  Without at least one hexadecimal digit it
+         is treated like an unknown escape sequence.  */
+      c = phase1_getc ();
+      switch (c)
+        {
+        default:
+          phase1_ungetc (c);
+          phase1_ungetc ('x');
+          return '\\';
+
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+          break;
+        }
+      /* Accumulate hexadecimal digits, without a limit on their number,
+         until the first character that is not one.  */
+      n = 0;
+      for (;;)
+        {
+          switch (c)
+            {
+            default:
+              phase1_ungetc (c);
+              return n;
+
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+              n = n * 16 + c - '0';
+              break;
+
+            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+              n = n * 16 + 10 + c - 'A';
+              break;
+
+            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+              n = n * 16 + 10 + c - 'a';
+              break;
+            }
+          c = phase1_getc ();
+        }
+      /* Not reached: the loop above is left only through its 'return'.  */
+      return n;
+
+    case '0': case '1': case '2': case '3':
+    case '4': case '5': case '6': case '7':
+      /* Octal escape, consisting of one to three octal digits.  */
+      n = 0;
+      for (j = 0; j < 3; ++j)
+        {
+          n = n * 8 + c - '0';
+          c = phase1_getc ();
+          switch (c)
+            {
+            default:
+              break;
+
+            case '0': case '1': case '2': case '3':
+            case '4': case '5': case '6': case '7':
+              continue;
+            }
+          break;
+        }
+      /* C is the first character after the escape sequence.  */
+      phase1_ungetc (c);
+      return n;
+    }
+}
+
+
+/* Push back the character C at the phase 7 level.  This simply delegates
+   to phase1_ungetc, so C must be a plain character, not one of the P7_*
+   indicator values (callers in this file pass '\n').  */
+static void
+phase7_ungetc (int c)
+{
+  phase1_ungetc (c);
+}
+
+
+/* 3. Parse each resulting logical line as preprocessing tokens and
+   white space.  Preprocessing tokens and Vala tokens don't always
+   match.  */
+
+static token_ty phase3_pushback[2];
+static int phase3_pushback_length;
+
+
+static token_type_ty last_token_type = token_type_other;
+
+/* Skip the rest of a regular expression literal whose opening slash has
+   already been consumed: everything up to the next unescaped slash, plus
+   at most one following flag character ('i', 's', 'm' or 'x').  If the
+   input ends before the closing slash, a warning is issued.  */
+static void
+phase3_scan_regex ()
+{
+  int ch;
+  bool closed = false;
+
+  while (!closed)
+    {
+      ch = phase1_getc ();
+      switch (ch)
+        {
+        case '/':
+          closed = true;
+          break;
+
+        case '\\':
+          /* Skip the escaped character, unless the input ends here.  */
+          ch = phase1_getc ();
+          if (ch != EOF)
+            break;
+          /* FALLTHROUGH */
+
+        case EOF:
+          error_with_progname = false;
+          error (0, 0,
+                 _("%s:%d: warning: regular expression literal terminated too early"),
+                 logical_file_name, line_number);
+          error_with_progname = true;
+          return;
+
+        default:
+          break;
+        }
+    }
+
+  /* Consume one trailing flag character, if there is one.  */
+  ch = phase2_getc ();
+  switch (ch)
+    {
+    case 'i': case 's': case 'm': case 'x':
+      break;
+
+    default:
+      phase2_ungetc (ch);
+      break;
+    }
+}
+
+static void
+phase3_get (token_ty *tp)
+{
+  static char *buffer;
+  static int bufmax;
+  int bufpos;
+
+  if (phase3_pushback_length)
+    {
+      *tp = phase3_pushback[--phase3_pushback_length];
+      last_token_type = tp->type;
+      return;
+    }
+
+  for (;;)
+    {
+      bool template;
+      bool verbatim;
+      int c;
+
+      tp->line_number = line_number;
+      c = phase2_getc ();
+
+      switch (c)
+        {
+        case EOF:
+          tp->type = last_token_type = token_type_eof;
+          return;
+
+        case '\n':
+          if (last_non_comment_line > last_comment_line)
+            savable_comment_reset ();
+          /* FALLTHROUGH */
+        case ' ':
+        case '\f':
+        case '\t':
+          /* Ignore whitespace and comments.  */
+          continue;
+        default:
+          break;
+        }
+
+      last_non_comment_line = tp->line_number;
+      template = false;
+      verbatim = false;
+
+      switch (c)
+        {
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+        case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+        case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+        case 'V': case 'W': case 'X': case 'Y': case 'Z':
+        case '_':
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+        case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+        case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+        case 'v': case 'w': case 'x': case 'y': case 'z':
+          bufpos = 0;
+          for (;;)
+            {
+              if (bufpos >= bufmax)
+                {
+                  bufmax = 2 * bufmax + 10;
+                  buffer = xrealloc (buffer, bufmax);
+                }
+              buffer[bufpos++] = c;
+              c = phase2_getc ();
+              switch (c)
+                {
+                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+                case 'Y': case 'Z':
+                case '_':
+                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+                case 'y': case 'z':
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                  continue;
+
+                default:
+                  phase2_ungetc (c);
+                  break;
+                }
+              break;
+            }
+          if (bufpos >= bufmax)
+            {
+              bufmax = 2 * bufmax + 10;
+              buffer = xrealloc (buffer, bufmax);
+            }
+          buffer[bufpos] = 0;
+          if (strcmp (buffer, "return") == 0)
+            tp->type = last_token_type = token_type_return;
+          else
+            {
+              tp->string = xstrdup (buffer);
+              tp->type = last_token_type = token_type_symbol;
+            }
+          return;
+
+        case '.':
+          c = phase2_getc ();
+          phase2_ungetc (c);
+          switch (c)
+            {
+            default:
+              tp->string = xstrdup (".");
+              tp->type = last_token_type = token_type_symbol;
+              return;
+
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+              c = '.';
+              break;
+            }
+          /* FALLTHROUGH */
+
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+          /* The preprocessing number token is more "generous" than the C
+             number tokens.  This is mostly due to token pasting (another
+             thing we can ignore here).  */
+          bufpos = 0;
+          for (;;)
+            {
+              if (bufpos >= bufmax)
+                {
+                  bufmax = 2 * bufmax + 10;
+                  buffer = xrealloc (buffer, bufmax);
+                }
+              buffer[bufpos++] = c;
+              c = phase2_getc ();
+              switch (c)
+                {
+                case 'e':
+                case 'E':
+                  if (bufpos >= bufmax)
+                    {
+                      bufmax = 2 * bufmax + 10;
+                      buffer = xrealloc (buffer, bufmax);
+                    }
+                  buffer[bufpos++] = c;
+                  c = phase2_getc ();
+                  if (c != '+' || c != '-')
+                    {
+                      phase2_ungetc (c);
+                      break;
+                    }
+                  continue;
+
+                case 'A': case 'B': case 'C': case 'D':           case 'F':
+                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+                case 'Y': case 'Z':
+                case 'a': case 'b': case 'c': case 'd':           case 'f':
+                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+                case 'y': case 'z':
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                case '.':
+                  continue;
+
+                default:
+                  phase2_ungetc (c);
+                  break;
+                }
+              break;
+            }
+          if (bufpos >= bufmax)
+            {
+              bufmax = 2 * bufmax + 10;
+              buffer = xrealloc (buffer, bufmax);
+            }
+          buffer[bufpos] = 0;
+          tp->type = last_token_type = token_type_number;
+          return;
+
+        case '\'':
+          for (;;)
+            {
+              c = phase7_getc ();
+              if (c == P7_NEWLINE)
+                {
+                  error_with_progname = false;
+                  error (0, 0, _("%s:%d: warning: unterminated character constant"),
+                         logical_file_name, line_number - 1);
+                  error_with_progname = true;
+                  phase7_ungetc ('\n');
+                  break;
+                }
+              if (c == EOF || c == P7_QUOTE)
+                break;
+            }
+          tp->type = last_token_type = token_type_character_constant;
+          return;
+
+          /* Vala provides strings in three different formats.
+
+             Usual string literals:
+               "..."
+             Verbatim string literals:
+               """...""" (where ... can include newlines and double quotes)
+             String templates.
+               @"...", @"""..."""
+          
+             Note that, with the current implementation string
+             templates are not subject to translation, because they are
+             inspected at compile time.  For example, the following code
+
+               string bar = "bar";
+               string foo = _(@"foo $bar");
+
+             will be translated into the C code, like:
+
+               _(g_strconcat ("foo ", "bar", NULL));  */
+        case '@':
+          c = phase2_getc ();
+          if (c != '"')
+            {
+              phase2_ungetc (c);
+              tp->type = last_token_type = token_type_other;
+              return;
+            }
+          template = true;
+          /* FALLTHROUGH */
+        case '"':
+          {
+            int c2 = phase2_getc ();
+            if (c2 == '"')
+              {
+                int c3 = phase2_getc ();
+                if (c3 == '"')
+                  verbatim = true;
+                else
+                  {
+                    phase2_ungetc (c3);
+                    phase2_ungetc (c2);
+                  }
+              }
+            else
+              phase2_ungetc (c2);
+          }
+
+          bufpos = 0;
+          for (;;)
+            {
+              c = phase7_getc ();
+              if (c == P7_NEWLINE)
+                {
+                  if (verbatim)
+                    c = '\n';
+                  else
+                    {
+                      error_with_progname = false;
+                      error (0, 0, _("%s:%d: warning: unterminated string literal"),
+                             logical_file_name, line_number - 1);
+                      error_with_progname = true;
+                      phase7_ungetc ('\n');
+                      break;
+                    }
+                }
+              if (c == P7_QUOTES)
+                {
+                  if (verbatim)
+                    {
+                      int c2 = phase2_getc ();
+                      if (c2 == '"')
+                        {
+                          int c3 = phase2_getc ();
+                          if (c3 == '"')
+                            break;
+                          phase2_ungetc (c3);
+                        }
+                      phase2_ungetc (c2);
+                      c = '"';
+                    }
+                  else
+                    break;
+                }
+              if (c == EOF)
+                break;
+              if (c == P7_QUOTE)
+                c = '\'';
+              if (bufpos >= bufmax)
+                {
+                  bufmax = 2 * bufmax + 10;
+                  buffer = xrealloc (buffer, bufmax);
+                }
+              buffer[bufpos++] = c;
+            }
+          if (bufpos >= bufmax)
+            {
+              bufmax = 2 * bufmax + 10;
+              buffer = xrealloc (buffer, bufmax);
+            }
+          buffer[bufpos] = 0;
+          tp->type = last_token_type = template ? token_type_string_template : token_type_string_literal;
+          tp->string = xstrdup (buffer);
+          tp->comment = add_reference (savable_comment);
+          return;
+
+        case '/':
+          switch (last_token_type)
+            {
+            case token_type_lparen:
+            case token_type_lbrace:
+            case token_type_assign:
+            case token_type_return:
+            case token_type_plus:
+            case token_type_minus:
+            case token_type_equality_test_operator:
+            case token_type_logic_operator:
+            case token_type_comma:
+              phase3_scan_regex ();
+              tp->type = last_token_type = token_type_regex_literal;
+              break;
+            default:
+              {
+                int c2 = phase2_getc ();
+                if (c2 == '=')
+                  {
+                    /* /= */
+                    phase2_ungetc (c2);
+                  }
+                tp->type = last_token_type = token_type_other;
+                break;
+              }
+            }
+          return;
+
+        case '(':
+          tp->type = last_token_type = token_type_lparen;
+          return;
+
+        case ')':
+          tp->type = last_token_type = token_type_rparen;
+          return;
+
+        case '{':
+          tp->type = last_token_type = token_type_lbrace;
+          return;
+
+        case '}':
+          tp->type = last_token_type = token_type_rbrace;
+          return;
+
+        case '+':
+          {
+            int c2 = phase2_getc ();
+            switch (c2)
+              {
+              case '=': case '+':
+                tp->type = last_token_type = token_type_other;
+                break;
+              default:
+                phase2_ungetc (c2);
+                tp->type = last_token_type = token_type_plus;
+                break;
+              }
+            return;
+          }
+
+        case '-':
+          {
+            int c2 = phase2_getc ();
+            switch (c2)
+              {
+              case '=': case '-':
+                tp->type = last_token_type = token_type_other;
+                break;
+              default:
+                phase2_ungetc (c2);
+                tp->type = last_token_type = token_type_minus;
+                break;
+              }
+            return;
+          }
+
+        case '=':
+          {
+            int c2 = phase2_getc ();
+            switch (c2)
+              {
+              case '=':
+                tp->type = last_token_type = token_type_equality_test_operator;
+                break;
+              case '>':
+                tp->type = last_token_type = token_type_other;
+                break;
+              default:
+                phase2_ungetc (c2);
+                tp->type = last_token_type = token_type_assign;
+                break;
+              }
+            return;
+          }
+
+        case '!':
+          {
+            int c2 = phase2_getc ();
+            if (c2 == '=')
+              {
+                tp->type = last_token_type = token_type_equality_test_operator;
+                return;
+              }
+            phase2_ungetc (c2);
+            tp->type = last_token_type = token_type_logic_operator;
+            return;
+          }
+          
+        case '>':
+        case '<':
+          {
+            int c2 = phase2_getc ();
+            if (c2 == '=')
+             tp->type = last_token_type = token_type_equality_test_operator;
+            else if (c2 == c)
+              {
+                int c3 = phase2_getc ();
+                if (c3 != '=')
+                  phase2_ungetc (c3);
+                tp->type = last_token_type = token_type_other;
+              }
+            else
+              {
+                phase2_ungetc (c2);
+                tp->type = last_token_type = token_type_equality_test_operator;
+              }
+          }
+          return;
+          
+        case ',':
+          tp->type = last_token_type = token_type_comma;
+          return;
+
+        case ':':
+          tp->type = last_token_type = token_type_colon;
+          return;
+
+        case '&':
+        case '|':
+          {
+            int c2 = phase2_getc ();
+            if (c2 == c)
+             tp->type = last_token_type = token_type_logic_operator;
+            else if (c2 == '=')
+             tp->type = last_token_type = token_type_other;
+            else
+              {
+                phase2_ungetc (c2);
+                tp->type = last_token_type = token_type_other;
+              }
+          }
+          return;
+
+        case '?':
+          {
+            int c2 = phase2_getc ();
+            if (c2 == '?')
+              {
+                tp->type = last_token_type = token_type_logic_operator;
+                return;
+              }
+            phase2_ungetc (c2);
+            tp->type = last_token_type = token_type_other;
+            return;
+          }
+
+        default:
+          tp->type = last_token_type = token_type_other;
+          return;
+        }
+    }
+}
+
+/* Store a copy of the token *TP at the end of the phase3_pushback array.
+   End-of-file tokens are not stored.  Aborts if the array is already
+   full.  */
+static void
+phase3_unget (token_ty *tp)
+{
+  /* Nothing to remember for EOF.  */
+  if (tp->type == token_type_eof)
+    return;
+
+  /* Running out of pushback slots is treated as a fatal internal error.  */
+  if (phase3_pushback_length == SIZEOF (phase3_pushback))
+    abort ();
+
+  phase3_pushback[phase3_pushback_length] = *tp;
+  phase3_pushback_length++;
+}
+
+
+/* String concatenation with '+'.  */
+
+/* Read the next token into *TP.  When it is a string literal, greedily
+   merge every following "+ string-literal" pair into it, so that the
+   caller sees a single string literal token holding the concatenation.
+   Tokens that were read ahead but do not continue such a chain are
+   pushed back with phase3_unget.  */
+static void
+x_vala_lex (token_ty *tp)
+{
+  char *joined;
+  size_t joined_len;
+
+  phase3_get (tp);
+  if (tp->type != token_type_string_literal)
+    return;
+
+  joined = tp->string;
+  joined_len = strlen (joined);
+
+  for (;;)
+    {
+      token_ty op;
+      token_ty rhs;
+      size_t rhs_len;
+
+      /* The chain continues only with a '+' operator...  */
+      phase3_get (&op);
+      if (op.type != token_type_plus)
+        {
+          phase3_unget (&op);
+          break;
+        }
+
+      /* ... that is followed by another string literal.  Otherwise give
+         both tokens back, the most recently read one first.  */
+      phase3_get (&rhs);
+      if (rhs.type != token_type_string_literal)
+        {
+          phase3_unget (&rhs);
+          phase3_unget (&op);
+          break;
+        }
+
+      /* Append the right-hand string, including its terminating NUL.  */
+      rhs_len = strlen (rhs.string);
+      joined = (char *) xrealloc (joined, joined_len + rhs_len + 1);
+      memcpy (joined + joined_len, rhs.string, rhs_len + 1);
+      joined_len += rhs_len;
+
+      free_token (&rhs);
+      free_token (&op);
+    }
+
+  tp->string = joined;
+}
+
+
+/* ========================= Extracting strings.  ========================== */
+
+
+/* Context lookup table.  */
+static flag_context_list_table_ty *flag_context_list_table;
+
+
+/* The file is broken into tokens.  Scan the token stream, looking for
+   a keyword, followed by a left paren, followed by a string.  When we
+   see this sequence, we have something to remember.  We assume we are
+   looking at a valid Vala program, and leave the complaints about the
+   grammar to the compiler.
+
+     Normal handling: Look for
+       keyword ( ... msgid ... )
+       keyword msgid
+     Plural handling: Look for
+       keyword ( ... msgid ... msgid_plural ... )
+
+   We use recursion because the arguments before msgid or between msgid
+   and msgid_plural can contain subexpressions of the same form.  */
+
+/* Extract messages until the next balanced closing parenthesis.
+   Extracted messages are added to MLP.
+   DELIM is token_type_rparen or token_type_eof; with either value a
+   closing parenthesis ends the scan.  No other closing token is handled
+   here.
+   OUTER_CONTEXT and CONTEXT_ITER provide the flag context for the
+   arguments at this nesting level.
+   ARGPARSER collects the string arguments seen at this nesting level; it
+   is finished with arglist_parser_done on every return path.
+   Return true upon eof, false upon closing parenthesis.  */
+static bool
+extract_balanced (message_list_ty *mlp, token_type_ty delim,
+                  flag_context_ty outer_context,
+                  flag_context_list_iterator_ty context_iter,
+                  struct arglist_parser *argparser)
+{
+  /* Current argument number.  */
+  int arg = 1;
+  /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
+  int state;
+  /* Parameters of the keyword just seen.  Defined only in state 1.  */
+  const struct callshapes *next_shapes = NULL;
+  /* Context iterator that will be used if the next token is a '('.  */
+  flag_context_list_iterator_ty next_context_iter =
+    passthrough_context_list_iterator;
+  /* Current context.  */
+  flag_context_ty inner_context =
+    inherited_context (outer_context,
+                       flag_context_list_iterator_advance (&context_iter));
+
+  /* Start state is 0.  */
+  state = 0;
+
+  for (;;)
+    {
+      token_ty token;
+
+      x_vala_lex (&token);
+
+      switch (token.type)
+        {
+        case token_type_symbol:
+          {
+            void *keyword_value;
+
+            /* A known keyword arms state 1 for the token that follows.  */
+            if (hash_find_entry (&keywords, token.string, strlen (token.string),
+                                 &keyword_value)
+                == 0)
+              {
+                next_shapes = (const struct callshapes *) keyword_value;
+                state = 1;
+              }
+            else
+              state = 0;
+          }
+          next_context_iter =
+            flag_context_list_iterator (
+              flag_context_list_table_lookup (
+                flag_context_list_table,
+                token.string, strlen (token.string)));
+          free (token.string);
+          continue;
+
+        case token_type_lparen:
+          /* Recurse into the parenthesized group.  A true result means
+             that eof was hit inside it, which ends this level too.  */
+          if (extract_balanced (mlp, token_type_rparen,
+                                inner_context, next_context_iter,
+                                arglist_parser_alloc (mlp,
+                                                      state ? next_shapes : NULL)))
+            {
+              arglist_parser_done (argparser, arg);
+              return true;
+            }
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_rparen:
+          if (delim == token_type_rparen || delim == token_type_eof)
+            {
+              arglist_parser_done (argparser, arg);
+              return false;
+            }
+
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_comma:
+          /* Move on to the next argument and to its flag context.  */
+          arg++;
+          inner_context =
+            inherited_context (outer_context,
+                               flag_context_list_iterator_advance (
+                                 &context_iter));
+          next_context_iter = passthrough_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_eof:
+          arglist_parser_done (argparser, arg);
+          return true;
+
+        case token_type_string_literal:
+          {
+            lex_pos_ty pos;
+            pos.file_name = logical_file_name;
+            pos.line_number = token.line_number;
+
+            if (extract_all)
+              remember_a_message (mlp, NULL, token.string, inner_context,
+                                  &pos, NULL, token.comment);
+            else
+              {
+                /* A string immediately after a symbol means a function call.  */
+                if (state)
+                  {
+                    struct arglist_parser *tmp_argparser;
+                    tmp_argparser = arglist_parser_alloc (mlp, next_shapes);
+
+                    arglist_parser_remember (tmp_argparser, 1, token.string,
+                                             inner_context, pos.file_name,
+                                             pos.line_number, token.comment);
+                    arglist_parser_done (tmp_argparser, 1);
+                  }
+                else
+                  arglist_parser_remember (argparser, arg, token.string,
+                                           inner_context, pos.file_name,
+                                           pos.line_number, token.comment);
+              }
+          }
+          drop_reference (token.comment);
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_string_template:
+          /* String templates are never extracted.  The lexer nevertheless
+             gives them an allocated string and a comment reference, just
+             like plain string literals; release both here so that they
+             are not leaked.  */
+          free (token.string);
+          drop_reference (token.comment);
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_character_constant:
+        case token_type_lbrace:
+        case token_type_rbrace:
+        case token_type_assign:
+        case token_type_return:
+        case token_type_plus:
+        case token_type_minus:
+        case token_type_equality_test_operator:
+        case token_type_logic_operator:
+        case token_type_colon:
+        case token_type_number:
+        case token_type_regex_literal:
+        case token_type_other:
+          /* None of these can start or continue a keyword call.  */
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        default:
+          abort ();
+        }
+    }
+}
+
+/* Scan the Vala source read from F and add its translatable strings to
+   the message list of the first domain in MDLP.  REAL_FILENAME and
+   LOGICAL_FILENAME are recorded in the scanner's file-level variables;
+   FLAG_TABLE becomes the context lookup table used during the scan.  */
+void
+extract_vala (FILE *f,
+              const char *real_filename, const char *logical_filename,
+              flag_context_list_table_ty *flag_table,
+              msgdomain_list_ty *mdlp)
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+  bool reached_eof;
+
+  /* Set up the scanner state for this input.  */
+  flag_context_list_table = flag_table;
+
+  fp = f;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  init_keywords ();
+
+  /* Eat tokens until eof is seen.  At the top level extract_balanced also
+     returns (with false) on an unbalanced closing parenthesis; in that
+     case simply start it again with a fresh argument list parser.  */
+  do
+    reached_eof = extract_balanced (mlp, token_type_eof,
+                                    null_context, null_context_list_iterator,
+                                    arglist_parser_alloc (mlp, NULL));
+  while (!reached_eof);
+
+  /* Detach the scanner state from this input again.  */
+  line_number = 0;
+  logical_file_name = NULL;
+  real_file_name = NULL;
+  fp = NULL;
+}
diff --git a/gettext-tools/src/x-vala.h b/gettext-tools/src/x-vala.h

new file mode 100644 (file)

index 0000000..db9a66d
--- /dev/null
+++ b/gettext-tools/src/x-vala.h
@@ -0,0 +1,50 @@
+/* xgettext Vala backend.
+   Copyright (C) 2002-2003, 2006, 2013 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+
+#include <stdio.h>
+
+#include "message.h"
+#include "xgettext.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Table entry that maps the file name extension "vala" to the language
+   name "Vala".  It is expanded inside the extension table of xgettext.c.
+   The trailing backslash continues the definition onto the empty line
+   that follows, so that line must stay empty.  */
+#define EXTENSIONS_VALA \
+  { "vala",        "Vala"   },                                        \
+
+/* Table entry for the language "Vala", expanded inside the scanner table
+   of xgettext.c.  It names the extractor function, the flag table and the
+   C format string parser; the last field is left NULL.  As above, the
+   line after the final backslash must stay empty.  */
+#define SCANNERS_VALA \
+  { "Vala",       extract_vala,                               \
+                        &flag_table_vala, &formatstring_c, NULL }, \
+
+/* Scan a Vala file and add its translatable strings to mdlp.  */
+extern void extract_vala (FILE *fp, const char *real_filename,
+                          const char *logical_filename,
+                          flag_context_list_table_ty *flag_table,
+                          msgdomain_list_ty *mdlp);
+
+/* Handle a keyword option for the Vala scanner.  xgettext's main passes
+   the option argument here, which is NULL when the option was given
+   without a value.  */
+extern void x_vala_keyword (const char *keyword);
+/* Called from xgettext's option handling together with the other
+   languages' x_*_extract_all functions.  Presumably this switches the
+   Vala scanner to extracting all strings -- confirm in x-vala.c.  */
+extern void x_vala_extract_all (void);
+
+/* Called once from xgettext's main, before the command line options are
+   parsed, together with the other languages' init_flag_table_* functions.  */
+extern void init_flag_table_vala (void);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c

index d433794758a2f59d63327698c1124a99d6331a8c..702ceb0726e6aff57813da0f038bc53ff0a75f0c 100644 (file)
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -94,6 +94,7 @@
  #include "x-glade.h"
  #include "x-lua.h"
  #include "x-javascript.h"
+#include "x-vala.h"
  
  
  /* If nonzero add all comments immediately preceding one of the keywords. */
@@ -164,6 +165,7 @@ static flag_context_list_table_ty flag_table_perl;
  static flag_context_list_table_ty flag_table_php;
  static flag_context_list_table_ty flag_table_lua;
  static flag_context_list_table_ty flag_table_javascript;
+static flag_context_list_table_ty flag_table_vala;
  
  /* If true, recognize Qt format strings.  */
  static bool recognize_format_qt;
@@ -337,6 +339,7 @@ main (int argc, char *argv[])
    init_flag_table_php ();
    init_flag_table_lua ();
    init_flag_table_javascript ();
+  init_flag_table_vala ();
  
    while ((optchar = getopt_long (argc, argv,
                                   "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:",
@@ -363,6 +366,7 @@ main (int argc, char *argv[])
          x_glade_extract_all ();
          x_lua_extract_all ();
          x_javascript_extract_all ();
+        x_vala_extract_all ();
          break;
  
        case 'c':
@@ -442,6 +446,7 @@ main (int argc, char *argv[])
          x_glade_keyword (optarg);
          x_lua_keyword (optarg);
          x_javascript_keyword (optarg);
+        x_vala_keyword (optarg);
          if (optarg == NULL)
            no_default_keywords = true;
          else
@@ -867,8 +872,8 @@ Choice of input file language:\n"));
                                  (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\
                                  EmacsLisp, librep, Scheme, Smalltalk, Java,\n\
                                  JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\
-                                Lua, JavaScript, GCC-source, NXStringTable, RST,\n\
-                                Glade)\n"));
+                                GCC-source, NXStringTable, RST, Glade, Lua,\n\
+                                JavaScript, Vala)\n"));
        printf (_("\
    -C, --c++                   shorthand for --language=C++\n"));
        printf (_("\
@@ -901,24 +906,24 @@ Language specific options:\n"));
        printf (_("\
                                  (only languages C, C++, ObjectiveC, Shell,\n\
                                  Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
-                                C#, awk, Tcl, Perl, PHP, Lua, JavaScript,\n\
-                                GCC-source, Glade)\n"));
+                                C#, awk, Tcl, Perl, PHP, GCC-source, Glade,\n\
+                                Lua, JavaScript, Vala)\n"));
        printf (_("\
    -kWORD, --keyword=WORD      look for WORD as an additional keyword\n\
    -k, --keyword               do not to use default keywords\n"));
        printf (_("\
                                  (only languages C, C++, ObjectiveC, Shell,\n\
                                  Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
-                                C#, awk, Tcl, Perl, PHP, Lua, JavaScript,\n\
-                                GCC-source, Glade)\n"));
+                                C#, awk, Tcl, Perl, PHP, GCC-source, Glade,\n\
+                                Lua, JavaScript, Vala)\n"));
        printf (_("\
        --flag=WORD:ARG:FLAG    additional flag for strings inside the argument\n\
                                number ARG of keyword WORD\n"));
        printf (_("\
                                  (only languages C, C++, ObjectiveC, Shell,\n\
                                  Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
-                                C#, awk, YCP, Tcl, Perl, PHP, Lua, JavaScript,\n\
-                                GCC-source)\n"));
+                                C#, awk, YCP, Tcl, Perl, PHP, GCC-source,\n\
+                                Lua, JavaScript, Vala)\n"));
        printf (_("\
    -T, --trigraphs             understand ANSI C trigraphs for input\n"));
        printf (_("\
@@ -3216,6 +3221,7 @@ language_to_extractor (const char *name)
      SCANNERS_GLADE
      SCANNERS_LUA
      SCANNERS_JAVASCRIPT
+    SCANNERS_VALA
      /* Here may follow more languages and their scanners: pike, etc...
         Make sure new scanners honor the --exclude-file option.  */
    };
@@ -3301,6 +3307,7 @@ extension_to_language (const char *extension)
      EXTENSIONS_GLADE
      EXTENSIONS_LUA
      EXTENSIONS_JAVASCRIPT
+    EXTENSIONS_VALA
      /* Here may follow more file extensions... */
    };
  
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog

index 93a33f7d2254b003c1b9d52a305032b2c9d24f11..7edd85af1c2fab55a7fbc8c6ecbd92b6e7c7cef9 100644 (file)
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,10 @@
+2013-06-04  Daiki Ueno  <ueno@gnu.org>
+
+       Support for Vala.
+       * xgettext-vala-1: New file.
+       * lang-vala: New file.
+       * Makefile.am (TESTS): Add them.
+
  2013-05-29  Stefano Lattarini  <stefano.lattarini@gmail.com>  (tiny change)
  
         * Makefile.am (TESTS_ENVIRONMENT): Remove $(SHELL).
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am

index 423804ed2bbba20045fe1be09413f7cb6b1e49f0..73290324c84d21c75efe2de426567f3a7caa972d 100644 (file)
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am
@@ -103,6 +103,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \
         xgettext-lua-1 xgettext-lua-2 \
         xgettext-javascript-1 xgettext-javascript-2 xgettext-javascript-3 \
         xgettext-javascript-4 xgettext-javascript-5 \
+       xgettext-vala-1 \
         format-awk-1 format-awk-2 \
         format-boost-1 format-boost-2 \
         format-c-1 format-c-2 format-c-3 format-c-4 format-c-5 \
@@ -135,7 +136,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \
         lang-python-2 lang-clisp lang-elisp lang-librep lang-guile \
         lang-smalltalk lang-java lang-csharp lang-gawk lang-pascal \
         lang-ycp lang-tcl lang-perl-1 lang-perl-2 lang-php lang-po lang-rst \
-       lang-lua lang-javascript
+       lang-lua lang-javascript lang-vala
  
  EXTRA_DIST += $(TESTS) \
         test.mo xg-c-1.ok.po mex-test2.ok \
diff --git a/gettext-tools/tests/lang-vala b/gettext-tools/tests/lang-vala

new file mode 100644 (file)

index 0000000..2a5950d
--- /dev/null
+++ b/gettext-tools/tests/lang-vala
@@ -0,0 +1,124 @@
+#! /bin/sh
+
+# Test of gettext facilities in the Vala language.
+# Assumes an fr_FR locale is installed.
+# Assumes the following packages are installed: vala.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+# The Vala program under test: it prints two translated messages.
+tmpfiles="$tmpfiles l-vala-prog.vala"
+cat <<\EOF > l-vala-prog.vala
+int main (string[] args) {
+    Intl.setlocale (LocaleCategory.ALL, "");
+    Intl.textdomain ("prog");
+    Intl.bindtextdomain ("prog", "l-vala");
+    stdout.printf ("%s\n", _("'Your command, please?', asked the waiter."));
+    stdout.printf ("%s\n", _("%s is replaced by %s.").printf ("FF", "EUR"));
+    return 0;
+}
+EOF
+
+# Test for presence of valac.  Probe the very command that is used for
+# compiling below, so that an overridden VALAC is honored here as well.
+: ${VALAC=valac}
+(${VALAC} --version) >/dev/null 2>/dev/null
+test $? -le 1 \
+  || { echo "Skipping test: valac not found"; rm -fr $tmpfiles; exit 77; }
+
+# Compile the program.
+tmpfiles="$tmpfiles l-vala-prog.vala l-vala-prog.c l-vala-prog l-vala-prog.err"
+${VALAC} --Xcc=-DGETTEXT_PACKAGE=\"prog\" l-vala-prog.vala 2>l-vala-prog.err \
+  || { cat l-vala-prog.err 1>&2; exit 1; }
+
+# Extract the messages and compare them with the expected POT contents.
+tmpfiles="$tmpfiles l-vala-prog.tmp l-vala-prog.pot"
+: ${XGETTEXT=xgettext}
+${XGETTEXT} -o l-vala-prog.tmp --omit-header --no-location l-vala-prog.vala
+test $? = 0 || { rm -fr $tmpfiles; exit 1; }
+LC_ALL=C tr -d '\r' < l-vala-prog.tmp > l-vala-prog.pot
+test $? = 0 || { rm -fr $tmpfiles; exit 1; }
+
+tmpfiles="$tmpfiles l-vala-prog.ok"
+cat <<\EOF > l-vala-prog.ok
+msgid "'Your command, please?', asked the waiter."
+msgstr ""
+
+#, c-format
+msgid "%s is replaced by %s."
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} l-vala-prog.ok l-vala-prog.pot || exit 1
+
+# The French translation.
+tmpfiles="$tmpfiles l-vala-fr.po"
+cat <<\EOF > l-vala-fr.po
+msgid ""
+msgstr "Content-Type: text/plain; charset=ISO-8859-1\n"
+
+msgid "'Your command, please?', asked the waiter."
+msgstr "«Votre commande, s'il vous plait», dit le garçon."
+
+# Reverse the arguments.
+#, c-format
+msgid "%s is replaced by %s."
+msgstr "%2$s remplace %1$s."
+EOF
+
+# Merging the translation with the POT file must leave it unchanged.
+tmpfiles="$tmpfiles l-vala-fr.po.tmp l-vala-fr.po.new"
+: ${MSGMERGE=msgmerge}
+${MSGMERGE} -q -o l-vala-fr.po.tmp l-vala-fr.po l-vala-prog.pot
+test $? = 0 || { rm -fr $tmpfiles; exit 1; }
+LC_ALL=C tr -d '\r' < l-vala-fr.po.tmp > l-vala-fr.po.new
+test $? = 0 || { rm -fr $tmpfiles; exit 1; }
+
+: ${DIFF=diff}
+${DIFF} l-vala-fr.po l-vala-fr.po.new || exit 1
+
+# Compile the catalog into the directory that the program binds "prog" to.
+tmpfiles="$tmpfiles l-vala"
+test -d l-vala || mkdir l-vala
+test -d l-vala/fr || mkdir l-vala/fr
+test -d l-vala/fr/LC_MESSAGES || mkdir l-vala/fr/LC_MESSAGES
+
+: ${MSGFMT=msgfmt}
+${MSGFMT} -o l-vala/fr/LC_MESSAGES/prog.mo l-vala-fr.po
+# Fail right here when the catalog could not be built, like the xgettext
+# and msgmerge steps above do.
+test $? = 0 || { rm -fr $tmpfiles; exit 1; }
+
+# Expected program output, for the fr_FR locale (.ok) and for the
+# fr_FR.UTF-8 locale (.oku).
+tmpfiles="$tmpfiles l-vala-prog.ok l-vala-prog.oku l-vala-prog.out"
+: ${DIFF=diff}
+cat <<\EOF > l-vala-prog.ok
+«Votre commande, s'il vous plait», dit le garçon.
+EUR remplace FF.
+EOF
+cat <<\EOF > l-vala-prog.oku
+«Votre commande, s'il vous plait», dit le garçon.
+EUR remplace FF.
+EOF
+
+# Run the program in each French locale.  An exit code of 77 from the
+# program marks that locale as unavailable.
+: ${LOCALE_FR=fr_FR}
+: ${LOCALE_FR_UTF8=fr_FR.UTF-8}
+if test $LOCALE_FR != none; then
+  LANGUAGE= LC_ALL=$LOCALE_FR ./l-vala-prog > l-vala-prog.out
+  case $? in
+    0) ${DIFF} l-vala-prog.ok l-vala-prog.out || exit 1;;
+    77) LOCALE_FR=none;;
+    *) exit 1;;
+  esac
+fi
+if test $LOCALE_FR_UTF8 != none; then
+  LANGUAGE= LC_ALL=$LOCALE_FR_UTF8 ./l-vala-prog > l-vala-prog.out
+  case $? in
+    0) ${DIFF} l-vala-prog.oku l-vala-prog.out || exit 1;;
+    77) LOCALE_FR_UTF8=none;;
+    *) exit 1;;
+  esac
+fi
+if test $LOCALE_FR = none && test $LOCALE_FR_UTF8 = none; then
+  if test -f /usr/bin/localedef; then
+    echo "Skipping test: no french locale is installed"
+  else
+    echo "Skipping test: no french locale is supported"
+  fi
+  rm -fr $tmpfiles; exit 77
+fi
+
+rm -fr $tmpfiles
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-vala-1 b/gettext-tools/tests/xgettext-vala-1

new file mode 100644 (file)

index 0000000..7c7e243
--- /dev/null
+++ b/gettext-tools/tests/xgettext-vala-1
@@ -0,0 +1,76 @@
+#!/bin/sh
+
+# Test of Vala support.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles xg-vala-1.vala"
+cat <<\EOF > xg-vala-1.vala
+int main (string[] args) {
+    var s1 = "Simple string, no gettext needed";
+    var s2 = _("Extract this first string");
+    var s3 = "Prefix _(" + _("Extract this second string") + ") Postfix";
+
+    if (args[0] == _("Extract this thirth string")) {
+      /* _("This is a comment and must not be extracted!") */
+    }
+
+    var s4 = _("""Extract this
+    ""
+    fourth string""");
+    return 0;
+}
+EOF
+
+tmpfiles="$tmpfiles xg-vala-1.err xg-vala-1.tmp xg-vala-1.pot"
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-vala-1.tmp xg-vala-1.vala 2>xg-vala-1.err
+test $? = 0 || { cat xg-vala-1.err; rm -fr $tmpfiles; exit 1; }
+# Don't simplify this to "grep ... < xg-vala-1.tmp", otherwise OpenBSD 4.0 grep
+# only outputs "Binary file (standard input) matches".
+cat xg-vala-1.tmp | grep -v 'POT-Creation-Date' | LC_ALL=C tr -d '\r' > xg-vala-1.pot
+
+tmpfiles="$tmpfiles xg-vala-1.ok"
+cat <<\EOF > xg-vala-1.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "Extract this first string"
+msgstr ""
+
+msgid "Extract this second string"
+msgstr ""
+
+msgid "Extract this thirth string"
+msgstr ""
+
+msgid ""
+"Extract this\n"
+"    \"\"\n"
+"    fourth string"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-vala-1.ok xg-vala-1.pot
+result=$?
+
+rm -fr $tmpfiles
+
+exit $result
author	Daiki Ueno <ueno@gnu.org>
	Tue, 21 May 2013 01:39:06 +0000 (10:39 +0900)
committer	Daiki Ueno <ueno@gnu.org>
	Tue, 4 Jun 2013 05:44:23 +0000 (14:44 +0900)
ChangeLog		patch \| blob \| blame \| history
NEWS		patch \| blob \| blame \| history
gettext-tools/doc/ChangeLog		patch \| blob \| blame \| history
gettext-tools/doc/xgettext.texi		patch \| blob \| blame \| history
gettext-tools/po/ChangeLog		patch \| blob \| blame \| history
gettext-tools/po/POTFILES.in		patch \| blob \| blame \| history
gettext-tools/src/ChangeLog		patch \| blob \| blame \| history
gettext-tools/src/FILES		patch \| blob \| blame \| history
gettext-tools/src/Makefile.am		patch \| blob \| blame \| history
gettext-tools/src/x-vala.c	[new file with mode: 0644]	patch \| blob
gettext-tools/src/x-vala.h	[new file with mode: 0644]	patch \| blob
gettext-tools/src/xgettext.c		patch \| blob \| blame \| history
gettext-tools/tests/ChangeLog		patch \| blob \| blame \| history
gettext-tools/tests/Makefile.am		patch \| blob \| blame \| history
gettext-tools/tests/lang-vala	[new file with mode: 0644]	patch \| blob
gettext-tools/tests/xgettext-vala-1	[new file with mode: 0644]	patch \| blob