From: Ľubomír Remák Date: Wed, 1 Feb 2012 16:31:56 +0000 (+0100) Subject: Support for Lua. X-Git-Tag: v0.18.3~69 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e43c5e2d3ccd5280722655cdb0c85579916e02f1;p=thirdparty%2Fgettext.git Support for Lua. --- diff --git a/gettext-tools/doc/ChangeLog b/gettext-tools/doc/ChangeLog index ed7e35603..5b597d67f 100644 --- a/gettext-tools/doc/ChangeLog +++ b/gettext-tools/doc/ChangeLog @@ -1,3 +1,12 @@ +2013-04-11 Ľubomír Remák + + Support for Lua. + * gettext.texi (PO Files): Mention lua-format. + (lua-format): New subsection. + (Lua): New subsection. + * xgettext.texi: Document Lua source language. Document that it is + applicable to --flag. + 2013-04-02 Daiki Ueno * gettext.texi (PO Files): Use '@pxref' instead of '@xref'. diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index 1b45112cb..f479ed853 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -409,6 +409,7 @@ The Translator's View * qt-plural-format:: Qt Plural Format Strings * kde-format:: KDE Format Strings * boost-format:: Boost Format Strings +* lua-format:: Lua Format Strings Individual Programming Languages @@ -433,6 +434,7 @@ Individual Programming Languages * PHP:: PHP Hypertext Preprocessor * Pike:: Pike * GCC-source:: GNU Compiler Collection sources +* Lua:: Lua sh - Shell Script @@ -1623,6 +1625,12 @@ Likewise for KDE, see @ref{kde-format}. @kwindex no-boost-format@r{ flag} Likewise for Boost, see @ref{boost-format}. +@item lua-format +@kwindex lua-format@r{ flag} +@itemx no-lua-format +@kwindex no-lua-format@r{ flag} +Likewise for Lua, see @ref{lua-format}. + @end table @kwindex msgctxt @@ -8965,6 +8973,7 @@ strings. 
* qt-plural-format:: Qt Plural Format Strings * kde-format:: KDE Format Strings * boost-format:: Boost Format Strings +* lua-format:: Lua Format Strings @end menu @node c-format, objc-format, Translators for other Languages, Translators for other Languages @@ -9210,7 +9219,7 @@ A directive consists of a @samp{%} followed by a non-zero decimal number. If a @samp{%n} occurs in a format strings, all of @samp{%1}, ..., @samp{%(n-1)} must occur as well, except possibly one of them. -@node boost-format, , kde-format, Translators for other Languages +@node boost-format, lua-format, kde-format, Translators for other Languages @subsection Boost Format Strings Boost format strings are described in the documentation of the @@ -9221,6 +9230,12 @@ such as @samp{%1$+5d}, or may be surrounded by vertical bars, such as @samp{%|1$+5d|} or @samp{%|1$+5|}, or consists of just an argument number between percent signs, such as @samp{%1%}. +@node lua-format, , boost-format, Translators for other Languages +@subsection Lua Format Strings + +Lua format strings are described in the Lua reference manual, section @w{String Manipulation}, +@uref{http://www.lua.org/manual/5.1/manual.html#pdf-string.format}. + @node Maintainers for other Languages, List of Programming Languages, Translators for other Languages, Programming Languages @section The Maintainer's View @@ -9324,6 +9339,7 @@ that language, and to combine the resulting files using @code{msgcat}. * PHP:: PHP Hypertext Preprocessor * Pike:: Pike * GCC-source:: GNU Compiler Collection sources +* Lua:: Lua @end menu @node C, sh, List of Programming Languages, List of Programming Languages @@ -11735,7 +11751,7 @@ On platforms without gettext, the functions are not available. 
--- @end table -@node GCC-source, , Pike, List of Programming Languages +@node GCC-source, Lua, Pike, List of Programming Languages @subsection GNU Compiler Collection sources @cindex GCC-source @@ -11784,6 +11800,68 @@ Uses autoconf macros yes @end table +@node Lua, , GCC-source, List of Programming Languages +@subsection Lua + +@table @asis +@item RPMs +lua + +@item File extension +@code{lua} + +@item String syntax +@itemize @bullet + +@item @code{"abc"} + +@item @code{'abc'} + +@item @code{[[abc]]} + +@item @code{[=[abc]=]} + +@item @code{[==[abc]==]} + +@item ... + +@end itemize + +@item gettext shorthand +@code{_("abc")} + +@item gettext/ngettext functions +@code{gettext.gettext}, @code{gettext.dgettext}, @code{gettext.dcgettext}, +@code{gettext.ngettext}, @code{gettext.dngettext}, @code{gettext.dcngettext} + +@item textdomain +@code{textdomain} function + +@item bindtextdomain +@code{bindtextdomain} function + +@item setlocale +automatic + +@item Prerequisite +@code{require 'gettext'} or running lua interpreter with @code{-l gettext} option + +@item Use or emulate GNU gettext +use + +@item Extractor +@code{xgettext} + +@item Formatting with positions +--- + +@item Portability +On platforms without gettext, the functions are not available. + +@item po-mode marking +--- +@end table + @c This is the template for new languages. 
@ignore @@ -11863,7 +11941,7 @@ fpk @code{xgettext}, @code{rstconv} @end table -@node Glade, , RST, List of Data Formats +@node Glade, , RST, List of Data Formats @subsection Glade - GNOME user interface description @table @asis diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi index 4c59f4c48..b6dd0dc43 100644 --- a/gettext-tools/doc/xgettext.texi +++ b/gettext-tools/doc/xgettext.texi @@ -73,7 +73,7 @@ are @code{C}, @code{C++}, @code{ObjectiveC}, @code{PO}, @code{Shell}, @code{Python}, @code{Lisp}, @code{EmacsLisp}, @code{librep}, @code{Scheme}, @code{Smalltalk}, @code{Java}, @code{JavaProperties}, @code{C#}, @code{awk}, @code{YCP}, @code{Tcl}, @code{Perl}, @code{PHP}, @code{GCC-source}, -@code{NXStringTable}, @code{RST}, @code{Glade}. +@code{NXStringTable}, @code{RST}, @code{Glade}, @code{Lua}. @item -C @itemx --c++ @@ -137,7 +137,7 @@ Extract all strings. This option has an effect with most languages, namely C, C++, ObjectiveC, Shell, Python, Lisp, EmacsLisp, librep, Java, C#, awk, Tcl, Perl, PHP, -GCC-source, Glade. +GCC-source, Glade, Lua. @item -k[@var{keywordspec}] @itemx --keyword[=@var{keywordspec}] @@ -180,7 +180,7 @@ escaped. This option has an effect with most languages, namely C, C++, ObjectiveC, Shell, Python, Lisp, EmacsLisp, librep, Java, C#, awk, Tcl, Perl, PHP, -GCC-source, Glade. +GCC-source, Glade, Lua. The default keyword specifications, which are always looked for if not explicitly disabled, are language dependent. They are: @@ -245,6 +245,11 @@ For PHP: @code{_}, @code{gettext}, @code{dgettext:2}, @code{dcgettext:2}, @item For Glade 1: @code{label}, @code{title}, @code{text}, @code{format}, @code{copyright}, @code{comments}, @code{preview_text}, @code{tooltip}. + +@item +For Lua: @code{_}, @code{gettext.gettext}, @code{gettext.dgettext:2}, +@code{gettext.dcgettext:2}, @code{gettext.ngettext:1,2}, +@code{gettext.dngettext:2,3}, @code{gettext.dcngettext:2,3}. 
@end itemize To disable the default keyword specifications, the option @samp{-k} or @@ -297,7 +302,7 @@ lead to a crash at runtime. @* This option has an effect with most languages, namely C, C++, ObjectiveC, Shell, Python, Lisp, EmacsLisp, librep, Scheme, Java, C#, awk, YCP, Tcl, Perl, PHP, -GCC-source. +GCC-source, Lua. @item -T @itemx --trigraphs diff --git a/gettext-tools/libgettextpo/ChangeLog b/gettext-tools/libgettextpo/ChangeLog index ef53e2a9e..576966f30 100644 --- a/gettext-tools/libgettextpo/ChangeLog +++ b/gettext-tools/libgettextpo/ChangeLog @@ -1,3 +1,7 @@ +2013-04-11 Ľubomír Remák + + * Makefile.am (libgettextpo_la_AUXSOURCES): Add format-lua.c. + 2012-12-25 Daiki Ueno * gettext-0.18.2 released. diff --git a/gettext-tools/libgettextpo/Makefile.am b/gettext-tools/libgettextpo/Makefile.am index cf4a92864..9765f6ac1 100644 --- a/gettext-tools/libgettextpo/Makefile.am +++ b/gettext-tools/libgettextpo/Makefile.am @@ -84,6 +84,7 @@ libgettextpo_la_AUXSOURCES = \ ../src/format-qt-plural.c \ ../src/format-kde.c \ ../src/format-boost.c \ + ../src/format-lua.c \ ../src/format.c \ ../src/plural-exp.c \ ../src/plural-eval.c \ diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 66bd3c4c4..9427c8774 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,28 @@ +2013-04-11 Ľubomír Remák + + Support for Lua. + * message.h (format_type): New enum value 'format_lua'. + (NFORMATS): Increment. + * message.c (format_language): Add format_lua entry. + (format_language_pretty): Likewise. + * format.h (formatstring_lua): New declaration. + * format-lua.c: New file. + * format.c (formatstring_parsers): Add formatstring_lua. + * x-lua.h: New file. + * x-lua.c: New file. + * xgettext.c: Include x-lua.h. + (flag_table_lua): New variable. + (main): Invoke init_flag_table_lua, x_lua_extract_all, + x_lua_keyword. + (usage): Mention Lua source language. + (xgettext_record_flag): Handle format_lua. 
+ (language_to_extractor): Add Lua rule. + (extension_to_language): Add Lua rule. + * Makefile.am (noinst_HEADERS): Add x-lua.h. + (FORMAT_SOURCE): Add format-lua.c. + (xgettext_SOURCES): Add x-lua.c. + * FILES: Update. + 2013-03-15 Miguel Ángel Arruga Vivas Daiki Ueno diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES index 9a41f4865..37a6558b9 100644 --- a/gettext-tools/src/FILES +++ b/gettext-tools/src/FILES @@ -333,6 +333,9 @@ msgl-check.c | x-glade.h | x-glade.c | String extractor from .glade files, GNOME GUI descriptions. +| x-lua.h +| x-lua.c +| String extractor for Lua. | xgettext.c | Main source for the 'xgettext' program. | diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index aea01e65c..b7bb9d788 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -51,7 +51,7 @@ write-qt.h \ po-time.h plural-table.h lang-table.h format.h filters.h \ xgettext.h x-c.h x-po.h x-sh.h x-python.h x-lisp.h x-elisp.h x-librep.h \ x-scheme.h x-smalltalk.h x-java.h x-properties.h x-csharp.h x-awk.h x-ycp.h \ -x-tcl.h x-perl.h x-php.h x-stringtable.h x-rst.h x-glade.h +x-tcl.h x-perl.h x-php.h x-stringtable.h x-rst.h x-glade.h x-lua.h EXTRA_DIST += FILES project-id ChangeLog.0 @@ -135,7 +135,8 @@ FORMAT_SOURCE += \ format-qt.c \ format-qt-plural.c \ format-kde.c \ - format-boost.c + format-boost.c \ + format-lua.c # libgettextsrc contains all code that is needed by at least two programs. 
libgettextsrc_la_SOURCES = \
@@ -173,7 +174,7 @@ endif
 xgettext_SOURCES += \
   x-c.c x-po.c x-sh.c x-python.c x-lisp.c x-elisp.c x-librep.c x-scheme.c \
   x-smalltalk.c x-java.c x-csharp.c x-awk.c x-ycp.c x-tcl.c x-perl.c x-php.c \
-  x-rst.c x-glade.c
+  x-rst.c x-glade.c x-lua.c
 if !WOE32DLL
 msgattrib_SOURCES = msgattrib.c
 else
diff --git a/gettext-tools/src/format-lua.c b/gettext-tools/src/format-lua.c
new file mode 100644
index 000000000..272def446
--- /dev/null
+++ b/gettext-tools/src/format-lua.c
@@ -0,0 +1,367 @@
+/* Lua format strings.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   Written by Ľubomír Remák, 2012.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "format.h"
+#include "gettext.h"
+#include "xalloc.h"
+#include "format-invalid.h"
+#include "c-ctype.h"
+#include "xvasprintf.h"
+
+#define _(str) gettext (str)
+
+/* The Lua format strings are described in the Lua manual,
+   which can be found at:
+   http://www.lua.org/manual/5.2/manual.html
+
+   A directive
+   - starts with '%'
+   - is optionally followed by any of the characters '0', '-', ' ', '+' or
+     '#', each of which acts as a flag,
+   - is optionally followed by a width specification: a nonempty digit
+     sequence,
+   - is optionally followed by '.' and a precision specification: a nonempty
+     digit sequence,
+   - is finished by a specifier
+     - 's', 'q', that needs a string argument,
+     - 'd', 'i', 'o', 'u', 'X', 'x', that need an integer argument,
+     - 'A', 'a', 'E', 'e', 'f', 'G', 'g', that need a floating-point argument,
+     - 'c', that needs a character argument.
+   Additionally there is the directive '%%', which takes no argument.
+
+   Note: Lua does not distinguish between integer, floating-point
+   and character arguments, since it has a number data type only.
+   However, we should not allow users to use %d instead of %c.
+   The same applies to %s and %q - we should not allow intermixing them.
+ */
+
+enum format_arg_type
+{
+  FAT_INTEGER,
+  FAT_CHARACTER,
+  FAT_FLOAT,
+  FAT_STRING,
+  FAT_ESCAPED_STRING
+};
+
+struct spec
+{
+  unsigned int directives;
+  unsigned int format_args_count;
+  unsigned int allocated;
+  enum format_arg_type *format_args;
+};
+
+/* Locale independent test for a decimal digit.
+   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
+   isdigit must be an 'unsigned char'.)  */
+#undef isdigit
+#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
+
+static void format_free (void *descr);
+
+/* Parse FORMAT as a Lua format string.  Return a freshly allocated
+   'struct spec' recording the argument type of each directive, or NULL
+   with *INVALID_REASON set when a directive is unterminated or uses an
+   unknown conversion.  Directive positions are recorded through FDI_SET.
+   TRANSLATED is not used.  */
+static void *
+format_parse (const char *format, bool translated, char *fdi,
+              char **invalid_reason)
+{
+
+  const char *format_start = format;
+  const char *fatstr = format;
+  struct spec *result = NULL;
+  result = XMALLOC (struct spec);
+  result->directives = 0;
+  result->allocated = 0;
+  result->format_args_count = 0;
+  result->format_args = NULL;
+
+
+  for (; *fatstr != '\0';)
+    {
+      if (*fatstr++ == '%')
+        {
+          FDI_SET (fatstr - 1, FMTDIR_START);
+          result->directives++;
+
+          if (*fatstr != '%')
+            {
+              enum format_arg_type type;
+
+              /* Remove flags.  Lua's string.format accepts the C printf
+                 flag characters '-', '+', ' ', '#' and '0'.  */
+              while (*fatstr == '-' || *fatstr == '+' || *fatstr == ' '
+                     || *fatstr == '#' || *fatstr == '0')
+                fatstr++;
+
+              /* Remove width. */
+              while (isdigit (*fatstr))
+                fatstr++;
+
+              if (*fatstr == '.')
+                {
+                  fatstr++;
+
+                  /* Remove precision. */
+                  while (isdigit (*fatstr))
+                    fatstr++;
+                }
+
+              switch (*fatstr)
+                {
+                case 'c':
+                  type = FAT_CHARACTER;
+                  break;
+                case 'd':
+                case 'i':
+                case 'o':
+                case 'u':
+                case 'X':
+                case 'x':
+                  type = FAT_INTEGER;
+                  break;
+                case 'a':
+                case 'A':
+                case 'E':
+                case 'e':
+                case 'f':
+                case 'g':
+                case 'G':
+                  type = FAT_FLOAT;
+                  break;
+                case 's':
+                  type = FAT_STRING;
+                  break;
+                case 'q':
+                  type = FAT_ESCAPED_STRING;
+                  break;
+                default:
+                  if (*fatstr == '\0')
+                    {
+                      *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+                      FDI_SET (fatstr - 1, FMTDIR_ERROR);
+                    }
+                  else
+                    {
+                      *invalid_reason =
+                        INVALID_CONVERSION_SPECIFIER (result->
+                                                      format_args_count + 1,
+                                                      *fatstr);
+                      FDI_SET (fatstr, FMTDIR_ERROR);
+                    }
+                  goto fmt_error;
+                }
+
+              if (result->format_args_count == result->allocated)
+                {
+                  result->allocated = 2 * result->allocated + 10;
+                  result->format_args =
+                    xrealloc (result->format_args,
+                              result->allocated *
+                              sizeof (enum format_arg_type));
+                }
+              result->format_args[result->format_args_count++] = type;
+            }
+          FDI_SET (fatstr, FMTDIR_END);
+          fatstr++;
+        }
+    }
+
+  return result;
+
+fmt_error:
+  format_free (result);
+  return NULL;
+}
+
+/* Release a descriptor returned by format_parse.  */
+static void
+format_free (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+
+  if (spec->format_args != NULL)
+    free (spec->format_args);
+  free (spec);
+}
+
+/* Return the number of '%' directives that were seen, '%%' included.  */
+static int
+format_get_number_of_directives (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+
+  return spec->directives;
+}
+
+/* Compare the argument lists of MSGID_DESCR and MSGSTR_DESCR position by
+   position.  Return true, after reporting through ERROR_LOGGER when it is
+   non-NULL, if they differ in length or in any argument type; return false
+   if they agree.  EQUALITY is not used.  */
+static bool
+format_check (void *msgid_descr, void *msgstr_descr, bool equality,
+              formatstring_error_logger_t error_logger,
+              const char *pretty_msgid, const char *pretty_msgstr)
+{
+  struct spec *spec1 = (struct spec *) msgid_descr;
+  struct spec *spec2 = (struct spec *) msgstr_descr;
+
+  if (spec1->format_args_count + spec2->format_args_count > 0)
+    {
+      unsigned int i, n1, n2;
+
+      n1 = spec1->format_args_count;
+      n2 = spec2->format_args_count;
+
+      for (i = 0; i < n1 || i < n2; i++)
+        {
+          if (i >= n1)
+            {
+              if (error_logger)
+                error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
+                              i + 1, pretty_msgstr, pretty_msgid);
+              return true;
+            }
+          else if (i >= n2)
+            {
+              if (error_logger)
+                error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
+                              i + 1, pretty_msgstr);
+              return true;
+            }
+          else if (spec1->format_args[i] != spec2->format_args[i])
+            {
+              if (error_logger)
+                error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
+                              pretty_msgid, pretty_msgstr, i + 1);
+              return true;
+            }
+        }
+    }
+
+  return false;
+}
+
+struct formatstring_parser formatstring_lua =
+{
+  format_parse,
+  format_free,
+  format_get_number_of_directives,
+  NULL,
+  format_check
+};
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+   format_parse for strings read from standard input.  */
+
+#include <stdio.h>
+
+/* Print the argument types of DESCR as "(i f c s q ...)", or "INVALID".  */
+static void
+format_print (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+  unsigned int i;
+
+  if (spec == NULL)
+    {
+      printf ("INVALID");
+      return;
+    }
+
+  printf ("(");
+  for (i = 0; i < spec->format_args_count; i++)
+    {
+      if (i > 0)
+        printf (" ");
+      switch (spec->format_args[i])
+        {
+        case FAT_INTEGER:
+          printf ("i");
+          break;
+        case FAT_FLOAT:
+          printf ("f");
+          break;
+        case FAT_CHARACTER:
+          printf ("c");
+          break;
+        case FAT_STRING:
+          printf ("s");
+          break;
+        case FAT_ESCAPED_STRING:
+          printf ("q");
+          break;
+        default:
+          abort ();
+        }
+    }
+  printf (")");
+}
+
+/* Parse every line of standard input and print its descriptor.  */
+int
+main ()
+{
+  for (;;)
+    {
+      char *line = NULL;
+      size_t line_size = 0;
+      int line_len;
+      char *invalid_reason;
+      void *descr;
+
+      line_len = getline (&line, &line_size, stdin);
+      if (line_len < 0)
+        break;
+      if (line_len > 0 && line[line_len - 1] == '\n')
+        line[--line_len] = '\0';
+
+      invalid_reason = NULL;
+      descr = format_parse (line, false, NULL, &invalid_reason);
+
+      format_print (descr);
+      printf ("\n");
+      if (descr == NULL)
+        printf ("%s\n", invalid_reason);
+
+      free (invalid_reason);
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-lua.c ../gnulib-lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */
diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c
index e6c5de996..7200a9430 100644
--- a/gettext-tools/src/format.c
+++ b/gettext-tools/src/format.c
@@ -57,7 +57,8 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] =
   /* format_qt */ &formatstring_qt,
   /* format_qt_plural */ &formatstring_qt_plural,
   /* format_kde */ &formatstring_kde,
-  /* format_boost */ &formatstring_boost
+  /* format_boost */ &formatstring_boost,
+  /* format_lua */ &formatstring_lua
 };
 
 /* Check whether both formats strings contain compatible format
diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h
index 60f0adcfc..9d3530eac 100644
--- a/gettext-tools/src/format.h
+++ b/gettext-tools/src/format.h
@@ -119,6 +119,7 @@ extern DLL_VARIABLE struct formatstring_parser formatstring_qt;
 extern DLL_VARIABLE struct formatstring_parser formatstring_qt_plural;
 extern DLL_VARIABLE struct formatstring_parser formatstring_kde;
 extern DLL_VARIABLE struct formatstring_parser formatstring_boost;
+extern DLL_VARIABLE struct formatstring_parser formatstring_lua;
 
 /* Table of all format string parsers.
*/
 extern DLL_VARIABLE struct formatstring_parser *formatstring_parsers[NFORMATS];
diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c
index 5162b06f3..4d1ce6ec7 100644
--- a/gettext-tools/src/message.c
+++ b/gettext-tools/src/message.c
@@ -57,7 +57,8 @@ const char *const format_language[NFORMATS] =
   /* format_qt */ "qt",
   /* format_qt_plursl */ "qt-plural",
   /* format_kde */ "kde",
-  /* format_boost */ "boost"
+  /* format_boost */ "boost",
+  /* format_lua */ "lua"
 };
 
 const char *const format_language_pretty[NFORMATS] =
@@ -85,7 +86,8 @@ const char *const format_language_pretty[NFORMATS] =
   /* format_qt */ "Qt",
   /* format_qt_plural */ "Qt plural",
   /* format_kde */ "KDE",
-  /* format_boost */ "Boost"
+  /* format_boost */ "Boost",
+  /* format_lua */ "Lua"
 };
 
 
diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h
index af9244aed..24d6c16b3 100644
--- a/gettext-tools/src/message.h
+++ b/gettext-tools/src/message.h
@@ -66,9 +66,10 @@ enum format_type
   format_qt,
   format_qt_plural,
   format_kde,
-  format_boost
+  format_boost,
+  format_lua
 };
-#define NFORMATS 24 /* Number of format_type enum values. */
+#define NFORMATS 25 /* Number of format_type enum values. */
 
 extern DLL_VARIABLE const char *const format_language[NFORMATS];
 extern DLL_VARIABLE const char *const format_language_pretty[NFORMATS];
diff --git a/gettext-tools/src/x-lua.c b/gettext-tools/src/x-lua.c
new file mode 100644
index 000000000..fad7d0e8e
--- /dev/null
+++ b/gettext-tools/src/x-lua.c
@@ -0,0 +1,1201 @@
+/* xgettext Lua backend.
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+   This file was written by Ľubomír Remák, 2012.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Specification.  */
+#include "x-lua.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "xgettext.h"
+#include "error.h"
+#include "xalloc.h"
+#include "gettext.h"
+#include "po-charset.h"
+
+#define _(s) gettext(s)
+
+#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
+
+/* The Lua syntax is defined in the Lua manual section 9,
+   which can be found at
+   http://www.lua.org/manual/5.2/manual.html#9  */
+
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+/* A hash table for keywords.  */
+static hash_table keywords;
+static bool default_keywords = true;
+
+/* Set extract_all flag (gettext will extract all strings).  */
+void
+x_lua_extract_all ()
+{
+  extract_all = true;
+}
+
+/* Add the keyword spec NAME; NAME == NULL disables the default keywords.  */
+void
+x_lua_keyword (const char *name)
+{
+  if (name == NULL)
+    default_keywords = false;
+  else
+    {
+      const char *end;
+      struct callshape shape;
+      const char *colon;
+
+      if (keywords.table == NULL)
+        hash_init (&keywords, 100);
+
+      split_keywordspec (name, &end, &shape);
+
+      /* The characters between name and end form the (possibly dotted) name.
+         A colon means an invalid parse in split_keywordspec().  */
+      colon = strchr (name, ':');
+      if (colon == NULL || colon >= end)
+        insert_keyword_callshape (&keywords, name, end - name, &shape);
+    }
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (default_keywords)
+    {
+      /* When adding new keywords here, also update the documentation in
+         xgettext.texi!  */
+      x_lua_keyword ("_");
+      x_lua_keyword ("gettext.gettext");
+      x_lua_keyword ("gettext.dgettext:2");
+      x_lua_keyword ("gettext.dcgettext:2");
+      x_lua_keyword ("gettext.ngettext:1,2");
+      x_lua_keyword ("gettext.dngettext:2,3");
+      x_lua_keyword ("gettext.dcngettext:2,3");
+      default_keywords = false;
+    }
+}
+
+/* Record the default flag specifications for the Lua keywords.  */
+void
+init_flag_table_lua ()
+{
+  xgettext_record_flag ("_:1:pass-lua-format");
+  xgettext_record_flag ("gettext.gettext:1:pass-lua-format");
+  xgettext_record_flag ("gettext.dgettext:2:pass-lua-format");
+  xgettext_record_flag ("gettext.dcgettext:2:pass-lua-format");
+  xgettext_record_flag ("gettext.ngettext:1:pass-lua-format");
+  xgettext_record_flag ("gettext.ngettext:2:pass-lua-format");
+  xgettext_record_flag ("gettext.dngettext:2:pass-lua-format");
+  xgettext_record_flag ("gettext.dngettext:3:pass-lua-format");
+  xgettext_record_flag ("gettext.dcngettext:2:pass-lua-format");
+  xgettext_record_flag ("gettext.dcngettext:3:pass-lua-format");
+  xgettext_record_flag ("string.format:1:lua-format");
+}
+
+/* ======================== Reading of characters.  ======================== */
+
+
+/* Real filename, used in error messages about the input file.  */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages.  */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream.  */
+static FILE *fp;
+
+
+/* 1. line_number handling.  */
+
+static unsigned char phase1_pushback[2];
+static int phase1_pushback_length;
+
+static int first_character = 1;
+
+/* Return the next character, from the pushback buffer or from FP, keeping
+   line_number up to date.  A '#' line at the very start is skipped.  */
+static int
+phase1_getc ()
+{
+  int c;
+
+  if (phase1_pushback_length)
+    c = phase1_pushback[--phase1_pushback_length];
+  else
+    {
+      c = getc (fp);
+
+      if (first_character)
+        {
+          first_character = 0;
+
+          /* Ignore shebang line.  No pushback required in this case.  */
+          if (c == '#')
+            {
+              while (c != '\n' && c != EOF)
+                c = getc (fp);
+              if (c == '\n')
+                {
+                  line_number++;
+                  c = getc (fp);
+                }
+            }
+        }
+
+      if (c == EOF)
+        {
+          if (ferror (fp))
+            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+                   real_file_name);
+          return EOF;
+        }
+    }
+
+  if (c == '\n')
+    line_number++;
+
+  return c;
+}
+
+/* Supports 2 characters of pushback.  */
+
+static void
+phase1_ungetc (int c)
+{
+  if (c != EOF)
+    {
+      if (c == '\n')
+        --line_number;
+
+      if (phase1_pushback_length == SIZEOF (phase1_pushback))
+        abort ();
+      phase1_pushback[phase1_pushback_length++] = c;
+    }
+}
+
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+/* Accumulating comments.  */
+
+static char *buffer;
+static size_t bufmax;
+static size_t buflen;
+
+/* Begin a new, empty comment line.  */
+static inline void
+comment_start ()
+{
+  buflen = 0;
+}
+
+/* Append C to the current comment line, growing the buffer as needed.  */
+static inline void
+comment_add (int c)
+{
+  if (buflen >= bufmax)
+    {
+      bufmax = 2 * bufmax + 10;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen++] = c;
+}
+
+/* Finish the current comment line: drop its last CHARS_TO_REMOVE characters
+   and any trailing blanks, then hand it to savable_comment_add.  */
+static inline void
+comment_line_end (size_t chars_to_remove)
+{
+  buflen -= chars_to_remove;
+  while (buflen >= 1
+         && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
+    --buflen;
+  if (chars_to_remove == 0 && buflen >= bufmax)
+    {
+      bufmax = 2 * bufmax + 10;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen] = '\0';
+  savable_comment_add (buffer);
+}
+
+/* Eats characters until '\n' and adds them to the comment.  */
+static void
+eat_comment_line ()
+{
+  for (;;)
+    {
+      int c = phase1_getc ();
+      if (c == '\n' || c == EOF)
+        {
+          comment_line_end (0);
+          break;
+        }
+
+      if (!(buflen == 0 && (c == ' ' || c == '\t')))
+        comment_add (c);
+    }
+}
+
+/* Like phase1_getc, but recognizes Lua comments: their text is saved
+   through comment_line_end, and the whole comment is returned as ' '
+   (long comment) or '\n' (short comment).  */
+static int
+phase2_getc ()
+{
+  int c;
+  int lineno;
+
+  c = phase1_getc ();
+
+  if (c == '-')
+    {
+      c = phase1_getc ();
+
+      if (c == '-')
+        {
+          /* It starts with '--', so it must be either a short or a long
+             comment.  */
+          c = phase1_getc ();
+
+          if (c == '[')
+            {
+              c = phase1_getc ();
+
+              int esigns = 0;
+              while (c == '=')
+                {
+                  esigns++;
+                  c = phase1_getc ();
+                }
+
+              if (c == '[')
+                {
+                  /* Long comment.  */
+                  bool right_bracket = false;
+                  bool end = false;
+                  int esigns2 = 0;
+
+                  lineno = line_number;
+                  comment_start ();
+                  while (!end)
+                    {
+                      c = phase1_getc ();
+
+                      if (c == EOF)
+                        break;
+
+                      /* Ignore leading spaces and tabs.  */
+                      if (buflen == 0 && (c == ' ' || c == '\t'))
+                        continue;
+
+                      comment_add (c);
+
+                      switch (c)
+                        {
+                        case ']':
+                          if (!right_bracket)
+                            {
+                              right_bracket = true;
+                              esigns2 = 0;
+                            }
+                          else
+                            {
+                              if (esigns2 == esigns)
+                                {
+                                  comment_line_end (2 + esigns);
+                                  end = true;
+                                }
+                              else
+                                /* Not the closing bracket; this ']' may
+                                   start the real one.  */
+                                esigns2 = 0;
+                            }
+                          break;
+
+                        case '=':
+                          if (right_bracket)
+                            esigns2++;
+                          break;
+
+                        case '\n':
+                          comment_line_end (1);
+                          comment_start ();
+                          lineno = line_number;
+                          /* Intentionally not breaking.  */
+
+                        default:
+                          right_bracket = false;
+                        }
+                    }
+                  last_comment_line = lineno;
+                  return ' ';
+                }
+              else
+                {
+                  /* One line (short) comment, starting with '--[=...='.
+                     Record its line like the plain '--' case below.  */
+                  lineno = line_number;
+                  comment_start ();
+                  comment_add ('[');
+                  while (esigns--)
+                    comment_add ('=');
+                  phase1_ungetc (c);
+                  eat_comment_line ();
+                  last_comment_line = lineno;
+                  return '\n';
+                }
+            }
+          else
+            {
+              /* One line (short) comment.  */
+              lineno = line_number;
+              comment_start ();
+              phase1_ungetc (c);
+              eat_comment_line ();
+              last_comment_line = lineno;
+              return '\n';
+            }
+        }
+      else
+        {
+          /* Minus sign.  */
+          phase1_ungetc (c);
+          return '-';
+        }
+    }
+  else
+    return c;
+}
+
+/* ========================== Reading of tokens.  ========================== */
+
+enum token_type_ty
+{
+  token_type_eof,
+  token_type_lparen,            /* ( */
+  token_type_rparen,            /* ) */
+  token_type_lbracket,          /* [ */
+  token_type_rbracket,          /* ] */
+  token_type_comma,             /* , */
+  token_type_dot,               /* . */
+  token_type_doubledot,         /* .. */
+  token_type_operator1,         /* + - * / % not # - ^ */
+  token_type_operator2,         /* < > <= >= ~= == and or */
+  token_type_string,
+  token_type_number,
+  token_type_symbol,
+  token_type_other
+};
+
+typedef enum token_type_ty token_type_ty;
+
+typedef struct token_ty token_ty;
+struct token_ty
+{
+  token_type_ty type;
+  char *string;         /* for token_type_string, token_type_symbol */
+  refcounted_string_list_ty *comment;   /* for token_type_string */
+  int line_number;
+};
+
+/* Free the memory pointed to by a 'struct token_ty'.  */
+static inline void
+free_token (token_ty *tp)
+{
+  if (tp->type == token_type_string || tp->type == token_type_symbol)
+    free (tp->string);
+  if (tp->type == token_type_string)
+    drop_reference (tp->comment);
+}
+
+/* Our current string.  */
+static int string_buf_length;
+static int string_buf_alloc;
+static char *string_buf;
+
+/* Begin a new, empty string in string_buf.  */
+static void
+string_start ()
+{
+  string_buf_length = 0;
+}
+
+/* Append C to string_buf, growing it as needed.  */
+static void
+string_add (int c)
+{
+  if (string_buf_length >= string_buf_alloc)
+    {
+      string_buf_alloc = 2 * string_buf_alloc + 10;
+      string_buf = xrealloc (string_buf, string_buf_alloc);
+    }
+
+  string_buf[string_buf_length++] = c;
+}
+
+/* NUL-terminate string_buf.  string_add only guarantees room for the
+   characters added so far, so make room for the terminator first; this
+   also covers an empty string when nothing has been allocated yet.  */
+static void
+string_end ()
+{
+  if (string_buf_length >= string_buf_alloc)
+    {
+      string_buf_alloc = string_buf_alloc + 1;
+      string_buf = xrealloc (string_buf, string_buf_alloc);
+    }
+
+  string_buf[string_buf_length] = '\0';
+}
+
+
+/* We need 3 pushback tokens for string optimization.
*/ +static int phase3_pushback_length; +static token_ty phase3_pushback[3]; + + +static void +phase3_unget (token_ty *tp) +{ + if (tp->type != token_type_eof) + { + if (phase3_pushback_length == SIZEOF (phase3_pushback)) + abort (); + phase3_pushback[phase3_pushback_length++] = *tp; + } +} + +static void +phase3_get (token_ty *tp) +{ + int c; + int c2; + int c_start; + + if (phase3_pushback_length) + { + *tp = phase3_pushback[--phase3_pushback_length]; + return; + } + + tp->string = NULL; + + for (;;) + { + tp->line_number = line_number; + c = phase2_getc (); + + switch (c) + { + case EOF: + tp->type = token_type_eof; + return; + + case '\n': + if (last_non_comment_line > last_comment_line) + savable_comment_reset (); + /* Intentionally not breaking. */ + case ' ': + case '\t': + case '\f': + continue; + + case '+': + case '-': + case '*': + case '/': + case '^': + case '%': + case '#': + tp->type = token_type_operator1; + return; + case '<': + case '>': + case '=': + c2 = phase1_getc (); + if (c2 != '=') + phase1_ungetc (c2); + tp->type = token_type_operator2; + return; + case '~': + c2 = phase1_getc (); + if (c2 == '=') + { + tp->type = token_type_operator2; + return; + } + else + phase1_ungetc (c2); + continue; + case '(': + tp->type = token_type_lparen; + return; + case ')': + tp->type = token_type_rparen; + return; + case ',': + tp->type = token_type_comma; + return; + + case ';': + tp->type = token_type_other; + return; + + /* There are three operators beginning with a dot. '.', + '..' and '...'. The most useful for us is the string + concatenation operator ('..'). */ + case '.': + c = phase1_getc (); + if (c == '.') + { + c = phase1_getc (); + if (c == '.') + { + tp->type = token_type_other; + return; + } + else + { + phase1_ungetc (c); + tp->type = token_type_doubledot; + return; + } + } + else if (c >= '0' && c <= '9') + { + /* It's a number. We aren't interested in the actual + numeric value, so ignore the dot and let next + iteration eat the number. 
*/ + phase1_ungetc (c); + continue; + } + else + { + phase1_ungetc (c); + tp->type = token_type_dot; + return; + } + + case '"': + case '\'': + c_start = c; + string_start (); + + for (;;) + { + /* We need unprocessed characters from phase 1. */ + c = phase1_getc (); + + /* We got '\', this is probably an escape sequence. */ + if (c == '\\') + { + c = phase1_getc (); + switch (c) + { + case 'a': + string_add ('\a'); + break; + case 'b': + string_add ('\b'); + break; + case 'f': + string_add ('\f'); + break; + case 'n': + string_add ('\n'); + break; + case 'r': + string_add ('\r'); + break; + case 't': + string_add ('\t'); + break; + case 'v': + string_add ('\v'); + break; + + default: + /* Check if it's a '\ddd' sequence. */ + if (c >= '0' && c <= '9') + { + int num = 0; + int i = 0; + + while (c >= '0' && c <= '9' && i < 3) + { + num *= 10; + num += (c - '0'); + c = phase1_getc (); + i++; + } + + /* The last read character is either a + non-number or another number after our + '\ddd' sequence. We need to ungetc it. */ + phase1_ungetc (c); + + /* The sequence number is too big, this + causes a lexical error. Ignore it. */ + if (num < 256) + string_add (num); + } + else + string_add (c); + } + } + else if (c == c_start || c == EOF || c == '\n') + { + /* End of string. */ + string_end (); + tp->string = xstrdup (string_buf); + tp->comment = add_reference (savable_comment); + tp->type = token_type_string; + return; + } + else + string_add (c); + } + break; + + case '[': + c = phase1_getc (); + + /* Count the number of equal signs. */ + int esigns = 0; + while (c == '=') + { + esigns++; + c = phase1_getc (); + } + + if (c != '[') + { + /* We did not find what we were looking for, ungetc it. */ + phase1_ungetc (c); + if (esigns == 0) + { + /* Our current character isn't '[' and we got 0 equal + signs, so the first '[' must have been a left + bracket. */ + tp->type = token_type_lbracket; + return; + } + else + /* Lexical error, ignore it. 
*/ + continue; + } + + string_start (); + + for (;;) + { + c = phase1_getc (); + + if (c == ']') + { + c = phase1_getc (); + + /* Count the number of equal signs. */ + int esigns2 = 0; + while (c == '=') + { + esigns2++; + c = phase1_getc (); + } + + if (c == ']' && esigns == esigns2) + { + /* We got ']==...==]', where the number of equal + signs matches the number of equal signs in + the opening bracket. */ + string_end (); + tp->string = xstrdup (string_buf); + tp->comment = add_reference (savable_comment); + tp->type = token_type_string; + return; + } + else + { + /* Otherwise we got either ']==' garbage or + ']==...==]' with a different number of equal + signs. + + Add ']' and equal signs to the string, and + ungetc the current character, because the + second ']' might be a part of another closing + long bracket, e.g. '==]===]'. */ + phase1_ungetc (c); + + string_add (']'); + while (esigns2--) + string_add ('='); + } + } + else + { + if (c == EOF) + { + string_end (); + tp->string = xstrdup (string_buf); + tp->comment = add_reference (savable_comment); + tp->type = token_type_string; + return; + } + else + string_add (c); + } + } + break; + + case ']': + tp->type = token_type_rbracket; + return; + + default: + if (c >= '0' && c <= '9') + { + while (c >= '0' && c <= '9') + c = phase1_getc (); + + if (c == '.') + { + c = phase1_getc (); + while (c >= '0' && c <= '9') + c = phase1_getc (); + } + + if (c == 'e' || c == 'E') + { + if (c == '+' || c == '-') + c = phase1_getc (); + while (c >= '0' && c <= '9') + c = phase1_getc (); + } + + phase1_ungetc (c); + + tp->type = token_type_number; + return; + } + else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') + || c == '_') + { + string_start (); + while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') + || c == '_' || (c >= '0' && c <= '9')) + { + string_add (c); + c = phase1_getc (); + } + string_end (); + phase1_ungetc (c); + + if (strcmp (string_buf, "not") == 0) + tp->type = token_type_operator1; + else if 
(strcmp (string_buf, "and") == 0) + tp->type = token_type_operator2; + else if (strcmp (string_buf, "or") == 0) + tp->type = token_type_operator2; + else + { + tp->string = xstrdup (string_buf); + tp->type = token_type_symbol; + } + return; + } + else + tp->type = token_type_other; + } + } +} + +/* String and symbol concatenation. */ + +static token_type_ty phase4_last; + +/* We need 3 pushback tokens for string and symbol concatenation. */ +static int phase4_pushback_length; +static token_ty phase4_pushback[3]; + +static void +phase4_unget (token_ty *tp) +{ + if (tp->type != token_type_eof) + { + if (phase4_pushback_length == SIZEOF (phase4_pushback)) + abort (); + phase4_pushback[phase4_pushback_length++] = *tp; + } +} + +static void +phase4_get (token_ty *tp) +{ + if (phase4_pushback_length) + { + *tp = phase4_pushback[--phase4_pushback_length]; + phase4_last = tp->type; + return; + } + + phase3_get (tp); + if (tp->type == token_type_string + && !(phase4_last == token_type_operator1 + || phase4_last == token_type_dot + || phase4_last == token_type_symbol + || phase4_last == token_type_doubledot + || phase4_last == token_type_rparen)) + { + char *sum = tp->string; + size_t sum_len = strlen (sum); + + for (;;) + { + token_ty token2; + + phase3_get (&token2); + if (token2.type == token_type_doubledot) + { + token_ty token3; + + phase3_get (&token3); + if (token3.type == token_type_string) + { + token_ty token_after; + + phase3_get (&token_after); + if (token_after.type != token_type_operator1) + { + char *addend = token3.string; + size_t addend_len = strlen (addend); + + sum = (char *) xrealloc (sum, sum_len + addend_len + 1); + memcpy (sum + sum_len, addend, addend_len + 1); + sum_len += addend_len; + + phase3_unget (&token_after); + free_token (&token3); + free_token (&token2); + continue; + } + phase3_unget (&token_after); + } + phase3_unget (&token3); + } + phase3_unget (&token2); + break; + } + tp->string = sum; + } + phase4_last = tp->type; +} + +static void 
+phase5_get (token_ty *tp) +{ + phase4_get (tp); + + /* Combine symbol1 . ... . symbolN to a single strings, so that + we can recognize function calls like + gettext.gettext. The information present for + symbolI.....symbolN has precedence over the information for + symbolJ.....symbolN with J > I. */ + if (tp->type == token_type_symbol) + { + char *sum = tp->string; + size_t sum_len = strlen (sum); + + for (;;) + { + token_ty token2; + + phase4_get (&token2); + if (token2.type == token_type_dot) + { + token_ty token3; + + phase4_get (&token3); + if (token3.type == token_type_symbol) + { + char *addend = token3.string; + size_t addend_len = strlen (addend); + + sum = (char *) xrealloc (sum, sum_len + 1 + addend_len + 1); + sum[sum_len] = '.'; + memcpy (sum + sum_len + 1, addend, addend_len + 1); + sum_len += 1 + addend_len; + + free_token (&token2); + free_token (&token3); + continue; + } + phase4_unget (&token3); + } + phase4_unget (&token2); + break; + } + tp->string = sum; + } +} + +static void +x_lua_lex (token_ty *tok) +{ + phase5_get (tok); +} + + +/* ========================= Extracting strings. ========================== */ + + +/* Context lookup table. */ +static flag_context_list_table_ty *flag_context_list_table; + + +/* The file is broken into tokens. Scan the token stream, looking for + a keyword, followed by a left paren, followed by a string. When we + see this sequence, we have something to remember. We assume we are + looking at a valid Lua program, and leave the complaints about the + grammar to the compiler. + + Normal handling: Look for + keyword ( ... msgid ... ) + keyword msgid + Plural handling: Look for + keyword ( ... msgid ... msgid_plural ... ) + + We use recursion because the arguments before msgid or between msgid + and msgid_plural can contain subexpressions of the same form. */ + +/* Extract messages until the next balanced closing parenthesis or bracket. + Extracted messages are added to MLP. 
+ DELIM can be either token_type_rparen or token_type_rbracket, or + token_type_eof to accept both. + Return true upon eof, false upon closing parenthesis or bracket. */ +static bool +extract_balanced (message_list_ty *mlp, token_type_ty delim, + flag_context_ty outer_context, + flag_context_list_iterator_ty context_iter, + struct arglist_parser *argparser) +{ + /* Current argument number. */ + int arg = 1; + /* 0 when no keyword has been seen. 1 right after a keyword is seen. */ + int state; + /* Parameters of the keyword just seen. Defined only in state 1. */ + const struct callshapes *next_shapes = NULL; + /* Context iterator that will be used if the next token is a '('. */ + flag_context_list_iterator_ty next_context_iter = + passthrough_context_list_iterator; + /* Current context. */ + flag_context_ty inner_context = + inherited_context (outer_context, + flag_context_list_iterator_advance (&context_iter)); + + /* Start state is 0. */ + state = 0; + + for (;;) + { + token_ty token; + + x_lua_lex (&token); + + switch (token.type) + { + case token_type_symbol: + { + void *keyword_value; + + if (hash_find_entry (&keywords, token.string, strlen (token.string), + &keyword_value) + == 0) + { + next_shapes = (const struct callshapes *) keyword_value; + state = 1; + } + else + state = 0; + } + next_context_iter = + flag_context_list_iterator ( + flag_context_list_table_lookup ( + flag_context_list_table, + token.string, strlen (token.string))); + free (token.string); + continue; + + case token_type_lparen: + if (extract_balanced (mlp, token_type_rparen, + inner_context, next_context_iter, + arglist_parser_alloc (mlp, + state ? 
next_shapes : NULL))) + { + arglist_parser_done (argparser, arg); + return true; + } + next_context_iter = null_context_list_iterator; + state = 0; + break; + + case token_type_rparen: + if (delim == token_type_rparen || delim == token_type_eof) + { + arglist_parser_done (argparser, arg); + return false; + } + + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + case token_type_lbracket: + if (extract_balanced (mlp, token_type_rbracket, + null_context, null_context_list_iterator, + arglist_parser_alloc (mlp, NULL))) + { + arglist_parser_done (argparser, arg); + return true; + } + next_context_iter = null_context_list_iterator; + state = 0; + break; + + case token_type_rbracket: + if (delim == token_type_rbracket || delim == token_type_eof) + { + arglist_parser_done (argparser, arg); + return false; + } + + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + case token_type_comma: + arg++; + inner_context = + inherited_context (outer_context, + flag_context_list_iterator_advance ( + &context_iter)); + next_context_iter = passthrough_context_list_iterator; + state = 0; + continue; + + case token_type_eof: + arglist_parser_done (argparser, arg); + return true; + + case token_type_string: + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = token.line_number; + + if (extract_all) + remember_a_message (mlp, NULL, token.string, inner_context, + &pos, NULL, token.comment); + else + { + /* A string immediately after a symbol means a function call. 
*/ + if (state) + { + struct arglist_parser *tmp_argparser; + tmp_argparser = arglist_parser_alloc (mlp, next_shapes); + + arglist_parser_remember (tmp_argparser, 1, token.string, + inner_context, pos.file_name, + pos.line_number, token.comment); + arglist_parser_done (tmp_argparser, 1); + } + else + arglist_parser_remember (argparser, arg, token.string, + inner_context, pos.file_name, + pos.line_number, token.comment); + } + } + drop_reference (token.comment); + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + case token_type_dot: + case token_type_operator1: + case token_type_operator2: + case token_type_number: + case token_type_other: + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + default: + abort (); + } + } +} + +void +extract_lua (FILE *f, + const char *real_filename, const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp) +{ + message_list_ty *mlp = mdlp->item[0]->messages; + + fp = f; + real_file_name = real_filename; + logical_file_name = xstrdup (logical_filename); + line_number = 1; + + last_comment_line = -1; + last_non_comment_line = -1; + + flag_context_list_table = flag_table; + + init_keywords (); + + /* Eat tokens until eof is seen. When extract_parenthesized returns + due to an unbalanced closing parenthesis, just restart it. */ + while (!extract_balanced (mlp, token_type_eof, + null_context, null_context_list_iterator, + arglist_parser_alloc (mlp, NULL))) + ; + + fp = NULL; + real_file_name = NULL; + logical_file_name = NULL; + line_number = 0; +} diff --git a/gettext-tools/src/x-lua.h b/gettext-tools/src/x-lua.h new file mode 100644 index 000000000..bca030491 --- /dev/null +++ b/gettext-tools/src/x-lua.h @@ -0,0 +1,48 @@ +/* xgettext Lua backend. + Copyright (C) 2011 Free Software Foundation, Inc. 
+ Written by Ľubomír Remák , 2011 + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> + +#include "message.h" +#include "xgettext.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define EXTENSIONS_LUA \ + { "lua", "Lua" }, \ + +#define SCANNERS_LUA \ + { "Lua", extract_lua, \ + &flag_table_lua, &formatstring_lua, NULL }, \ + + /* Scan a Lua file and add its translatable strings to mdlp. */ + extern void extract_lua (FILE * fp, const char *real_filename, + const char *logical_filename, + flag_context_list_table_ty * flag_table, + msgdomain_list_ty * mdlp); + + extern void x_lua_keyword (const char *keyword); + extern void x_lua_extract_all (void); + + extern void init_flag_table_lua (void); + +#ifdef __cplusplus +} +#endif diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c index 90184b7f7..032d0aea5 100644 --- a/gettext-tools/src/xgettext.c +++ b/gettext-tools/src/xgettext.c @@ -92,6 +92,7 @@ #include "x-stringtable.h" #include "x-rst.h" #include "x-glade.h" +#include "x-lua.h" /* If nonzero add all comments immediately preceding one of the keywords. */ @@ -160,6 +161,7 @@ static flag_context_list_table_ty flag_table_ycp; static flag_context_list_table_ty flag_table_tcl; static flag_context_list_table_ty flag_table_perl; static flag_context_list_table_ty flag_table_php; +static flag_context_list_table_ty flag_table_lua; /* If true, recognize Qt format strings. 
*/ static bool recognize_format_qt; @@ -331,6 +333,7 @@ main (int argc, char *argv[]) init_flag_table_tcl (); init_flag_table_perl (); init_flag_table_php (); + init_flag_table_lua (); while ((optchar = getopt_long (argc, argv, "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:", @@ -355,6 +358,7 @@ main (int argc, char *argv[]) x_perl_extract_all (); x_php_extract_all (); x_glade_extract_all (); + x_lua_extract_all (); break; case 'c': @@ -432,6 +436,7 @@ main (int argc, char *argv[]) x_perl_keyword (optarg); x_php_keyword (optarg); x_glade_keyword (optarg); + x_lua_keyword (optarg); if (optarg == NULL) no_default_keywords = true; else @@ -857,7 +862,7 @@ Choice of input file language:\n")); (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\ EmacsLisp, librep, Scheme, Smalltalk, Java,\n\ JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\ - GCC-source, NXStringTable, RST, Glade)\n")); + Lua, GCC-source, NXStringTable, RST, Glade)\n")); printf (_("\ -C, --c++ shorthand for --language=C++\n")); printf (_("\ @@ -890,21 +895,21 @@ Language specific options:\n")); printf (_("\ (only languages C, C++, ObjectiveC, Shell,\n\ Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\ - C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n")); + C#, awk, Tcl, Perl, PHP, Lua, GCC-source, Glade)\n")); printf (_("\ -kWORD, --keyword=WORD look for WORD as an additional keyword\n\ -k, --keyword do not to use default keywords\n")); printf (_("\ (only languages C, C++, ObjectiveC, Shell,\n\ Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\ - C#, awk, Tcl, Perl, PHP, GCC-source, Glade)\n")); + C#, awk, Tcl, Perl, PHP, Lua, GCC-source, Glade)\n")); printf (_("\ --flag=WORD:ARG:FLAG additional flag for strings inside the argument\n\ number ARG of keyword WORD\n")); printf (_("\ (only languages C, C++, ObjectiveC, Shell,\n\ Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\ - C#, awk, YCP, Tcl, Perl, PHP, GCC-source)\n")); + C#, awk, YCP, Tcl, Perl, PHP, Lua, GCC-source)\n")); printf (_("\ -T, --trigraphs 
understand ANSI C trigraphs for input\n")); printf (_("\ @@ -1768,6 +1773,11 @@ xgettext_record_flag (const char *optionstring) name_start, name_end, argnum, value, pass); break; + case format_lua: + flag_context_list_table_insert (&flag_table_lua, 0, + name_start, name_end, + argnum, value, pass); + break; default: abort (); } @@ -3185,6 +3195,7 @@ language_to_extractor (const char *name) SCANNERS_STRINGTABLE SCANNERS_RST SCANNERS_GLADE + SCANNERS_LUA /* Here may follow more languages and their scanners: pike, etc... Make sure new scanners honor the --exclude-file option. */ }; @@ -3268,6 +3279,7 @@ extension_to_language (const char *extension) EXTENSIONS_STRINGTABLE EXTENSIONS_RST EXTENSIONS_GLADE + EXTENSIONS_LUA /* Here may follow more file extensions... */ }; diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index a8c85f058..46cb9952c 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,13 @@ +2013-04-11 Ľubomír Remák + + Support for Lua. + * format-lua-1: New file. + * format-lua-2: New file. + * xgettext-lua-1: New file. + * xgettext-lua-2: New file. + * lang-lua: New file. + * Makefile.am (TESTS): Add them. + 2013-03-03 Miguel Angel Arruga Vivas Add a test case for atkproperty tag in Glade2 files. 
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index e66185e8c..340523934 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -100,6 +100,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ xgettext-stringtable-1 \ xgettext-tcl-1 xgettext-tcl-2 xgettext-tcl-3 \ xgettext-ycp-1 xgettext-ycp-2 xgettext-ycp-3 xgettext-ycp-4 \ + xgettext-lua-1 xgettext-lua-2 \ format-awk-1 format-awk-2 \ format-boost-1 format-boost-2 \ format-c-1 format-c-2 format-c-3 format-c-4 format-c-5 \ @@ -123,12 +124,14 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ format-sh-1 format-sh-2 \ format-tcl-1 format-tcl-2 \ format-ycp-1 format-ycp-2 \ + format-lua-1 format-lua-2 \ plural-1 plural-2 \ gettextpo-1 \ lang-c lang-c++ lang-objc lang-sh lang-bash lang-python-1 \ lang-python-2 lang-clisp lang-elisp lang-librep lang-guile \ lang-smalltalk lang-java lang-csharp lang-gawk lang-pascal \ - lang-ycp lang-tcl lang-perl-1 lang-perl-2 lang-php lang-po lang-rst + lang-ycp lang-tcl lang-perl-1 lang-perl-2 lang-php lang-po lang-rst \ + lang-lua EXTRA_DIST += $(TESTS) \ test.mo xg-c-1.ok.po mex-test2.ok \ diff --git a/gettext-tools/tests/format-lua-1 b/gettext-tools/tests/format-lua-1 new file mode 100755 index 000000000..722a6e095 --- /dev/null +++ b/gettext-tools/tests/format-lua-1 @@ -0,0 +1,98 @@ +#! /bin/sh + +# Test recognition of Lua format strings. 
+ +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-lu-1.data" +cat <<\EOF > f-lu-1.data +# Valid: no argument +"abc%%" +# Valid: one string argument +"abc%s" +# Valid: one string argument +"abc%q" +# Valid: one character argument +"abc%c" +# Valid: one integer argument +"abc%i" +# Valid: one integer argument +"abc%d" +# Valid: one integer argument +"abc%o" +# Valid: one integer argument +"abc%u" +# Valid: one integer argument +"abc%X" +# Valid: one integer argument +"abc%x" +# Valid: one float argument +"abc%e" +# Valid: one float argument +"abc%E" +# Valid: one float argument +"abc%f" +# Valid: one float argument +"abc%g" +# Valid: one float argument +"abc%G" +# Valid: one float argument with width +"abc%4f" +# Valid: one float argument with precision +"abc%.8f" +# Valid: one float argument with width and precision +"abc%12.4f" +# Valid: three arguments +"abc%c%d%e" +# Valid: three arguments with width and/or precision +"abc%4.3s%.3f%0e" +# Invalid: unterminated +"abc%" +# Invalid: unknown format specifier +"abc%T" +# Invalid: unknown modifier +"abc%lf" +# Invalid: extra precision +"abc%1.1.1f" +# Invalid: unterminated +"abc%3" +EOF +tmpfiles="$tmpfiles f-l-1.err" +: ${XGETTEXT=xgettext} +n=0 +while read comment; do + read string + n=`expr $n + 1` + tmpfiles="$tmpfiles f-lu-1-$n.in f-lu-1-$n.po" + echo "gettext.gettext(${string});" > f-lu-1-$n.in + ${XGETTEXT} -L Lua -o f-lu-1-$n.po f-lu-1-$n.in || exit 1 + test -f f-lu-1-$n.po || exit 1 + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if grep lua-format f-lu-1-$n.po > /dev/null; then + : + else + fail=yes + fi + else + if grep lua-format f-lu-1-$n.po > /dev/null; then + fail=yes + else + : + fi + fi + if test -n "$fail"; then + echo "Format string recognition error:" 1>&2 + cat f-lu-1-$n.in 1>&2 + echo "Got:" 1>&2 + cat f-lu-1-$n.po 1>&2 + exit 1 + fi + rm -f f-lu-1-$n.in f-lu-1-$n.po +done < f-lu-1.data + +rm -fr $tmpfiles + +exit 0 + diff --git 
a/gettext-tools/tests/format-lua-2 b/gettext-tools/tests/format-lua-2 new file mode 100755 index 000000000..6e38bf60f --- /dev/null +++ b/gettext-tools/tests/format-lua-2 @@ -0,0 +1,124 @@ +#! /bin/sh + +# Test checking of Lua format strings. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-lu-2.data" +cat <<\EOF > f-lu-2.data +# Valid: %% doesn't count +msgid "abc%%def" +msgstr "xyz" +# Invalid: invalid msgstr +msgid "abc%%def" +msgstr "xyz%" +# Valid: same arguments, with different widths +msgid "abc%2sdef" +msgstr "xyz%3s" +# Invalid: too few arguments +msgid "abc%sdef%u" +msgstr "xyz%s" +# Invalid: too many arguments +msgid "abc%udef" +msgstr "xyz%uvw%c" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%d" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%u" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%o" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%x" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%X" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%E" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%f" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%g" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%G" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%.0s" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%i" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%i" +# Invalid: type incompatibility +msgid "abc%.0s" +msgstr "xyz%i" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%.0s" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%i" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%q" +# Invalid: type incompatibility +msgid "abc%q" +msgstr "xyz%s" +EOF + +: ${MSGFMT=msgfmt} +n=0 +while read comment; 
do + read msgid_line + read msgstr_line + n=`expr $n + 1` + tmpfiles="$tmpfiles f-lu-2-$n.po f-lu-2-$n.mo" + cat <<EOF > f-lu-2-$n.po +#, lua-format +${msgid_line} +${msgstr_line} +EOF + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if ${MSGFMT} --check-format -o f-lu-2-$n.mo f-lu-2-$n.po; then + : + else + fail=yes + fi + else + ${MSGFMT} --check-format -o f-lu-2-$n.mo f-lu-2-$n.po 2> /dev/null + if test $? = 1; then + : + else + fail=yes + fi + fi + if test -n "$fail"; then + echo "Format string checking error:" 1>&2 + cat f-lu-2-$n.po 1>&2 + exit 1 + fi + rm -f f-lu-2-$n.po f-lu-2-$n.mo +done < f-lu-2.data + +rm -fr $tmpfiles + +exit 0 diff --git a/gettext-tools/tests/lang-lua b/gettext-tools/tests/lang-lua new file mode 100755 index 000000000..31d1394f2 --- /dev/null +++ b/gettext-tools/tests/lang-lua @@ -0,0 +1,154 @@ +#! /bin/sh + +# Test of gettext facilities in the Lua language. +# Assumes an fr_FR locale is installed. +# Assumes the following packages are installed: lua, lua-gettext. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles prog.lua" +cat <<\EOF > prog.lua +require("gettext") +gettext.bindtextdomain("prog", ".") +gettext.textdomain("prog") +_ = gettext.gettext +n = 42 + +print(gettext.gettext("'Your command, please?', asked the waiter.")) +print(string.format(gettext.ngettext('a piece of cake', [[%d pieces of cake]], n), n)) +print(string.format(_[==[%s is replaced by %s.]==], "FF", "EUR")) +EOF + +tmpfiles="$tmpfiles prog.tmp prog.pot" +: ${XGETTEXT=xgettext} +${XGETTEXT} -o prog.tmp --omit-header --no-location prog.lua +test $? = 0 || { rm -fr $tmpfiles; exit 1; } +LC_ALL=C tr -d '\r' < prog.tmp > prog.pot +test $? = 0 || { rm -fr $tmpfiles; exit 1; } + +tmpfiles="$tmpfiles prog.ok" +cat <<\EOF > prog.ok +msgid "'Your command, please?', asked the waiter." 
+msgstr "" + +#, lua-format +msgid "a piece of cake" +msgid_plural "%d pieces of cake" +msgstr[0] "" +msgstr[1] "" + +#, lua-format +msgid "%s is replaced by %s." +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} prog.ok prog.pot || exit 1 + +tmpfiles="$tmpfiles fr.po" +cat <<\EOF > fr.po +msgid "" +msgstr "" +"Content-Type: text/plain; charset=ISO-8859-1\n" +"Plural-Forms: nplurals=2; plural=(n > 1);\n" + +msgid "'Your command, please?', asked the waiter." +msgstr "«Votre commande, s'il vous plait», dit le garçon." + +#, lua-format +msgid "a piece of cake" +msgid_plural "%d pieces of cake" +msgstr[0] "un morceau de gateau" +msgstr[1] "%d morceaux de gateau" + +#, lua-format +msgid "%s is replaced by %s." +msgstr "%s remplace %s." +EOF + +tmpfiles="$tmpfiles fr.po.tmp fr.po.new" +: ${MSGMERGE=msgmerge} +${MSGMERGE} -q -o fr.po.tmp fr.po prog.pot +test $? = 0 || { rm -fr $tmpfiles; exit 1; } +LC_ALL=C tr -d '\r' < fr.po.tmp > fr.po.new +test $? = 0 || { rm -fr $tmpfiles; exit 1; } + +: ${DIFF=diff} +${DIFF} fr.po fr.po.new || exit 1 + +tmpfiles="$tmpfiles fr" +test -d fr || mkdir fr +test -d fr/LC_MESSAGES || mkdir fr/LC_MESSAGES + +: ${MSGFMT=msgfmt} +${MSGFMT} -o fr/LC_MESSAGES/prog.mo fr.po + +# Test for presence of lua version 5.0 or newer with gettext support. +(lua -v) >/dev/null 2>/dev/null +test $? -le 1 \ + || { echo "Skipping test: lua not found"; rm -fr $tmpfiles; exit 77; } +case `lua -v 2>&1 | sed -e 's/^[^0-9]*//'` in + [5-9].*) ;; + *) echo "Skipping test: lua version too old"; rm -fr $tmpfiles; exit 77;; +esac +{ lua -v -l gettext > /dev/null 2>/dev/null; } \ + || { echo "Skipping test: lua gettext module not found" + rm -fr $tmpfiles; exit 77 + } + +# Test which of the fr_FR locales are installed. +: ${LOCALE_FR=fr_FR} +: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +if test $LOCALE_FR != none; then + LC_ALL=$LOCALE_FR ./testlocale + case $? 
in + 0) ;; + 77) LOCALE_FR=none;; + *) exit 1;; + esac +fi +if test $LOCALE_FR_UTF8 != none; then + LC_ALL=$LOCALE_FR_UTF8 ./testlocale + case $? in + 0) ;; + 77) LOCALE_FR_UTF8=none;; + *) exit 1;; + esac +fi +if test $LOCALE_FR = none && test $LOCALE_FR_UTF8 = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no french locale is installed" + else + echo "Skipping test: no french locale is supported" + fi + rm -fr $tmpfiles; exit 77 +fi + +tmpfiles="$tmpfiles prog.ok prog.oku prog.out" +: ${DIFF=diff} +cat <<\EOF > prog.ok +«Votre commande, s'il vous plait», dit le garçon. +42 morceaux de gateau +FF remplace EUR. +EOF +cat <<\EOF > prog.oku +«Votre commande, s'il vous plait», dit le garçon. +42 morceaux de gateau +FF remplace EUR. +EOF + +: ${LOCALE_FR=fr_FR} +: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +if test $LOCALE_FR != none; then + LANGUAGE= LC_ALL=$LOCALE_FR lua -l gettext prog.lua > prog.out || exit 1 + ${DIFF} prog.ok prog.out || exit 1 +fi +if test $LOCALE_FR_UTF8 != none; then + LANGUAGE= LC_ALL=$LOCALE_FR_UTF8 lua -l gettext prog.lua > prog.out || exit 1 + ${DIFF} prog.oku prog.out || exit 1 +fi + +rm -fr $tmpfiles + +exit 0 diff --git a/gettext-tools/tests/xgettext-lua-1 b/gettext-tools/tests/xgettext-lua-1 new file mode 100755 index 000000000..8ac845797 --- /dev/null +++ b/gettext-tools/tests/xgettext-lua-1 @@ -0,0 +1,71 @@ +#!/bin/sh + +# Test Lua support + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles xg-lu-1.lua" +cat <<\EOF > xg-lu-1.lua +print(_("This is a test string.")) +print(_('And another one.')) +print(_("Don't be afraid to extract this one.")) +print(_('I don\'t bite!')) +print(_("I said: \"This is a test.\"")) +print(_('He said: "You\'re right!"')) +print(_([[I like brackets.]])) +print(_([===[Brackets are awesome!]===])) +print(_(hmm["nope"])) +print({_"yep"}) +print(_["nope"]) +print(_("\097")) +EOF + +tmpfiles="$tmpfiles xg-lu-1.tmp.po xg-lu-1.po" +: ${XGETTEXT=xgettext} +${XGETTEXT} --omit-header 
--no-location --add-comments=TRANSLATORS: \ + -d xg-lu-1.tmp xg-lu-1.lua +test $? = 0 || { rm -fr $tmpfiles; exit 1; } +LC_ALL=C tr -d '\r' < xg-lu-1.tmp.po > xg-lu-1.po +test $? = 0 || { rm -fr $tmpfiles; exit 1; } + +tmpfiles="$tmpfiles xg-lu-1.ok" +cat <<EOF > xg-lu-1.ok +msgid "This is a test string." +msgstr "" + +msgid "And another one." +msgstr "" + +msgid "Don't be afraid to extract this one." +msgstr "" + +msgid "I don't bite!" +msgstr "" + +msgid "I said: \"This is a test.\"" +msgstr "" + +msgid "He said: \"You're right!\"" +msgstr "" + +msgid "I like brackets." +msgstr "" + +msgid "Brackets are awesome!" +msgstr "" + +msgid "yep" +msgstr "" + +msgid "a" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-lu-1.ok xg-lu-1.po +result=$? + +rm -fr $tmpfiles + +exit $result diff --git a/gettext-tools/tests/xgettext-lua-2 b/gettext-tools/tests/xgettext-lua-2 new file mode 100755 index 000000000..4bd9fbbd9 --- /dev/null +++ b/gettext-tools/tests/xgettext-lua-2 @@ -0,0 +1,68 @@ +#!/bin/sh + +# Test Lua support + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles xg-lu-2.lua" +cat <<\EOF > xg-lu-2.lua +-- This comment won't be extracted. +print(_"Hmm.") +-- Neither this one. +print(_"Test.") +-- TRANSLATORS: Hello translators, how are you today? +print(_"We're fine, thanks.") +--[[ Do not extract this, please ]] + +--[[ TRANSLATORS: +Attention! +]] +print(--[=[ TRANSLATORS: This is a comment for you!]=] _(--[[TRANSLATORS: Nobody else is supposed to read this!]] +"flowers" --[===[ TRANSLATORS: Nobody will see this.]===] .. --[[ TRANSLATORS: How sad.]] " and " .. --[[Secret text!]] +"biscuits" --[=[TRANSLATORS: Hey you!]=])) + +--[==[TRANSLATORS: :-]]==] +print(_"A string.") +EOF + +tmpfiles="$tmpfiles xg-lu-2.tmp.po xg-lu-2.po" +: ${XGETTEXT=xgettext} +${XGETTEXT} --omit-header --no-location --add-comments=TRANSLATORS: \ + -d xg-lu-2.tmp xg-lu-2.lua +test $? = 0 || { rm -fr $tmpfiles; exit 1; } +LC_ALL=C tr -d '\r' < xg-lu-2.tmp.po > xg-lu-2.po +test $? 
= 0 || { rm -fr $tmpfiles; exit 1; } + +tmpfiles="$tmpfiles xg-lu-2.ok" +cat <<EOF > xg-lu-2.ok +msgid "Hmm." +msgstr "" + +msgid "Test." +msgstr "" + +#. TRANSLATORS: Hello translators, how are you today? +msgid "We're fine, thanks." +msgstr "" + +#. TRANSLATORS: +#. Attention! +#. +#. TRANSLATORS: This is a comment for you! +#. TRANSLATORS: Nobody else is supposed to read this! +msgid "flowers and biscuits" +msgstr "" + +#. TRANSLATORS: :-] +msgid "A string." +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-lu-2.ok xg-lu-2.po +result=$? + +rm -fr $tmpfiles + +exit $result