From: Andreas Stricker Date: Tue, 23 Feb 2010 13:14:09 +0000 (+0100) Subject: Support for JavaScript. X-Git-Tag: v0.18.3~64 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fe8db3b5b44cd23385e3dfccf54249b742d8d994;p=thirdparty%2Fgettext.git Support for JavaScript. --- diff --git a/gettext-tools/ChangeLog b/gettext-tools/ChangeLog index 9d2aad69b..ca9e2f0f3 100644 --- a/gettext-tools/ChangeLog +++ b/gettext-tools/ChangeLog @@ -1,3 +1,8 @@ +2013-04-17 Andreas Stricker + + Support for JavaScript. + * woe32dll/gettextsrc-exports.c: Export formatstring_javascript. + 2013-04-15 Daiki Ueno * woe32dll/gettextsrc-exports.c: Export formatstring_lua. diff --git a/gettext-tools/doc/ChangeLog b/gettext-tools/doc/ChangeLog index 5b597d67f..6eccb0655 100644 --- a/gettext-tools/doc/ChangeLog +++ b/gettext-tools/doc/ChangeLog @@ -1,3 +1,12 @@ +2013-04-17 Andreas Stricker + + Support for JavaScript. + * gettext.texi (PO Files): Mention javascript-format. + (javascript-format): New subsection. + (JavaScript): New subsection. + * xgettext.texi: Document JavaScript source language. Document + that it is applicable to --flag. + 2013-04-11 Ľubomír Remák Support for Lua. diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index f479ed853..e09632c9d 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -410,6 +410,7 @@ The Translator's View * kde-format:: KDE Format Strings * boost-format:: Boost Format Strings * lua-format:: Lua Format Strings +* javascript-format:: JavaScript Format Strings Individual Programming Languages @@ -435,6 +436,7 @@ Individual Programming Languages * Pike:: Pike * GCC-source:: GNU Compiler Collection sources * Lua:: Lua +* JavaScript:: JavaScript sh - Shell Script @@ -1631,6 +1633,12 @@ Likewise for Boost, see @ref{boost-format}. @kwindex no-lua-format@r{ flag} Likewise for Lua, see @ref{lua-format}. 
+@item javascript-format +@kwindex javascript-format@r{ flag} +@itemx no-javascript-format +@kwindex no-javascript-format@r{ flag} +Likewise for JavaScript, see @ref{javascript-format}. + @end table @kwindex msgctxt @@ -8974,6 +8982,7 @@ strings. * kde-format:: KDE Format Strings * boost-format:: Boost Format Strings * lua-format:: Lua Format Strings +* javascript-format:: JavaScript Format Strings @end menu @node c-format, objc-format, Translators for other Languages, Translators for other Languages @@ -9230,12 +9239,27 @@ such as @samp{%1$+5d}, or may be surrounded by vertical bars, such as @samp{%|1$+5d|} or @samp{%|1$+5|}, or consists of just an argument number between percent signs, such as @samp{%1%}. -@node lua-format, , boost-format, Translators for other Languages +@node lua-format, javascript-format, boost-format, Translators for other Languages @subsection Lua Format Strings Lua format strings are described in the Lua reference manual, section @w{String Manipulation}, @uref{http://www.lua.org/manual/5.1/manual.html#pdf-string.format}. +@node javascript-format, , lua-format, Translators for other Languages +@subsection JavaScript Format Strings + +Although the JavaScript specification itself does not define any format +strings, many JavaScript implementations provide printf-like +functions. @code{xgettext} understands a set of common format strings +used in popular JavaScript implementations including Gjs, Seed, and +Node.js. In such a format string, a directive starts with @samp{%} +and is finished by a specifier: @samp{%} denotes a literal percent +sign, @samp{c} denotes a character, @samp{s} denotes a string, +@samp{b}, @samp{d}, @samp{o}, @samp{x}, @samp{X} denote an integer, +@samp{f} denotes a floating-point number, @samp{j} denotes a JSON +object. 
+ + @node Maintainers for other Languages, List of Programming Languages, Translators for other Languages, Programming Languages @section The Maintainer's View @@ -9340,6 +9364,7 @@ that language, and to combine the resulting files using @code{msgcat}. * Pike:: Pike * GCC-source:: GNU Compiler Collection sources * Lua:: Lua +* JavaScript:: JavaScript @end menu @node C, sh, List of Programming Languages, List of Programming Languages @@ -11800,7 +11825,7 @@ Uses autoconf macros yes @end table -@node Lua, , GCC-source, List of Programming Languages +@node Lua, JavaScript, GCC-source, List of Programming Languages @subsection Lua @table @asis @@ -11862,6 +11887,60 @@ On platforms without gettext, the functions are not available. --- @end table +@node JavaScript +@subsection JavaScript + +@table @asis +@item RPMs +js + +@item File extension +@code{js} + +@item String syntax +@itemize @bullet + +@item @code{"abc"} + +@item @code{'abc'} + +@end itemize + +@item gettext shorthand +@code{_("abc")} + +@item gettext/ngettext functions +@code{gettext}, @code{dgettext}, @code{dcgettext}, @code{ngettext}, +@code{dngettext} + +@item textdomain +@code{textdomain} function + +@item bindtextdomain +@code{bindtextdomain} function + +@item setlocale +automatic + +@item Prerequisite +--- + +@item Use or emulate GNU gettext +use, or emulate + +@item Extractor +@code{xgettext} + +@item Formatting with positions +--- + +@item Portability +On platforms without gettext, the functions are not available. + +@item po-mode marking +--- +@end table + @c This is the template for new languages. 
@ignore diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi index b6dd0dc43..ccee65794 100644 --- a/gettext-tools/doc/xgettext.texi +++ b/gettext-tools/doc/xgettext.texi @@ -73,7 +73,7 @@ are @code{C}, @code{C++}, @code{ObjectiveC}, @code{PO}, @code{Shell}, @code{Python}, @code{Lisp}, @code{EmacsLisp}, @code{librep}, @code{Scheme}, @code{Smalltalk}, @code{Java}, @code{JavaProperties}, @code{C#}, @code{awk}, @code{YCP}, @code{Tcl}, @code{Perl}, @code{PHP}, @code{GCC-source}, -@code{NXStringTable}, @code{RST}, @code{Glade}, @code{Lua}. +@code{NXStringTable}, @code{RST}, @code{Glade}, @code{Lua}, @code{JavaScript}. @item -C @itemx --c++ @@ -137,7 +137,7 @@ Extract all strings. This option has an effect with most languages, namely C, C++, ObjectiveC, Shell, Python, Lisp, EmacsLisp, librep, Java, C#, awk, Tcl, Perl, PHP, -GCC-source, Glade, Lua. +GCC-source, Glade, Lua, JavaScript. @item -k[@var{keywordspec}] @itemx --keyword[=@var{keywordspec}] @@ -180,7 +180,7 @@ escaped. This option has an effect with most languages, namely C, C++, ObjectiveC, Shell, Python, Lisp, EmacsLisp, librep, Java, C#, awk, Tcl, Perl, PHP, -GCC-source, Glade, Lua. +GCC-source, Glade, Lua, JavaScript. The default keyword specifications, which are always looked for if not explicitly disabled, are language dependent. They are: @@ -250,6 +250,11 @@ For Glade 1: @code{label}, @code{title}, @code{text}, @code{format}, For Lua: @code{_}, @code{gettext.gettext}, @code{gettext.dgettext:2}, @code{gettext.dcgettext:2}, @code{gettext.ngettext:1,2}, @code{gettext.dngettext:2,3}, @code{gettext.dcngettext:2,3}. + +@item +For JavaScript: @code{_}, @code{gettext}, @code{dgettext:2}, +@code{dcgettext:2}, @code{ngettext:1,2}, @code{dngettext:2,3}, +@code{pgettext:1c,2}, @code{dpgettext:2c,3}. @end itemize To disable the default keyword specifications, the option @samp{-k} or @@ -302,7 +307,7 @@ lead to a crash at runtime. 
@* This option has an effect with most languages, namely C, C++, ObjectiveC, Shell, Python, Lisp, EmacsLisp, librep, Scheme, Java, C#, awk, YCP, Tcl, Perl, PHP, -GCC-source, Lua. +GCC-source, Lua, JavaScript. @item -T @itemx --trigraphs diff --git a/gettext-tools/libgettextpo/ChangeLog b/gettext-tools/libgettextpo/ChangeLog index 576966f30..633f84e86 100644 --- a/gettext-tools/libgettextpo/ChangeLog +++ b/gettext-tools/libgettextpo/ChangeLog @@ -1,3 +1,8 @@ +2013-04-17 Andreas Stricker + + Support for JavaScript. + * Makefile.am (libgettextpo_la_AUXSOURCES): Add format-javascript.c. + 2013-04-11 Ľubomír Remák * Makefile.am (libgettextpo_la_AUXSOURCES): Add format-lua.c. diff --git a/gettext-tools/libgettextpo/Makefile.am b/gettext-tools/libgettextpo/Makefile.am index 9765f6ac1..417f44a50 100644 --- a/gettext-tools/libgettextpo/Makefile.am +++ b/gettext-tools/libgettextpo/Makefile.am @@ -70,6 +70,7 @@ libgettextpo_la_AUXSOURCES = \ ../src/format-librep.c \ ../src/format-scheme.c \ ../src/format-java.c \ + ../src/format-javascript.c \ ../src/format-csharp.c \ ../src/format-awk.c \ ../src/format-pascal.c \ diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 11a7c87cd..f5dc8a73d 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,28 @@ +2013-04-17 Andreas Stricker + + Support for JavaScript. + * message.h (format_type): New enum value 'format_javascript'. + (NFORMATS): Increment. + * message.c (format_language): Add format_javascript entry. + (format_language_pretty): Likewise. + * format.h (formatstring_javascript): New declaration. + * format-javascript.c: New file. + * format.c (formatstring_parsers): Add formatstring_javascript. + * x-javascript.h: New file. + * x-javascript.c: New file. + * xgettext.c: Include x-javascript.h. + (flag_table_javascript): New variable. + (main): Invoke init_flag_table_javascript, x_javascript_extract_all, + x_javascript_keyword. + (usage): Mention JavaScript source language. 
+ (xgettext_record_flag): Handle format_javascript. + (language_to_extractor): Add JavaScript rule. + (extension_to_language): Add JavaScript rule. + * Makefile.am (noinst_HEADERS): Add x-javascript.h. + (FORMAT_SOURCE): Add format-javascript.c. + (xgettext_SOURCES): Add x-javascript.c. + * FILES: Update. + 2013-04-16 Ľubomír Remák Support for escape sequences added in Lua 5.2. diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES index f48c9a1a0..02bf07966 100644 --- a/gettext-tools/src/FILES +++ b/gettext-tools/src/FILES @@ -234,6 +234,7 @@ format-qt-plural.c Format string handling for Qt plural forms. format-kde.c Format string handling for KDE. format-boost.c Format string handling for Boost. format-lua.c Format string handling for Lua. +format-javascript.c Format string handling for JavaScript. format.c Table of the language dependent format string handlers. plural-exp.c @@ -337,6 +338,9 @@ msgl-check.c | x-lua.h | x-lua.c | String extractor for Lua. +| x-javascript.h +| x-javascript.c +| String extractor for JavaScript. | xgettext.c | Main source for the 'xgettext' program. | diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index b7bb9d788..ccd7d24d8 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -51,7 +51,8 @@ write-qt.h \ po-time.h plural-table.h lang-table.h format.h filters.h \ xgettext.h x-c.h x-po.h x-sh.h x-python.h x-lisp.h x-elisp.h x-librep.h \ x-scheme.h x-smalltalk.h x-java.h x-properties.h x-csharp.h x-awk.h x-ycp.h \ -x-tcl.h x-perl.h x-php.h x-stringtable.h x-rst.h x-glade.h x-lua.h +x-tcl.h x-perl.h x-php.h x-stringtable.h x-rst.h x-glade.h x-lua.h \ +x-javascript.h EXTRA_DIST += FILES project-id ChangeLog.0 @@ -136,7 +137,8 @@ FORMAT_SOURCE += \ format-qt-plural.c \ format-kde.c \ format-boost.c \ - format-lua.c + format-lua.c \ + format-javascript.c # libgettextsrc contains all code that is needed by at least two programs. 
libgettextsrc_la_SOURCES = \ @@ -174,7 +176,7 @@ endif xgettext_SOURCES += \ x-c.c x-po.c x-sh.c x-python.c x-lisp.c x-elisp.c x-librep.c x-scheme.c \ x-smalltalk.c x-java.c x-csharp.c x-awk.c x-ycp.c x-tcl.c x-perl.c x-php.c \ - x-rst.c x-glade.c x-lua.c + x-rst.c x-glade.c x-lua.c x-javascript.c if !WOE32DLL msgattrib_SOURCES = msgattrib.c else diff --git a/gettext-tools/src/format-javascript.c b/gettext-tools/src/format-javascript.c new file mode 100644 index 000000000..d76a8d7ea --- /dev/null +++ b/gettext-tools/src/format-javascript.c @@ -0,0 +1,333 @@ +/* JavaScript format strings. + Copyright (C) 2001-2004, 2006-2009, 2013 Free Software Foundation, Inc. + Written by Andreas Stricker , 2010. + It's based on python format module from Bruno Haible. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include + +#include "format.h" +#include "c-ctype.h" +#include "xalloc.h" +#include "xvasprintf.h" +#include "format-invalid.h" +#include "gettext.h" + +#define _(str) gettext (str) + +/* Although JavaScript specification itself does not define any format + strings, many implementations provide printf-like functions. 
+   We provide a permissive parser which accepts commonly used format
+   strings, where:
+
+   A directive
+   - starts with '%',
+   - is optionally followed by any of the characters '0', '-', ' ',
+     or, each of which acts as a flag,
+   - is optionally followed by a width specification: a nonempty digit
+     sequence,
+   - is optionally followed by '.' and a precision specification: a nonempty
+     digit sequence,
+   - is finished by a specifier
+       - 's', that needs a string argument,
+       - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
+       - 'f', that need a floating-point argument,
+       - 'c', that needs a character argument.
+       - 'j', that needs an argument of any type.
+   Additionally there is the directive '%%', which takes no argument.  */
+/* Argument kind recorded for each directive by format_parse:
+   '%%' gives FAT_NONE (and is not stored in the argument list),
+   'j' gives FAT_ANY, 'c' gives FAT_CHARACTER, 's' gives FAT_STRING,
+   'b', 'd', 'o', 'x', 'X' give FAT_INTEGER and 'f' gives FAT_FLOAT.  */
+enum format_arg_type
+{
+  FAT_NONE,
+  FAT_ANY,
+  FAT_CHARACTER,
+  FAT_STRING,
+  FAT_INTEGER,
+  FAT_FLOAT
+};
+/* Result of parsing one format string.
+   directives        - number of directives seen, '%%' included.
+   format_args_count - number of entries in use in format_args.
+   allocated         - number of entries format_args has room for.
+   format_args       - argument kinds, one per directive other than '%%',
+                       in order of appearance; NULL while empty.  */
+struct spec
+{
+  unsigned int directives;
+  unsigned int format_args_count;
+  unsigned int allocated;
+  enum format_arg_type *format_args;
+};
+
+/* Locale independent test for a decimal digit.
+   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
+   isdigit must be an 'unsigned char'.)  */
+#undef isdigit
+#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
+
+/* Parse FORMAT and return a freshly allocated 'struct spec' describing its
+   directives, or NULL if FORMAT is not a valid format string.  On failure
+   *INVALID_REASON is set from one of the INVALID_* macros and any partially
+   built argument list is freed.
+   TRANSLATED is not used by this parser.  FDI and format_start are not
+   referenced directly; presumably the FDI_SET macro (defined outside this
+   file) uses them to mark directive boundaries - TODO confirm in format.h.
+   NOTE(review): the grammar summary in the comment above does not match
+   the code exactly.  The flag loop accepts '-', '+', ' ', '0' and 'I';
+   the digits after '.' may be absent; and the conversion 'u' falls into
+   the default case and is rejected.  Confirm whether the comment or the
+   code states the intended grammar.  */
+static void *
+format_parse (const char *format, bool translated, char *fdi,
+              char **invalid_reason)
+{
+  const char *const format_start = format;
+  struct spec spec;
+  struct spec *result;
+
+  spec.directives = 0;
+  spec.format_args_count = 0;
+  spec.allocated = 0;
+  spec.format_args = NULL;
+
+  for (; *format != '\0';)
+    if (*format++ == '%')
+      {
+        /* A directive.  */
+        enum format_arg_type type;
+
+        FDI_SET (format - 1, FMTDIR_START);
+        spec.directives++;
+        /* Skip any flag characters.  */
+        while (*format == '-' || *format == '+' || *format == ' '
+               || *format == '0' || *format == 'I')
+          format++;
+        /* Skip the optional width.  */
+        while (isdigit (*format))
+          format++;
+        /* Skip the optional precision.  */
+        if (*format == '.')
+          {
+            format++;
+
+            while (isdigit (*format))
+              format++;
+          }
+        /* Classify the conversion specifier.  */
+        switch (*format)
+          {
+          case '%':
+            type = FAT_NONE;
+            break;
+          case 'c':
+            type = FAT_CHARACTER;
+            break;
+          case 's':
+            type = FAT_STRING;
+            break;
+          case 'b': case 'd': case 'o': case 'x': case 'X':
+            type = FAT_INTEGER;
+            break;
+          case 'f':
+            type = FAT_FLOAT;
+            break;
+          case 'j':
+            type = FAT_ANY;
+            break;
+          default:
+            if (*format == '\0')
+              {
+                *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+                FDI_SET (format - 1, FMTDIR_ERROR);
+              }
+            else
+              {
+                *invalid_reason =
+                  INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
+                FDI_SET (format, FMTDIR_ERROR);
+              }
+            goto bad_format;
+          }
+        /* Every directive except '%%' consumes one argument.  */
+        if (*format != '%')
+          {
+            if (spec.allocated == spec.format_args_count)
+              {
+                spec.allocated = 2 * spec.allocated + 1;
+                spec.format_args = (enum format_arg_type *) xrealloc (spec.format_args, spec.allocated * sizeof (enum format_arg_type));
+              }
+            spec.format_args[spec.format_args_count] = type;
+            spec.format_args_count++;
+          }
+
+        FDI_SET (format, FMTDIR_END);
+
+        format++;
+      }
+
+  result = XMALLOC (struct spec);
+  *result = spec;
+  return result;
+
+ bad_format:
+  if (spec.format_args != NULL)
+    free (spec.format_args);
+  return NULL;
+}
+/* Free a descriptor returned by format_parse, including its argument
+   list.  */
+static void
+format_free (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+
+  if (spec->format_args != NULL)
+    free (spec->format_args);
+  free (spec);
+}
+/* Return the number of directives counted by format_parse for DESCR,
+   '%%' included.  */
+static int
+format_get_number_of_directives (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+
+  return spec->directives;
+}
+/* Compare the argument lists of a msgid descriptor and a msgstr descriptor.
+   Return true if they are incompatible, false otherwise.  The lists must
+   have the same length and each pair of argument kinds must be equal;
+   when EQUALITY is false, a FAT_ANY on either side matches any kind.
+   Each problem found is reported through ERROR_LOGGER if it is non-NULL.  */
+static bool
+format_check (void *msgid_descr, void *msgstr_descr, bool equality,
+              formatstring_error_logger_t error_logger,
+              const char *pretty_msgid, const char *pretty_msgstr)
+{
+  struct spec *spec1 = (struct spec *) msgid_descr;
+  struct spec *spec2 = (struct spec *) msgstr_descr;
+  bool err = false;
+
+  if (spec1->format_args_count + spec2->format_args_count > 0)
+    {
+      unsigned int i;
+
+      /* Check the argument types are the same.  */
+      if (spec1->format_args_count != spec2->format_args_count)
+        {
+          if (error_logger)
+            error_logger (_("number of format specifications in '%s' and '%s' does not match"),
+                          pretty_msgid, pretty_msgstr);
+          err = true;
+        }
+      else
+        for (i = 0; i < spec2->format_args_count; i++)
+          if (!(spec1->format_args[i] == spec2->format_args[i]
+                || (!equality
+                    && (spec1->format_args[i] == FAT_ANY
+                        || spec2->format_args[i] == FAT_ANY))))
+            {
+              if (error_logger)
+                error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
+                              pretty_msgid, pretty_msgstr, i + 1);
+              err = true;
+            }
+    }
+
+  return err;
+}
+
+/* The JavaScript format string parser: the functions above, in the member
+   order of 'struct formatstring_parser'.  The fourth member is left NULL.  */
+struct formatstring_parser formatstring_javascript =
+{
+  format_parse,
+  format_free,
+  format_get_number_of_directives,
+  NULL,
+  format_check
+};
+
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+   format_parse for strings read from standard input. 
*/
+
+#include <stdio.h>
+
+/* Print the argument list stored in DESCR as a parenthesized,
+   space-separated list of one-letter codes: '*' for FAT_ANY, 'c' for
+   FAT_CHARACTER, 's' for FAT_STRING, 'i' for FAT_INTEGER and 'f' for
+   FAT_FLOAT.  Print "INVALID" if DESCR is NULL.  Any other argument kind
+   aborts, since format_parse never stores one.  */
+static void
+format_print (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+  unsigned int i;
+
+  if (spec == NULL)
+    {
+      printf ("INVALID");
+      return;
+    }
+
+  printf ("(");
+  for (i = 0; i < spec->format_args_count; i++)
+    {
+      if (i > 0)
+        printf (" ");
+      switch (spec->format_args[i])
+        {
+        case FAT_ANY:
+          printf ("*");
+          break;
+        case FAT_CHARACTER:
+          printf ("c");
+          break;
+        case FAT_STRING:
+          printf ("s");
+          break;
+        case FAT_INTEGER:
+          printf ("i");
+          break;
+        case FAT_FLOAT:
+          printf ("f");
+          break;
+        default:
+          abort ();
+        }
+    }
+  printf (")");
+}
+
+/* Read format strings from standard input, one per line, and print the
+   parsed argument list of each one.  For an invalid format string, also
+   print the reason reported by format_parse.  Returns 0 at end of input.  */
+int
+main ()
+{
+  for (;;)
+    {
+      char *line = NULL;
+      size_t line_size = 0;
+      int line_len;
+      char *invalid_reason;
+      void *descr;
+
+      line_len = getline (&line, &line_size, stdin);
+      if (line_len < 0)
+        {
+          /* getline may have allocated a buffer even though it failed.  */
+          free (line);
+          break;
+        }
+      /* Strip the trailing newline, if any.  */
+      if (line_len > 0 && line[line_len - 1] == '\n')
+        line[--line_len] = '\0';
+
+      invalid_reason = NULL;
+      descr = format_parse (line, false, NULL, &invalid_reason);
+
+      format_print (descr);
+      printf ("\n");
+      if (descr == NULL)
+        printf ("%s\n", invalid_reason);
+      else
+        /* Release the descriptor; it is not needed after printing.  */
+        format_free (descr);
+
+      free (invalid_reason);
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. 
-I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-javascript.c ../gnulib-lib/libgettextlib.la" + * End: + */ +#endif /* TEST */ diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c index 7200a9430..b52e6440b 100644 --- a/gettext-tools/src/format.c +++ b/gettext-tools/src/format.c @@ -58,7 +58,8 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] = /* format_qt_plural */ &formatstring_qt_plural, /* format_kde */ &formatstring_kde, /* format_boost */ &formatstring_boost, - /* format_lua */ &formatstring_lua + /* format_lua */ &formatstring_lua, + /* format_javascript */ &formatstring_javascript }; /* Check whether both formats strings contain compatible format diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h index 9d3530eac..2297e6d50 100644 --- a/gettext-tools/src/format.h +++ b/gettext-tools/src/format.h @@ -120,6 +120,7 @@ extern DLL_VARIABLE struct formatstring_parser formatstring_qt_plural; extern DLL_VARIABLE struct formatstring_parser formatstring_kde; extern DLL_VARIABLE struct formatstring_parser formatstring_boost; extern DLL_VARIABLE struct formatstring_parser formatstring_lua; +extern DLL_VARIABLE struct formatstring_parser formatstring_javascript; /* Table of all format string parsers. 
*/ extern DLL_VARIABLE struct formatstring_parser *formatstring_parsers[NFORMATS]; diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c index 4d1ce6ec7..4e627761a 100644 --- a/gettext-tools/src/message.c +++ b/gettext-tools/src/message.c @@ -58,7 +58,8 @@ const char *const format_language[NFORMATS] = /* format_qt_plursl */ "qt-plural", /* format_kde */ "kde", /* format_boost */ "boost", - /* format_lua */ "lua" + /* format_lua */ "lua", + /* format_javascript */ "javascript" }; const char *const format_language_pretty[NFORMATS] = @@ -87,7 +88,8 @@ const char *const format_language_pretty[NFORMATS] = /* format_qt_plural */ "Qt plural", /* format_kde */ "KDE", /* format_boost */ "Boost", - /* format_lua */ "Lua" + /* format_lua */ "Lua", + /* format_javascript */ "JavaScript" }; diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h index 24d6c16b3..91e88a483 100644 --- a/gettext-tools/src/message.h +++ b/gettext-tools/src/message.h @@ -67,9 +67,10 @@ enum format_type format_qt_plural, format_kde, format_boost, - format_lua + format_lua, + format_javascript }; -#define NFORMATS 25 /* Number of format_type enum values. */ +#define NFORMATS 26 /* Number of format_type enum values. */ extern DLL_VARIABLE const char *const format_language[NFORMATS]; extern DLL_VARIABLE const char *const format_language_pretty[NFORMATS]; diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c new file mode 100644 index 000000000..312f3f632 --- /dev/null +++ b/gettext-tools/src/x-javascript.c @@ -0,0 +1,1683 @@ +/* xgettext JavaScript backend. + Copyright (C) 2002-2003, 2005-2009 Free Software Foundation, Inc. + + This file was written by Andreas Stricker , 2010 + It's based on x-python from Bruno Haible. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +/* Specification. */ +#include "x-javascript.h" + +#include +#include +#include +#include +#include +#include + +#include "message.h" +#include "xgettext.h" +#include "error.h" +#include "error-progname.h" +#include "progname.h" +#include "basename.h" +#include "xerror.h" +#include "xvasprintf.h" +#include "xalloc.h" +#include "c-strstr.h" +#include "c-ctype.h" +#include "po-charset.h" +#include "uniname.h" +#include "unistr.h" +#include "gettext.h" + +#define _(s) gettext(s) + +#define max(a,b) ((a) > (b) ? (a) : (b)) + +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + +/* The JavaScript aka ECMA-Script syntax is defined in ECMA-262 + specification: + http://www.ecma-international.org/publications/standards/Ecma-262.htm */ + +/* ====================== Keyword set customization. ====================== */ + +/* If true extract all strings. 
*/
+static bool extract_all = false;
+/* Keyword table, and whether init_keywords still has to add the built-in
+   default keywords to it.  */
+static hash_table keywords;
+static bool default_keywords = true;
+
+/* Switch on extraction of all strings by setting extract_all.  */
+void
+x_javascript_extract_all ()
+{
+  extract_all = true;
+}
+
+/* Register the keyword specification NAME, for example 'ngettext:1,2'.
+   A NULL NAME registers nothing and instead disables the built-in
+   default keywords.  Otherwise the keywords table is created on first
+   use, NAME is split by split_keywordspec, and the identifier part is
+   inserted together with its call shape unless it still contains a
+   colon.  */
+void
+x_javascript_keyword (const char *name)
+{
+  if (name == NULL)
+    default_keywords = false;
+  else
+    {
+      const char *end;
+      struct callshape shape;
+      const char *colon;
+
+      if (keywords.table == NULL)
+        hash_init (&keywords, 100);
+
+      split_keywordspec (name, &end, &shape);
+
+      /* The characters between name and end should form a valid C identifier.
+         A colon means an invalid parse in split_keywordspec().  */
+      colon = strchr (name, ':');
+      if (colon == NULL || colon >= end)
+        insert_keyword_callshape (&keywords, name, end - name, &shape);
+    }
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.
+   The default keywords are added at most once: default_keywords is
+   cleared after they have been registered.  */
+static void
+init_keywords ()
+{
+  if (default_keywords)
+    {
+      /* When adding new keywords here, also update the documentation in
+         xgettext.texi!  */
+      x_javascript_keyword ("gettext");
+      x_javascript_keyword ("dgettext:2");
+      x_javascript_keyword ("dcgettext:2");
+      x_javascript_keyword ("ngettext:1,2");
+      x_javascript_keyword ("dngettext:2,3");
+      x_javascript_keyword ("pgettext:1c,2");
+      x_javascript_keyword ("dpgettext:2c,3");
+      x_javascript_keyword ("_");
+      default_keywords = false;
+    }
+}
+/* Record the pass-javascript-format flag for the message arguments
+   (singular and, where present, plural) of the default keywords.  */
+void
+init_flag_table_javascript ()
+{
+  xgettext_record_flag ("gettext:1:pass-javascript-format");
+  xgettext_record_flag ("dgettext:2:pass-javascript-format");
+  xgettext_record_flag ("dcgettext:2:pass-javascript-format");
+  xgettext_record_flag ("ngettext:1:pass-javascript-format");
+  xgettext_record_flag ("ngettext:2:pass-javascript-format");
+  xgettext_record_flag ("dngettext:2:pass-javascript-format");
+  xgettext_record_flag ("dngettext:3:pass-javascript-format");
+  xgettext_record_flag ("pgettext:2:pass-javascript-format");
+  xgettext_record_flag ("dpgettext:3:pass-javascript-format");
+  xgettext_record_flag ("_:1:pass-javascript-format");
+}
+
+
+/* ======================== Reading of characters.  ======================== */
+
+/* Real filename, used in error messages about the input file.  */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages.  */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream.  */
+static FILE *fp;
+
+
+/* 1. line_number handling.  */
+
+/* Maximum used, roughly a safer MB_LEN_MAX.  */
+#define MAX_PHASE1_PUSHBACK 16
+static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
+static int phase1_pushback_length;
+
+/* Read the next single byte from the input file. 
*/
+static int
+phase1_getc ()
+{
+  int c;
+
+  /* Bytes that were pushed back are handed out first, newest first.  */
+  if (phase1_pushback_length)
+    c = phase1_pushback[--phase1_pushback_length];
+  else
+    {
+      c = getc (fp);
+
+      if (c == EOF)
+        {
+          /* Distinguish a read error (fatal) from a plain end of file.  */
+          if (ferror (fp))
+            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+                   real_file_name);
+          return EOF;
+        }
+    }
+
+  /* line_number counts the newlines handed out so far.  */
+  if (c == '\n')
+    ++line_number;
+
+  return c;
+}
+
+/* Supports MAX_PHASE1_PUSHBACK characters of pushback.
+   Pushing back EOF is a no-op.  Pushing back a newline undoes the
+   line_number increment done by phase1_getc.  Exceeding the pushback
+   capacity aborts.  */
+static void
+phase1_ungetc (int c)
+{
+  if (c != EOF)
+    {
+      if (c == '\n')
+        --line_number;
+
+      if (phase1_pushback_length == SIZEOF (phase1_pushback))
+        abort ();
+      phase1_pushback[phase1_pushback_length++] = c;
+    }
+}
+
+
+/* Phase 2: Conversion to Unicode.
+   For now, we expect JavaScript files to be encoded as UTF-8.  */
+
+/* End-of-file indicator for functions returning an UCS-4 character.  */
+#define UEOF -1
+
+static lexical_context_ty lexical_context;
+
+static int phase2_pushback[max (9, UNINAME_MAX + 3)];
+static int phase2_pushback_length;
+
+/* Read the next Unicode UCS-4 character from the input file.
+   Returns UEOF at end of input.  Pushed-back characters are returned
+   first.  Otherwise the bytes delivered by phase1_getc are decoded
+   according to xgettext_current_source_encoding:
+   - ASCII: a non-ASCII byte is a fatal error;
+   - any encoding other than UTF-8: bytes are fed to iconv one at a time
+     until one character has been produced; conversion errors are fatal;
+   - UTF-8: the bytes of one sequence are collected and decoded with
+     u8_mbtouc.  */
+static int
+phase2_getc ()
+{
+  if (phase2_pushback_length)
+    return phase2_pushback[--phase2_pushback_length];
+
+  if (xgettext_current_source_encoding == po_charset_ascii)
+    {
+      int c = phase1_getc ();
+      if (c == EOF)
+        return UEOF;
+      if (!c_isascii (c))
+        {
+          multiline_error (xstrdup (""),
+                           xasprintf ("%s\n%s\n",
+                                      non_ascii_error_message (lexical_context,
+                                                               real_file_name,
+                                                               line_number),
+                                      _("\
+Please specify the source encoding through --from-code\n")));
+          exit (EXIT_FAILURE);
+        }
+      return c;
+    }
+  else if (xgettext_current_source_encoding != po_charset_utf8)
+    {
+#if HAVE_ICONV
+      /* Use iconv on an increasing number of bytes.  Read only as many bytes
+         through phase1_getc as needed.  This is needed to give reasonable
+         interactive behaviour when fp is connected to an interactive tty.  */
+      unsigned char buf[MAX_PHASE1_PUSHBACK];
+      size_t bufcount;
+      int c = phase1_getc ();
+      if (c == EOF)
+        return UEOF;
+      buf[0] = (unsigned char) c;
+      bufcount = 1;
+
+      for (;;)
+        {
+          unsigned char scratchbuf[6];
+          const char *inptr = (const char *) &buf[0];
+          size_t insize = bufcount;
+          char *outptr = (char *) &scratchbuf[0];
+          size_t outsize = sizeof (scratchbuf);
+
+          size_t res = iconv (xgettext_current_source_iconv,
+                              (ICONV_CONST char **) &inptr, &insize,
+                              &outptr, &outsize);
+          /* We expect that a character has been produced if and only if
+             some input bytes have been consumed.  */
+          if ((insize < bufcount) != (outsize < sizeof (scratchbuf)))
+            abort ();
+          if (outsize == sizeof (scratchbuf))
+            {
+              /* No character has been produced.  Must be an error.  */
+              if (res != (size_t)(-1))
+                abort ();
+
+              if (errno == EILSEQ)
+                {
+                  /* An invalid multibyte sequence was encountered.  */
+                  multiline_error (xstrdup (""),
+                                   xasprintf (_("\
+%s:%d: Invalid multibyte sequence.\n\
+Please specify the correct source encoding through --from-code\n"),
+                                   real_file_name, line_number));
+                  exit (EXIT_FAILURE);
+                }
+              else if (errno == EINVAL)
+                {
+                  /* An incomplete multibyte character.  */
+                  int c;
+
+                  if (bufcount == MAX_PHASE1_PUSHBACK)
+                    {
+                      /* An overlong incomplete multibyte sequence was
+                         encountered.  */
+                      multiline_error (xstrdup (""),
+                                       xasprintf (_("\
+%s:%d: Long incomplete multibyte sequence.\n\
+Please specify the correct source encoding through --from-code\n"),
+                                       real_file_name, line_number));
+                      exit (EXIT_FAILURE);
+                    }
+
+                  /* Read one more byte and retry iconv.  */
+                  c = phase1_getc ();
+                  if (c == EOF)
+                    {
+                      multiline_error (xstrdup (""),
+                                       xasprintf (_("\
+%s:%d: Incomplete multibyte sequence at end of file.\n\
+Please specify the correct source encoding through --from-code\n"),
+                                       real_file_name, line_number));
+                      exit (EXIT_FAILURE);
+                    }
+                  if (c == '\n')
+                    {
+                      /* phase1_getc has already counted this newline,
+                         hence line_number - 1.  */
+                      multiline_error (xstrdup (""),
+                                       xasprintf (_("\
+%s:%d: Incomplete multibyte sequence at end of line.\n\
+Please specify the correct source encoding through --from-code\n"),
+                                       real_file_name, line_number - 1));
+                      exit (EXIT_FAILURE);
+                    }
+                  buf[bufcount++] = (unsigned char) c;
+                }
+              else
+                error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"),
+                       real_file_name, line_number);
+            }
+          else
+            {
+              size_t outbytes = sizeof (scratchbuf) - outsize;
+              size_t bytes = bufcount - insize;
+              ucs4_t uc;
+
+              /* We expect that one character has been produced.  */
+              if (bytes == 0)
+                abort ();
+              if (outbytes == 0)
+                abort ();
+              /* Push back the unused bytes.  iconv consumed the first
+                 BYTES bytes of buf, so the unused ones are the last INSIZE
+                 bytes.  phase1_ungetc is last-in first-out, therefore push
+                 them back starting from the end of buf.  */
+              {
+                size_t i;
+
+                for (i = bufcount; i > bytes; i--)
+                  phase1_ungetc (buf[i - 1]);
+              }
+              /* Convert the character from UTF-8 to UCS-4.  */
+              if (u8_mbtoucr (&uc, scratchbuf, outbytes) < (int) outbytes)
+                {
+                  /* scratchbuf contains an out-of-range Unicode character
+                     (> 0x10ffff).  */
+                  multiline_error (xstrdup (""),
+                                   xasprintf (_("\
+%s:%d: Invalid multibyte sequence.\n\
+Please specify the source encoding through --from-code\n"),
+                                   real_file_name, line_number));
+                  exit (EXIT_FAILURE);
+                }
+              return uc;
+            }
+        }
+#else
+      /* If we don't have iconv(), the only supported values for
+         xgettext_global_source_encoding and thus also for
+         xgettext_current_source_encoding are ASCII and UTF-8.  */
+      abort ();
+#endif
+    }
+  else
+    {
+      /* Read an UTF-8 encoded character.
+         The lead byte decides how many further bytes are read.  Each
+         further byte is read only while all the bytes read before it are
+         continuation bytes, that is (byte ^ 0x80) < 0x40.  End of file in
+         the middle of a sequence yields UEOF.  */
+      unsigned char buf[6];
+      unsigned int count;
+      int c;
+      ucs4_t uc;
+
+      c = phase1_getc ();
+      if (c == EOF)
+        return UEOF;
+      buf[0] = c;
+      count = 1;
+
+      if (buf[0] >= 0xc0)
+        {
+          c = phase1_getc ();
+          if (c == EOF)
+            return UEOF;
+          buf[1] = c;
+          count = 2;
+        }
+
+      if (buf[0] >= 0xe0
+          && ((buf[1] ^ 0x80) < 0x40))
+        {
+          c = phase1_getc ();
+          if (c == EOF)
+            return UEOF;
+          buf[2] = c;
+          count = 3;
+        }
+
+      if (buf[0] >= 0xf0
+          && ((buf[1] ^ 0x80) < 0x40)
+          && ((buf[2] ^ 0x80) < 0x40))
+        {
+          c = phase1_getc ();
+          if (c == EOF)
+            return UEOF;
+          buf[3] = c;
+          count = 4;
+        }
+
+      if (buf[0] >= 0xf8
+          && ((buf[1] ^ 0x80) < 0x40)
+          && ((buf[2] ^ 0x80) < 0x40)
+          && ((buf[3] ^ 0x80) < 0x40))
+        {
+          c = phase1_getc ();
+          if (c == EOF)
+            return UEOF;
+          buf[4] = c;
+          count = 5;
+        }
+
+      if (buf[0] >= 0xfc
+          && ((buf[1] ^ 0x80) < 0x40)
+          && ((buf[2] ^ 0x80) < 0x40)
+          && ((buf[3] ^ 0x80) < 0x40)
+          && ((buf[4] ^ 0x80) < 0x40))
+        {
+          c = phase1_getc ();
+          if (c == EOF)
+            return UEOF;
+          buf[5] = c;
+          count = 6;
+        }
+
+      u8_mbtouc (&uc, buf, count);
+      return uc;
+    }
+}
+
+/* Supports max (9, UNINAME_MAX + 3) pushback characters.
+   Pushing back UEOF is a no-op; exceeding the capacity aborts.  */
+static void
+phase2_ungetc (int c)
+{
+  if (c != UEOF)
+    {
+      if (phase2_pushback_length == SIZEOF (phase2_pushback))
+        abort ();
+      phase2_pushback[phase2_pushback_length++] = c;
+    }
+}
+
+
+/* ========================= Accumulating strings.  ======================== */
+
+/* A string buffer type that allows appending Unicode characters.
+   Returns the entire string in UTF-8 encoding.  */
+
+struct unicode_string_buffer
+{
+  /* The part of the string that has already been converted to UTF-8.  */
+  char *utf8_buffer;
+  /* Number of bytes in use in utf8_buffer.  */
+  size_t utf8_buflen;
+  /* Number of bytes allocated for utf8_buffer.  */
+  size_t utf8_allocated;
+};
+
+/* Initialize a 'struct unicode_string_buffer' to empty. 
*/
+static inline void
+init_unicode_string_buffer (struct unicode_string_buffer *bp)
+{
+  /* Nothing is allocated until the first character is appended.  */
+  bp->utf8_allocated = 0;
+  bp->utf8_buflen = 0;
+  bp->utf8_buffer = NULL;
+}
+
+/* Auxiliary function: Make sure that COUNT further bytes fit into
+   bp->utf8_buffer, enlarging the allocation when they do not.  */
+static inline void
+unicode_string_buffer_append_unicode_grow (struct unicode_string_buffer *bp,
+                                           size_t count)
+{
+  size_t needed = bp->utf8_buflen + count;
+  size_t grown;
+
+  if (needed <= bp->utf8_allocated)
+    return;
+
+  /* Grow geometrically, but never to less than what is requested.  */
+  grown = 2 * bp->utf8_allocated + 10;
+  if (grown < needed)
+    grown = needed;
+
+  bp->utf8_buffer = xrealloc (bp->utf8_buffer, grown);
+  bp->utf8_allocated = grown;
+}
+
+/* Auxiliary function: Encode the Unicode character UC as UTF-8 and append
+   the resulting bytes to bp->utf8_buffer.
+   UC must be < 0x110000; the function aborts if it cannot be encoded.  */
+static inline void
+unicode_string_buffer_append_unicode (struct unicode_string_buffer *bp,
+                                      unsigned int uc)
+{
+  unsigned char encoded[6];
+  int length = u8_uctomb (encoded, uc, sizeof (encoded));
+
+  /* A negative length means that the caller passed an out-of-range
+     character, which it should have prevented.  */
+  if (length < 0)
+    abort ();
+
+  unicode_string_buffer_append_unicode_grow (bp, length);
+  memcpy (bp->utf8_buffer + bp->utf8_buflen, encoded, length);
+  bp->utf8_buflen += length;
+}
+
+/* Return the accumulated contents as a NUL-terminated string.  The string
+   remains owned by BP; the terminator is not counted in bp->utf8_buflen.  */
+static char *
+unicode_string_buffer_result (struct unicode_string_buffer *bp)
+{
+  char *terminator;
+
+  /* Reserve room for the terminating NUL, then store it.  */
+  unicode_string_buffer_append_unicode_grow (bp, 1);
+  terminator = bp->utf8_buffer + bp->utf8_buflen;
+  *terminator = '\0';
+
+  return bp->utf8_buffer;
+}
+
+/* Release the storage owned by a 'struct unicode_string_buffer'.  */
+static inline void
+free_unicode_string_buffer (struct unicode_string_buffer *bp)
+{
+  free (bp->utf8_buffer);
+}
+
+
+/* ======================== Accumulating comments. ======================== */
+
+
+/* Accumulating a single comment line.
*/
+
+static struct unicode_string_buffer comment_buffer;
+
+/* Begin accumulating a new comment line: enter the comment lexical context
+   and empty the buffer (its allocation is kept for reuse).  */
+static inline void
+comment_start (void)
+{
+  lexical_context = lc_comment;
+  comment_buffer.utf8_buflen = 0;
+}
+
+/* Return true if nothing has been added to the current comment line yet.  */
+static inline bool
+comment_at_start (void)
+{
+  return (comment_buffer.utf8_buflen == 0);
+}
+
+/* Append the Unicode character C to the current comment line.  */
+static inline void
+comment_add (int c)
+{
+  unicode_string_buffer_append_unicode (&comment_buffer, c);
+}
+
+/* Finish the current comment line.
+   CHARS_TO_REMOVE is the number of bytes at the end of the buffer that
+   belong to the comment syntax rather than to its text (1 for the newline
+   inside a C style comment, 2 for the closing star-slash, 0 otherwise).
+   After dropping them, trailing blanks and tabs are stripped, the line is
+   passed to savable_comment_add, and the lexical context is reset to
+   lc_outside.  Returns the buffer, which stays owned by comment_buffer.  */
+static inline const char *
+comment_line_end (size_t chars_to_remove)
+{
+  char *buffer = unicode_string_buffer_result (&comment_buffer);
+  size_t buflen = strlen (buffer);
+
+  /* Never remove more than what has been accumulated.  */
+  if (chars_to_remove > buflen)
+    chars_to_remove = buflen;
+  buflen -= chars_to_remove;
+
+  while (buflen >= 1
+         && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
+    --buflen;
+  buffer[buflen] = '\0';
+  savable_comment_add (buffer);
+  lexical_context = lc_outside;
+  return buffer;
+}
+
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* ======================== Recognizing comments.  ======================== */
+
+
+/* Canonicalized encoding name for the current input file.  */
+static const char *xgettext_current_file_source_encoding;
+
+#if HAVE_ICONV
+/* Converter from xgettext_current_file_source_encoding to UTF-8 (except from
+   ASCII or UTF-8, when this conversion is a no-op).  */
+static iconv_t xgettext_current_file_source_iconv;
+#endif
+
+/* Tracking whether the current line is a continuation line or contains a
+   non-blank character.  */
+static bool continuation_or_nonblank_line = false;
+
+
+/* Phase 3: Outside strings, replace backslash-newline with nothing and a
+   comment with nothing.
+   A "//" comment is replaced by the newline (or UEOF) that ends it; a
+   C style comment disappears entirely.  Every comment line is recorded
+   through comment_line_end so that it can later be attached to a
+   message.  */
+
+static int
+phase3_getc (void)
+{
+  int c;
+
+  for (;;)
+    {
+      c = phase2_getc ();
+      if (c == '\\')
+        {
+          c = phase2_getc ();
+          if (c != '\n')
+            {
+              phase2_ungetc (c);
+              /* This shouldn't happen usually, because "A backslash is
+                 illegal elsewhere on a line outside a string literal."  */
+              return '\\';
+            }
+          /* Eat backslash-newline.  */
+          continuation_or_nonblank_line = true;
+        }
+      else if (c == '/')
+        {
+          c = phase2_getc ();
+          if (c == '/')
+            {
+              /* C++ style comment.  */
+              last_comment_line = line_number;
+              comment_start ();
+              for (;;)
+                {
+                  c = phase2_getc ();
+                  if (c == UEOF || c == '\n')
+                    {
+                      /* The terminator was not added to the buffer, so
+                         there is nothing to strip.  Finishing the line here
+                         is what saves the comment and leaves the comment
+                         lexical context.  */
+                      comment_line_end (0);
+                      break;
+                    }
+                  /* We skip all leading white space, but not EOLs.  */
+                  if (!(comment_at_start () && (c == ' ' || c == '\t')))
+                    comment_add (c);
+                }
+              continuation_or_nonblank_line = false;
+              return c;
+            }
+          else if (c == '*')
+            {
+              /* C style comment.  */
+              bool last_was_star = false;
+              last_comment_line = line_number;
+              comment_start ();
+              for (;;)
+                {
+                  c = phase2_getc ();
+                  if (c == UEOF)
+                    break;
+                  /* We skip all leading white space, but not EOLs.  */
+                  if (!(comment_at_start () && (c == ' ' || c == '\t')))
+                    comment_add (c);
+                  switch (c)
+                    {
+                    case '\n':
+                      /* Drop the newline that was just added.  */
+                      comment_line_end (1);
+                      comment_start ();
+                      last_was_star = false;
+                      continue;
+
+                    case '*':
+                      last_was_star = true;
+                      continue;
+                    case '/':
+                      if (last_was_star)
+                        {
+                          /* Drop the closing star and slash.  */
+                          comment_line_end (2);
+                          break;
+                        }
+                      /* FALLTHROUGH */
+
+                    default:
+                      last_was_star = false;
+                      continue;
+                    }
+                  break;
+                }
+              continuation_or_nonblank_line = false;
+            }
+          else
+            {
+              phase2_ungetc (c);
+              return '/';
+            }
+        }
+      else
+        {
+          if (c == '\n')
+            continuation_or_nonblank_line = false;
+          else if (!(c == ' ' || c == '\t' || c == '\f'))
+            continuation_or_nonblank_line = true;
+          return c;
+        }
+    }
+}
+
+/* Supports only one pushback character.  */
+static void
+phase3_ungetc (int c)
+{
+  phase2_ungetc (c);
+}
+
+
+/* ========================= Accumulating strings.  ======================== */
+
+/* Return value of phase7_getuc when EOF is reached.  */
+#define P7_EOF (-1)
+/* Return value of phase7_getuc when the end of the string is reached.  */
+#define P7_STRING_END (-2)
+
+/* Convert an UTF-16 or UTF-32 code point to a return value that can be
+   distinguished from a single-byte return value.  */
+#define UNICODE(code) (0x100 + (code))
+
+/* Test a return value of phase7_getuc whether it designates an UTF-16 or
+   UTF-32 code point.
*/
+#define IS_UNICODE(p7_result) ((p7_result) >= 0x100)
+
+/* Extract the UTF-16 or UTF-32 code of a return value that satisfies
+   IS_UNICODE.  */
+#define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
+
+/* A string buffer type that allows appending bytes (in the
+   xgettext_current_source_encoding) or Unicode characters.
+   Returns the entire string in UTF-8 encoding.  */
+
+struct mixed_string_buffer
+{
+  /* The part of the string that has already been converted to UTF-8.  */
+  char *utf8_buffer;
+  size_t utf8_buflen;
+  size_t utf8_allocated;
+  /* The first half of an UTF-16 surrogate character.  */
+  unsigned short utf16_surr;
+  /* The part of the string that is still in the source encoding.  */
+  char *curr_buffer;
+  size_t curr_buflen;
+  size_t curr_allocated;
+  /* The lexical context.  Used only for error message purposes.  */
+  lexical_context_ty lcontext;
+};
+
+/* Set up a 'struct mixed_string_buffer' as empty: no storage allocated,
+   no pending surrogate, and LCONTEXT remembered for error messages.  */
+static inline void
+init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext)
+{
+  bp->lcontext = lcontext;
+
+  /* Bytes that still await conversion from the source encoding.  */
+  bp->curr_allocated = 0;
+  bp->curr_buflen = 0;
+  bp->curr_buffer = NULL;
+
+  /* No first half of a surrogate pair is pending.  */
+  bp->utf16_surr = 0;
+
+  /* The already converted UTF-8 part.  */
+  bp->utf8_allocated = 0;
+  bp->utf8_buflen = 0;
+  bp->utf8_buffer = NULL;
+}
+
+/* Auxiliary function: Store the byte C at the end of bp->curr_buffer,
+   enlarging that buffer first when it is full.  */
+static inline void
+mixed_string_buffer_append_byte (struct mixed_string_buffer *bp, unsigned char c)
+{
+  if (bp->curr_buflen == bp->curr_allocated)
+    {
+      size_t grown = 2 * bp->curr_allocated + 10;
+
+      bp->curr_allocated = grown;
+      bp->curr_buffer = xrealloc (bp->curr_buffer, grown);
+    }
+
+  bp->curr_buffer[bp->curr_buflen] = c;
+  bp->curr_buflen++;
+}
+
+/* Auxiliary function: Ensure count more bytes are available in bp->utf8.
*/
+static inline void
+mixed_string_buffer_append_unicode_grow (struct mixed_string_buffer *bp, size_t count)
+{
+  size_t needed = bp->utf8_buflen + count;
+  size_t grown;
+
+  if (needed <= bp->utf8_allocated)
+    return;
+
+  /* Grow geometrically, but never to less than what is requested.  */
+  grown = 2 * bp->utf8_allocated + 10;
+  if (grown < needed)
+    grown = needed;
+
+  bp->utf8_buffer = xrealloc (bp->utf8_buffer, grown);
+  bp->utf8_allocated = grown;
+}
+
+/* Auxiliary function: Encode the Unicode character UC as UTF-8 and append
+   it to bp->utf8_buffer.
+   UC must be < 0x110000; the function aborts if it cannot be encoded.  */
+static inline void
+mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, ucs4_t uc)
+{
+  unsigned char encoded[6];
+  int length = u8_uctomb (encoded, uc, sizeof (encoded));
+
+  /* A negative length means that the caller passed an out-of-range
+     character, which it should have prevented.  */
+  if (length < 0)
+    abort ();
+
+  mixed_string_buffer_append_unicode_grow (bp, length);
+  memcpy (bp->utf8_buffer + bp->utf8_buflen, encoded, length);
+  bp->utf8_buflen += length;
+}
+
+/* Auxiliary function: If the first half of a surrogate pair is pending,
+   emit a replacement character for it and forget it.  */
+static inline void
+mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
+{
+  if (bp->utf16_surr == 0)
+    return;
+
+  /* A half surrogate is invalid, therefore use U+FFFD instead.  */
+  mixed_string_buffer_append_unicode (bp, 0xfffd);
+  bp->utf16_surr = 0;
+}
+
+/* Auxiliary function: Convert the bytes pending in bp->curr_buffer to UTF-8,
+   append them to bp->utf8_buffer and empty bp->curr_buffer.
+   LINENO is passed on to the conversion together with the lexical
+   context and the logical file name.  */
+static inline void
+mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, int lineno)
+{
+  char *converted;
+  size_t converted_len;
+
+  if (bp->curr_buflen == 0)
+    return;
+
+  /* The conversion function expects a NUL-terminated string.  */
+  mixed_string_buffer_append_byte (bp, '\0');
+
+  /* Convert from the source encoding to UTF-8.  */
+  converted = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
+                                            logical_file_name, lineno);
+
+  /* Append the result to bp->utf8_buffer.  */
+  converted_len = strlen (converted);
+  mixed_string_buffer_append_unicode_grow (bp, converted_len);
+  memcpy (bp->utf8_buffer + bp->utf8_buflen, converted, converted_len);
+  bp->utf8_buflen += converted_len;
+
+  /* The conversion may have returned its argument unchanged; release the
+     result only when it is a distinct string.  */
+  if (converted != bp->curr_buffer)
+    free (converted);
+  bp->curr_buflen = 0;
+}
+
+/* Append a character or Unicode character to a 'struct mixed_string_buffer'.
+   C is either a single byte in the source encoding or a value built with
+   UNICODE.  */
+static void
+mixed_string_buffer_append (struct mixed_string_buffer *bp, int c)
+{
+  bool is_high_surrogate;
+  bool is_low_surrogate;
+
+  if (!IS_UNICODE (c))
+    {
+      /* Append a single byte.  */
+
+      /* Switch from Unicode character mode to multibyte character mode.  */
+      mixed_string_buffer_flush_utf16_surr (bp);
+
+      /* When a newline is seen, convert the accumulated multibyte sequence.
+         This ensures a correct line number in the error message in case of
+         a conversion error.  The "- 1" is to account for the newline.  */
+      if (c == '\n')
+        mixed_string_buffer_flush_curr_buffer (bp, line_number - 1);
+
+      mixed_string_buffer_append_byte (bp, (unsigned char) c);
+      return;
+    }
+
+  /* Append a Unicode character.  */
+
+  /* Switch from multibyte character mode to Unicode character mode.  */
+  mixed_string_buffer_flush_curr_buffer (bp, line_number);
+
+  is_high_surrogate = (c >= UNICODE (0xd800) && c < UNICODE (0xdc00));
+  is_low_surrogate = (c >= UNICODE (0xdc00) && c < UNICODE (0xe000));
+
+  if (is_low_surrogate && bp->utf16_surr != 0)
+    {
+      /* This character and the previous one form a Unicode surrogate
+         character pair; combine them into a single code point.  */
+      unsigned short pair[2];
+      ucs4_t combined;
+
+      pair[0] = bp->utf16_surr;
+      pair[1] = UNICODE_VALUE (c);
+      if (u16_mbtouc (&combined, pair, 2) != 2)
+        abort ();
+
+      mixed_string_buffer_append_unicode (bp, combined);
+      bp->utf16_surr = 0;
+      return;
+    }
+
+  /* A pending first half that is not followed by a second half is
+     invalid.  */
+  mixed_string_buffer_flush_utf16_surr (bp);
+
+  if (is_high_surrogate)
+    /* Remember the first half until the next character is known.  */
+    bp->utf16_surr = UNICODE_VALUE (c);
+  else if (is_low_surrogate)
+    /* A half surrogate is invalid, therefore use U+FFFD instead.  */
+    mixed_string_buffer_append_unicode (bp, 0xfffd);
+  else
+    mixed_string_buffer_append_unicode (bp, UNICODE_VALUE (c));
+}
+
+/* Return the accumulated contents as a NUL-terminated UTF-8 string.  The
+   string remains owned by BP.  */
+static char *
+mixed_string_buffer_result (struct mixed_string_buffer *bp)
+{
+  char *terminator;
+
+  /* Move everything that is still pending into bp->utf8_buffer.  */
+  mixed_string_buffer_flush_utf16_surr (bp);
+  mixed_string_buffer_flush_curr_buffer (bp, line_number);
+
+  /* Reserve room for the terminating NUL, then store it.  */
+  mixed_string_buffer_append_unicode_grow (bp, 1);
+  terminator = bp->utf8_buffer + bp->utf8_buflen;
+  *terminator = '\0';
+
+  return bp->utf8_buffer;
+}
+
+/* Release the storage owned by a 'struct mixed_string_buffer'.  */
+static inline void
+free_mixed_string_buffer (struct mixed_string_buffer *bp)
+{
+  free (bp->utf8_buffer);
+  free (bp->curr_buffer);
+}
+
+
+/* ========================== Reading of tokens.  ========================== */
+
+
+enum token_type_ty
+{
+  token_type_eof,
+  token_type_lparen,            /* ( */
+  token_type_rparen,            /* ) */
+  token_type_comma,             /* , */
+  token_type_lbracket,          /* [ */
+  token_type_rbracket,          /* ] */
+  token_type_plus,              /* + */
+  token_type_regexp,            /* /.../ */
+  token_type_operator,          /* - * / % . < > = ~ ! | & ? : ^ */
+  token_type_string,            /* "abc", 'abc' */
+  token_type_keyword,           /* return, else */
+  token_type_symbol,            /* symbol, number */
+  token_type_other              /* misc. operator */
+};
+typedef enum token_type_ty token_type_ty;
+
+typedef struct token_ty token_ty;
+struct token_ty
+{
+  token_type_ty type;
+  char *string;         /* for token_type_string, token_type_symbol,
+                           token_type_keyword */
+  refcounted_string_list_ty *comment;   /* for token_type_string */
+  int line_number;
+};
+
+
+/* Free the memory pointed to by a 'struct token_ty'.
*/
+static inline void
+free_token (token_ty *tp)
+{
+  /* phase5_get allocates tp->string for strings, symbols and keywords
+     alike, so all three kinds must release it.  */
+  if (tp->type == token_type_string
+      || tp->type == token_type_symbol
+      || tp->type == token_type_keyword)
+    free (tp->string);
+  if (tp->type == token_type_string)
+    drop_reference (tp->comment);
+}
+
+
+/* Return the numeric value of the hexadecimal digit C, or -1 if C is not
+   a hexadecimal digit.  */
+static int
+phase7_hex_digit_value (int c)
+{
+  if (c >= '0' && c <= '9')
+    return c - '0';
+  if (c >= 'A' && c <= 'F')
+    return c - 'A' + 10;
+  if (c >= 'a' && c <= 'f')
+    return c - 'a' + 10;
+  return -1;
+}
+
+
+/* JavaScript provides strings with either double or single quotes:
+     "abc" or 'abc'
+   Both may contain special sequences after a backslash:
+     \', \", \\, \b, \f, \n, \r, \t, \v
+   Special characters can be entered using hexadecimal escape
+   sequences or deprecated octal escape sequences:
+     \xXX, \OOO
+   Any unicode point can be entered using Unicode escape sequences:
+     \uNNNN
+   A backslash before any other character stands for that character, and
+   backslash-newline is a line continuation that contributes nothing.
+
+   Return the next character of the string delimited by QUOTE_CHAR as a
+   UNICODE value, P7_STRING_END at the closing quote or at an unescaped
+   newline (after a warning), or P7_EOF at the end of the input.
+   An escaped quote never reaches the test for the closing quote, because
+   the character after a backslash is consumed by the escape dispatch
+   below; therefore every quote seen by that test ends the string.  */
+
+static int
+phase7_getuc (int quote_char)
+{
+  int c;
+
+  for (;;)
+    {
+      /* Use phase 2, because phase 3 elides comments.  */
+      c = phase2_getc ();
+
+      if (c == UEOF)
+        return P7_EOF;
+
+      if (c == quote_char)
+        return P7_STRING_END;
+
+      if (c == '\n')
+        {
+          phase2_ungetc (c);
+          error_with_progname = false;
+          error (0, 0, _("%s:%d: warning: unterminated string"),
+                 logical_file_name, line_number);
+          error_with_progname = true;
+          return P7_STRING_END;
+        }
+
+      if (c != '\\')
+        return UNICODE (c);
+
+      /* Dispatch according to the character following the backslash.  */
+      c = phase2_getc ();
+      if (c == UEOF)
+        return UNICODE ('\\');
+
+      switch (c)
+        {
+        case '\n':
+          /* Line continuation.  */
+          continue;
+        case 'b':
+          return UNICODE ('\b');
+        case 'f':
+          return UNICODE ('\f');
+        case 'n':
+          return UNICODE ('\n');
+        case 'r':
+          return UNICODE ('\r');
+        case 't':
+          return UNICODE ('\t');
+        case 'v':
+          return UNICODE ('\v');
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7':
+          {
+            int n = c - '0';
+
+            /* phase2_ungetc ignores UEOF, so no separate EOF test is
+               needed before pushing a non-digit back.  */
+            c = phase2_getc ();
+            if (c >= '0' && c <= '7')
+              {
+                n = (n << 3) + (c - '0');
+                /* A third digit belongs to the escape only if the first
+                   digit was 0..3, so that the value fits into 8 bits.  */
+                if (n < 040)
+                  {
+                    c = phase2_getc ();
+                    if (c >= '0' && c <= '7')
+                      n = (n << 3) + (c - '0');
+                    else
+                      phase2_ungetc (c);
+                  }
+              }
+            else
+              phase2_ungetc (c);
+            return UNICODE (n);
+          }
+        case 'x':
+          {
+            int c1 = phase2_getc ();
+            int n1 = phase7_hex_digit_value (c1);
+
+            if (n1 >= 0)
+              {
+                int c2 = phase2_getc ();
+                int n2 = phase7_hex_digit_value (c2);
+
+                if (n2 >= 0)
+                  return UNICODE ((n1 << 4) + n2);
+
+                phase2_ungetc (c2);
+              }
+            /* Not a valid escape: keep the backslash and re-read what
+               followed it as ordinary characters.  */
+            phase2_ungetc (c1);
+            phase2_ungetc (c);
+            return UNICODE ('\\');
+          }
+        case 'u':
+          {
+            unsigned char buf[4];
+            unsigned int n = 0;
+            int i;
+
+            for (i = 0; i < 4; i++)
+              {
+                int c1 = phase2_getc ();
+                int digit = phase7_hex_digit_value (c1);
+
+                if (digit < 0)
+                  {
+                    /* Not a valid escape: keep the backslash and re-read
+                       what followed it as ordinary characters.  */
+                    phase2_ungetc (c1);
+                    while (--i >= 0)
+                      phase2_ungetc (buf[i]);
+                    phase2_ungetc (c);
+                    return UNICODE ('\\');
+                  }
+
+                n = (n << 4) + digit;
+                buf[i] = c1;
+              }
+            return UNICODE (n);
+          }
+        default:
+          /* This covers \\, \' and \" as well as every character without
+             a special meaning, e.g. \/ : the escape denotes the character
+             itself.  */
+          return UNICODE (c);
+        }
+    }
+}
+
+
+/* Combine characters into tokens.  Discard whitespace, newlines and
+   comments.  */
+
+static token_ty phase5_pushback[2];
+static int phase5_pushback_length;
+
+/* Type of the token most recently returned by phase5_get.  It decides
+   whether a '/' is a division operator or starts a RegExp literal.  */
+static token_type_ty last_token_type = token_type_other;
+
+/* Skip the remainder of a RegExp literal whose opening '/' has already
+   been consumed, including the flags that follow the closing '/'.  */
+static void
+phase5_scan_regexp (void)
+{
+  int c;
+
+  /* Scan for end of RegExp literal ('/').  */
+  for (;;)
+    {
+      /* Must use phase2 as there can't be comments.  */
+      c = phase2_getc ();
+      if (c == '/')
+        break;
+      if (c == '\\')
+        {
+          /* The character after a backslash never ends the literal.  */
+          c = phase2_getc ();
+          if (c != UEOF)
+            continue;
+        }
+      if (c == UEOF)
+        {
+          error_with_progname = false;
+          error (0, 0,
+                 _("%s:%d: warning: RegExp literal terminated too early"),
+                 logical_file_name, line_number);
+          error_with_progname = true;
+          return;
+        }
+    }
+
+  /* Scan for modifier flags (ECMA-262 5th section 15.10.4.1).  Several
+     flags may be combined, as in /x/gi.  */
+  do
+    c = phase2_getc ();
+  while (c == 'g' || c == 'i' || c == 'm');
+  phase2_ungetc (c);
+}
+
+/* Read the next token into *TP and record its type in last_token_type.
+   tp->string is freshly allocated for string, symbol and keyword tokens;
+   tp->comment holds a new reference for string tokens.  */
+static void
+phase5_get (token_ty *tp)
+{
+  int c;
+
+  if (phase5_pushback_length)
+    {
+      *tp = phase5_pushback[--phase5_pushback_length];
+      last_token_type = tp->type;
+      return;
+    }
+
+  for (;;)
+    {
+      tp->line_number = line_number;
+      c = phase3_getc ();
+
+      switch (c)
+        {
+        case UEOF:
+          tp->type = last_token_type = token_type_eof;
+          return;
+
+        case '\n':
+          if (last_non_comment_line > last_comment_line)
+            savable_comment_reset ();
+          /* FALLTHROUGH */
+        case ' ':
+        case '\t':
+        case '\f':
+          /* Ignore whitespace and comments.  */
+          continue;
+        }
+
+      last_non_comment_line = tp->line_number;
+
+      switch (c)
+        {
+        case '.':
+          {
+            /* A dot that is followed by a digit starts a number.  */
+            int c1 = phase3_getc ();
+            phase3_ungetc (c1);
+            if (!(c1 >= '0' && c1 <= '9'))
+              {
+                tp->type = last_token_type = token_type_other;
+                return;
+              }
+          }
+          /* FALLTHROUGH */
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+        case 'Y': case 'Z':
+        case '_':
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+        case 'y': case 'z':
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+          /* Symbol, or part of a number.  */
+          {
+            /* The scratch buffer is kept across calls and only grows.  */
+            static char *buffer;
+            static int bufmax;
+            int bufpos;
+
+            bufpos = 0;
+            for (;;)
+              {
+                if (bufpos >= bufmax)
+                  {
+                    bufmax = 2 * bufmax + 10;
+                    buffer = xrealloc (buffer, bufmax);
+                  }
+                buffer[bufpos++] = c;
+                c = phase3_getc ();
+                switch (c)
+                  {
+                  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                  case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+                  case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+                  case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+                  case 'Y': case 'Z':
+                  case '_':
+                  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                  case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+                  case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+                  case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+                  case 'y': case 'z':
+                  case '0': case '1': case '2': case '3': case '4':
+                  case '5': case '6': case '7': case '8': case '9':
+                    continue;
+                  default:
+                    phase3_ungetc (c);
+                    break;
+                  }
+                break;
+              }
+            if (bufpos >= bufmax)
+              {
+                bufmax = 2 * bufmax + 10;
+                buffer = xrealloc (buffer, bufmax);
+              }
+            buffer[bufpos] = '\0';
+            tp->string = xstrdup (buffer);
+            /* These keywords are distinguished from symbols because a '/'
+               after them starts a RegExp literal, not a division.  */
+            if (strcmp (buffer, "return") == 0
+                || strcmp (buffer, "else") == 0)
+              tp->type = last_token_type = token_type_keyword;
+            else
+              tp->type = last_token_type = token_type_symbol;
+            return;
+          }
+
+        /* Strings.  */
+        case '"': case '\'':
+          {
+            struct mixed_string_buffer literal;
+            int quote_char = c;
+
+            lexical_context = lc_string;
+            /* Start accumulating the string.  */
+            init_mixed_string_buffer (&literal, lc_string);
+            for (;;)
+              {
+                int uc = phase7_getuc (quote_char);
+
+                if (uc == P7_EOF || uc == P7_STRING_END)
+                  break;
+
+                if (IS_UNICODE (uc))
+                  assert (UNICODE_VALUE (uc) >= 0
+                          && UNICODE_VALUE (uc) < 0x110000);
+
+                mixed_string_buffer_append (&literal, uc);
+              }
+            tp->string = xstrdup (mixed_string_buffer_result (&literal));
+            free_mixed_string_buffer (&literal);
+            tp->comment = add_reference (savable_comment);
+            lexical_context = lc_outside;
+            tp->type = last_token_type = token_type_string;
+            return;
+          }
+
+        case '+':
+          tp->type = last_token_type = token_type_plus;
+          return;
+
+        /* Identify operators. The multiple character ones are simply ignored
+         * as they are recognized here and are otherwise not relevant. */
+        case '-': case '*': /* '+' and '/' are not listed here! */
+        case '%': case '<': case '>': case '=':
+        case '~': case '!': case '|': case '&': case '^':
+        case '?': case ':':
+          tp->type = last_token_type = token_type_operator;
+          return;
+
+        case '/':
+          /* Either a division operator or the start of a regular
+             expression literal.  If the '/' token is spotted after a
+             symbol it's a division, otherwise it's a regular
+             expression.  */
+          if (last_token_type == token_type_symbol
+              || last_token_type == token_type_rparen
+              || last_token_type == token_type_rbracket)
+            tp->type = last_token_type = token_type_operator;
+          else
+            {
+              phase5_scan_regexp ();
+              tp->type = last_token_type = token_type_regexp;
+            }
+          return;
+
+        case '(':
+          tp->type = last_token_type = token_type_lparen;
+          return;
+
+        case ')':
+          tp->type = last_token_type = token_type_rparen;
+          return;
+
+        case ',':
+          tp->type = last_token_type = token_type_comma;
+          return;
+
+        case '[': case '{':
+          tp->type = last_token_type = (c == '[' ? token_type_lbracket : token_type_other);
+          return;
+
+        case ']': case '}':
+          tp->type = last_token_type = (c == ']' ? token_type_rbracket : token_type_other);
+          return;
+
+        default:
+          /* We could carefully recognize each of the 2 and 3 character
+             operators, but it is not necessary, as we only need to recognize
+             gettext invocations.  Don't bother.  */
+          tp->type = last_token_type = token_type_other;
+          return;
+        }
+    }
+}
+
+/* Push the token *TP back, so that the next phase5_get returns it.
+   Supports as many pushback tokens as phase5_pushback has elements;
+   an end-of-file token is not pushed back.  */
+static void
+phase5_unget (token_ty *tp)
+{
+  if (tp->type != token_type_eof)
+    {
+      if (phase5_pushback_length == SIZEOF (phase5_pushback))
+        abort ();
+      phase5_pushback[phase5_pushback_length++] = *tp;
+    }
+}
+
+
+/* Combine string literals that are joined by the '+' operator into a
+   single string token.
*/
+
+static void
+x_javascript_lex (token_ty *tp)
+{
+  char *joined;
+  size_t joined_len;
+
+  phase5_get (tp);
+  if (tp->type != token_type_string)
+    return;
+
+  /* Keep appending as long as the string is followed by '+' and another
+     string literal.  */
+  joined = tp->string;
+  joined_len = strlen (joined);
+
+  for (;;)
+    {
+      token_ty plus_token;
+      token_ty next_token;
+      char *piece;
+      size_t piece_len;
+
+      phase5_get (&plus_token);
+      if (plus_token.type != token_type_plus)
+        {
+          phase5_unget (&plus_token);
+          break;
+        }
+
+      phase5_get (&next_token);
+      if (next_token.type != token_type_string)
+        {
+          /* Push back in reverse order, so that the '+' is read again
+             first.  */
+          phase5_unget (&next_token);
+          phase5_unget (&plus_token);
+          break;
+        }
+
+      /* Copy the piece including its terminating NUL.  */
+      piece = next_token.string;
+      piece_len = strlen (piece);
+      joined = (char *) xrealloc (joined, joined_len + piece_len + 1);
+      memcpy (joined + joined_len, piece, piece_len + 1);
+      joined_len += piece_len;
+
+      free_token (&next_token);
+      free_token (&plus_token);
+    }
+
+  tp->string = joined;
+}
+
+
+/* ========================= Extracting strings.  ========================== */
+
+
+/* Context lookup table.  */
+static flag_context_list_table_ty *flag_context_list_table;
+
+
+/* The file is broken into tokens.  Scan the token stream, looking for
+   a keyword, followed by a left paren, followed by a string.  When we
+   see this sequence, we have something to remember.  We assume we are
+   looking at a valid JavaScript program, and leave the complaints about
+   the grammar to the compiler.
+
+     Normal handling: Look for
+       keyword ( ... msgid ... )
+     Plural handling: Look for
+       keyword ( ... msgid ... msgid_plural ... )
+
+   We use recursion because the arguments before msgid or between msgid
+   and msgid_plural can contain subexpressions of the same form.  */
+
+
+/* Extract messages until the next balanced closing parenthesis or bracket.
+   Extracted messages are added to MLP.
+   DELIM can be either token_type_rparen or token_type_rbracket, or
+   token_type_eof to accept both.
+   Return true upon eof, false upon closing parenthesis or bracket.
*/
+static bool
+extract_balanced (message_list_ty *mlp,
+                  token_type_ty delim,
+                  flag_context_ty outer_context,
+                  flag_context_list_iterator_ty context_iter,
+                  struct arglist_parser *argparser)
+{
+  /* Current argument number.  */
+  int arg = 1;
+  /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
+  int state;
+  /* Parameters of the keyword just seen.  Defined only in state 1.  */
+  const struct callshapes *next_shapes = NULL;
+  /* Context iterator that will be used if the next token is a '('.  */
+  flag_context_list_iterator_ty next_context_iter =
+    passthrough_context_list_iterator;
+  /* Current context.  */
+  flag_context_ty inner_context =
+    inherited_context (outer_context,
+                       flag_context_list_iterator_advance (&context_iter));
+
+  /* Start state is 0.  */
+  state = 0;
+
+  for (;;)
+    {
+      token_ty token;
+
+      x_javascript_lex (&token);
+      switch (token.type)
+        {
+        case token_type_symbol:
+          {
+            void *keyword_value;
+
+            if (hash_find_entry (&keywords, token.string, strlen (token.string),
+                                 &keyword_value)
+                == 0)
+              {
+                next_shapes = (const struct callshapes *) keyword_value;
+                state = 1;
+              }
+            else
+              state = 0;
+          }
+          next_context_iter =
+            flag_context_list_iterator (
+              flag_context_list_table_lookup (
+                flag_context_list_table,
+                token.string, strlen (token.string)));
+          free (token.string);
+          continue;
+
+        case token_type_lparen:
+          if (extract_balanced (mlp, token_type_rparen,
+                                inner_context, next_context_iter,
+                                arglist_parser_alloc (mlp,
+                                                      state ? next_shapes : NULL)))
+            {
+              /* The nested call hit eof; finish this level too.  The
+                 accumulated strings are in UTF-8, hence the temporary
+                 switch of the source encoding.  */
+              xgettext_current_source_encoding = po_charset_utf8;
+              arglist_parser_done (argparser, arg);
+              xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+              return true;
+            }
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_rparen:
+          if (delim == token_type_rparen || delim == token_type_eof)
+            {
+              xgettext_current_source_encoding = po_charset_utf8;
+              arglist_parser_done (argparser, arg);
+              xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+              return false;
+            }
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_comma:
+          arg++;
+          inner_context =
+            inherited_context (outer_context,
+                               flag_context_list_iterator_advance (
+                                 &context_iter));
+          next_context_iter = passthrough_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_lbracket:
+          if (extract_balanced (mlp, token_type_rbracket,
+                                null_context, null_context_list_iterator,
+                                arglist_parser_alloc (mlp, NULL)))
+            {
+              xgettext_current_source_encoding = po_charset_utf8;
+              arglist_parser_done (argparser, arg);
+              xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+              return true;
+            }
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_rbracket:
+          if (delim == token_type_rbracket || delim == token_type_eof)
+            {
+              xgettext_current_source_encoding = po_charset_utf8;
+              arglist_parser_done (argparser, arg);
+              xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+              return false;
+            }
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_string:
+          {
+            lex_pos_ty pos;
+            pos.file_name = logical_file_name;
+            pos.line_number = token.line_number;
+
+            /* token.string is passed on to the callee and is not freed
+               here.  */
+            xgettext_current_source_encoding = po_charset_utf8;
+            if (extract_all)
+              remember_a_message (mlp, NULL, token.string, inner_context,
+                                  &pos, NULL, token.comment);
+            else
+              arglist_parser_remember (argparser, arg, token.string,
+                                       inner_context,
+                                       pos.file_name, pos.line_number,
+                                       token.comment);
+            xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+          }
+          drop_reference (token.comment);
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        case token_type_eof:
+          xgettext_current_source_encoding = po_charset_utf8;
+          arglist_parser_done (argparser, arg);
+          xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+          return true;
+
+        case token_type_keyword:
+          /* phase5_get allocates the string of a keyword token just like
+             that of a symbol; release it, since nothing else will.  */
+          free (token.string);
+          /* FALLTHROUGH */
+        case token_type_plus:
+        case token_type_regexp:
+        case token_type_operator:
+        case token_type_other:
+          next_context_iter = null_context_list_iterator;
+          state = 0;
+          continue;
+
+        default:
+          abort ();
+        }
+    }
+}
+
+
+/* Scan the JavaScript file F and add its translatable strings to the
+   first message list of MDLP.  REAL_FILENAME and LOGICAL_FILENAME are the
+   names stored for error messages and message positions; FLAG_TABLE is
+   the context lookup table used while extracting.  */
+void
+extract_javascript (FILE *f,
+                    const char *real_filename, const char *logical_filename,
+                    flag_context_list_table_ty *flag_table,
+                    msgdomain_list_ty *mdlp)
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+
+  fp = f;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  lexical_context = lc_outside;
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  xgettext_current_file_source_encoding = xgettext_global_source_encoding;
+#if HAVE_ICONV
+  xgettext_current_file_source_iconv = xgettext_global_source_iconv;
+#endif
+
+  xgettext_current_source_encoding = xgettext_current_file_source_encoding;
+#if HAVE_ICONV
+  xgettext_current_source_iconv = xgettext_current_file_source_iconv;
+#endif
+
+  continuation_or_nonblank_line = false;
+
+  flag_context_list_table = flag_table;
+
+  init_keywords ();
+
+  /* Eat tokens until eof is seen.  When extract_balanced returns
+     due to an unbalanced closing parenthesis, just restart it.  */
+  while (!extract_balanced (mlp, token_type_eof,
+                            null_context, null_context_list_iterator,
+                            arglist_parser_alloc (mlp, NULL)))
+    ;
+
+  /* Reset the per-file state.  */
+  fp = NULL;
+  real_file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
+}
diff --git a/gettext-tools/src/x-javascript.h b/gettext-tools/src/x-javascript.h
new file mode 100644
index 000000000..d6681365b
--- /dev/null
+++ b/gettext-tools/src/x-javascript.h
@@ -0,0 +1,52 @@
+/* xgettext JavaScript backend.
+   Copyright (C) 2002-2003, 2006 Free Software Foundation, Inc.
+   This file was written by Andreas Stricker, 2010.
+   It's based on x-python from Bruno Haible.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+
+#include <stdio.h>
+
+#include "message.h"
+#include "xgettext.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define EXTENSIONS_JAVASCRIPT \
+  { "js",        "JavaScript"   },                                        \
+
+#define SCANNERS_JAVASCRIPT \
+  { "JavaScript",       extract_javascript,                               \
+                        &flag_table_javascript, &formatstring_javascript, NULL }, \
+
+/* Scan a JavaScript file and add its translatable strings to mdlp.
*/ +extern void extract_javascript (FILE *fp, const char *real_filename, + const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp); + +extern void x_javascript_keyword (const char *keyword); +extern void x_javascript_extract_all (void); + +extern void init_flag_table_javascript (void); + + +#ifdef __cplusplus +} +#endif diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c index 032d0aea5..9ea97f5c9 100644 --- a/gettext-tools/src/xgettext.c +++ b/gettext-tools/src/xgettext.c @@ -93,6 +93,7 @@ #include "x-rst.h" #include "x-glade.h" #include "x-lua.h" +#include "x-javascript.h" /* If nonzero add all comments immediately preceding one of the keywords. */ @@ -162,6 +163,7 @@ static flag_context_list_table_ty flag_table_tcl; static flag_context_list_table_ty flag_table_perl; static flag_context_list_table_ty flag_table_php; static flag_context_list_table_ty flag_table_lua; +static flag_context_list_table_ty flag_table_javascript; /* If true, recognize Qt format strings. 
*/ static bool recognize_format_qt; @@ -334,6 +336,7 @@ main (int argc, char *argv[]) init_flag_table_perl (); init_flag_table_php (); init_flag_table_lua (); + init_flag_table_javascript (); while ((optchar = getopt_long (argc, argv, "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:", @@ -359,6 +362,7 @@ main (int argc, char *argv[]) x_php_extract_all (); x_glade_extract_all (); x_lua_extract_all (); + x_javascript_extract_all (); break; case 'c': @@ -437,6 +441,7 @@ main (int argc, char *argv[]) x_php_keyword (optarg); x_glade_keyword (optarg); x_lua_keyword (optarg); + x_javascript_keyword (optarg); if (optarg == NULL) no_default_keywords = true; else @@ -862,7 +867,7 @@ Choice of input file language:\n")); (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\ EmacsLisp, librep, Scheme, Smalltalk, Java,\n\ JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\ - Lua, GCC-source, NXStringTable, RST, Glade)\n")); + Lua, JavaScript, GCC-source, NXStringTable, RST, Glade)\n")); printf (_("\ -C, --c++ shorthand for --language=C++\n")); printf (_("\ @@ -895,21 +900,21 @@ Language specific options:\n")); printf (_("\ (only languages C, C++, ObjectiveC, Shell,\n\ Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\ - C#, awk, Tcl, Perl, PHP, Lua, GCC-source, Glade)\n")); + C#, awk, Tcl, Perl, PHP, Lua, JavaScript, GCC-source, Glade)\n")); printf (_("\ -kWORD, --keyword=WORD look for WORD as an additional keyword\n\ -k, --keyword do not to use default keywords\n")); printf (_("\ (only languages C, C++, ObjectiveC, Shell,\n\ Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\ - C#, awk, Tcl, Perl, PHP, Lua, GCC-source, Glade)\n")); + C#, awk, Tcl, Perl, PHP, Lua, JavaScript, GCC-source, Glade)\n")); printf (_("\ --flag=WORD:ARG:FLAG additional flag for strings inside the argument\n\ number ARG of keyword WORD\n")); printf (_("\ (only languages C, C++, ObjectiveC, Shell,\n\ Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\ - C#, awk, YCP, Tcl, Perl, PHP, Lua, GCC-source)\n")); + C#, awk, YCP, 
Tcl, Perl, PHP, Lua, JavaScript, GCC-source)\n")); printf (_("\ -T, --trigraphs understand ANSI C trigraphs for input\n")); printf (_("\ @@ -1778,6 +1783,11 @@ xgettext_record_flag (const char *optionstring) name_start, name_end, argnum, value, pass); break; + case format_javascript: + flag_context_list_table_insert (&flag_table_javascript, 0, + name_start, name_end, + argnum, value, pass); + break; default: abort (); } @@ -3196,6 +3206,7 @@ language_to_extractor (const char *name) SCANNERS_RST SCANNERS_GLADE SCANNERS_LUA + SCANNERS_JAVASCRIPT /* Here may follow more languages and their scanners: pike, etc... Make sure new scanners honor the --exclude-file option. */ }; @@ -3280,6 +3291,7 @@ extension_to_language (const char *extension) EXTENSIONS_RST EXTENSIONS_GLADE EXTENSIONS_LUA + EXTENSIONS_JAVASCRIPT /* Here may follow more file extensions... */ }; diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index 5ee060f0e..72f46b1ae 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,15 @@ +2013-04-17 Andreas Stricker + + Support for JavaScript. + * format-javascript-1: New file. + * format-javascript-2: New file. + * xgettext-javascript-1: New file. + * xgettext-javascript-2: New file. + * xgettext-javascript-3: New file. + * xgettext-javascript-4: New file. + * lang-javascript: New file. + * Makefile.am (TESTS): Add them. + 2013-04-16 Ľubomír Remák Support for escape sequences added in Lua 5.2. 
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index 340523934..8f7b52d07 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -101,6 +101,8 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ xgettext-tcl-1 xgettext-tcl-2 xgettext-tcl-3 \ xgettext-ycp-1 xgettext-ycp-2 xgettext-ycp-3 xgettext-ycp-4 \ xgettext-lua-1 xgettext-lua-2 \ + xgettext-javascript-1 xgettext-javascript-2 xgettext-javascript-3 \ + xgettext-javascript-4 \ format-awk-1 format-awk-2 \ format-boost-1 format-boost-2 \ format-c-1 format-c-2 format-c-3 format-c-4 format-c-5 \ @@ -125,13 +127,14 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ format-tcl-1 format-tcl-2 \ format-ycp-1 format-ycp-2 \ format-lua-1 format-lua-2 \ + format-javascript-1 format-javascript-2 \ plural-1 plural-2 \ gettextpo-1 \ lang-c lang-c++ lang-objc lang-sh lang-bash lang-python-1 \ lang-python-2 lang-clisp lang-elisp lang-librep lang-guile \ lang-smalltalk lang-java lang-csharp lang-gawk lang-pascal \ lang-ycp lang-tcl lang-perl-1 lang-perl-2 lang-php lang-po lang-rst \ - lang-lua + lang-lua lang-javascript EXTRA_DIST += $(TESTS) \ test.mo xg-c-1.ok.po mex-test2.ok \ diff --git a/gettext-tools/tests/format-javascript-1 b/gettext-tools/tests/format-javascript-1 new file mode 100644 index 000000000..f632d1a2c --- /dev/null +++ b/gettext-tools/tests/format-javascript-1 @@ -0,0 +1,90 @@ +#! /bin/sh + +# Test recognition of JavaScript format strings. 
+ +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-js-1.data" +cat <<\EOF > f-js-1.data +# Valid: no argument +"abc%%" +# Valid: one character argument +"abc%c" +# Valid: one string argument +"abc%s" +# Valid: one integer argument +"abc%b" +# Valid: one integer argument +"abc%d" +# Valid: one integer argument +"abc%o" +# Valid: one integer argument +"abc%x" +# Valid: one integer argument +"abc%X" +# Valid: one floating-point argument +"abc%f" +# Valid: one object argument +"abc%j" +# Valid: one argument with flags +"abc%Id" +# Valid: one argument with width +"abc%2d" +# Valid: one argument with precision +"abc%.4f" +# Valid: one argument with width and precision +"abc%14.4f" +# Invalid: unterminated +"abc%" +# Invalid: unknown format specifier +"abc%y" +# Invalid: flags after width +"abc%1Ig" +# Invalid: twice precision +"abc%.4.2f" +# Valid: three arguments +"abc%d%j%j" +EOF + +tmpfiles="$tmpfiles f-js-1.err" +: ${XGETTEXT=xgettext} +n=0 +while read comment; do + read string + n=`expr $n + 1` + tmpfiles="$tmpfiles f-js-1-$n.in f-js-1-$n.po" + cat < f-js-1-$n.in +gettext(${string}); +EOF + # Hide xgettext's "The translator cannot reorder the arguments." warnings. 
+ ${XGETTEXT} -L JavaScript -o f-js-1-$n.po f-js-1-$n.in 2> f-js-1.err \ + || { cat f-js-1.err 1>&2; exit 1; } + test -f f-js-1-$n.po || exit 1 + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if grep javascript-format f-js-1-$n.po > /dev/null; then + : + else + fail=yes + fi + else + if grep javascript-format f-js-1-$n.po > /dev/null; then + fail=yes + else + : + fi + fi + if test -n "$fail"; then + echo "Format string recognition error:" 1>&2 + cat f-js-1-$n.in 1>&2 + echo "Got:" 1>&2 + cat f-js-1-$n.po 1>&2 + exit 1 + fi + rm -f f-js-1-$n.in f-js-1-$n.po +done < f-js-1.data + +rm -fr $tmpfiles + +exit 0 diff --git a/gettext-tools/tests/format-javascript-2 b/gettext-tools/tests/format-javascript-2 new file mode 100644 index 000000000..3dfcfc821 --- /dev/null +++ b/gettext-tools/tests/format-javascript-2 @@ -0,0 +1,88 @@ +#! /bin/sh + +# Test checking of JavaScript format strings. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-js-2.data" +cat <<\EOF > f-js-2.data +# Valid: %% doesn't count +msgid "abc%%def" +msgstr "xyz" +# Invalid: invalid msgstr +msgid "abc%%def" +msgstr "xyz%" +# Valid: same arguments, with different widths +msgid "abc%2sdef" +msgstr "xyz%3s" +# Invalid: too few arguments +msgid "abc%sdef%u" +msgstr "xyz%s" +# Invalid: too many arguments +msgid "abc%udef" +msgstr "xyz%uvw%c" +# Valid: type compatibility +msgid "abc%o" +msgstr "xyz%d" +# Valid: type compatibility +msgid "abc%o" +msgstr "xyz%x" +# Valid: type compatibility +msgid "abc%o" +msgstr "xyz%X" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%x" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%X" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%d" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%d" +EOF + +: ${MSGFMT=msgfmt} +n=0 +while read comment; do + read msgid_line + read msgstr_line + n=`expr $n + 1` + tmpfiles="$tmpfiles f-js-2-$n.po 
f-js-2-$n.mo" + cat < f-js-2-$n.po +#, python-format +${msgid_line} +${msgstr_line} +EOF + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if ${MSGFMT} --check-format -o f-js-2-$n.mo f-js-2-$n.po; then + : + else + fail=yes + fi + else + ${MSGFMT} --check-format -o f-js-2-$n.mo f-js-2-$n.po 2> /dev/null + if test $? = 1; then + : + else + fail=yes + fi + fi + if test -n "$fail"; then + echo "Format string checking error:" 1>&2 + cat f-js-2-$n.po 1>&2 + exit 1 + fi + rm -f f-js-2-$n.po f-js-2-$n.mo +done < f-js-2.data + +rm -fr $tmpfiles + +exit 0 diff --git a/gettext-tools/tests/lang-javascript b/gettext-tools/tests/lang-javascript new file mode 100755 index 000000000..13ea59046 --- /dev/null +++ b/gettext-tools/tests/lang-javascript @@ -0,0 +1,132 @@ +#! /bin/sh + +# Test of gettext facilities in the JavaScript language. +# Assumes an fr_FR locale is installed. +# Assumes the following packages are installed: gjs. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles prog.js" +cat <<\EOF > prog.js +const Format = imports.format; +const Gettext = imports.gettext; + +String.prototype.format = Format.format; +const _ = Gettext.gettext; + +Gettext.textdomain ("prog"); +Gettext.bindtextdomain ("prog", "."); +print(_("'Your command, please?', asked the waiter.")); +print(_("%s is replaced by %s.").format("FF", "EUR")); +EOF + +tmpfiles="$tmpfiles prog.tmp prog.pot" +: ${XGETTEXT=xgettext} +${XGETTEXT} -o prog.tmp --omit-header --no-location prog.js +test $? = 0 || { rm -fr $tmpfiles; exit 1; } +LC_ALL=C tr -d '\r' < prog.tmp > prog.pot +test $? = 0 || { rm -fr $tmpfiles; exit 1; } + +tmpfiles="$tmpfiles prog.ok" +cat < prog.ok +msgid "'Your command, please?', asked the waiter." +msgstr "" + +#, javascript-format +msgid "%s is replaced by %s." 
+msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} prog.ok prog.pot || exit 1 + +tmpfiles="$tmpfiles fr.po" +cat <<\EOF > fr.po +msgid "" +msgstr "Content-Type: text/plain; charset=ISO-8859-1\n" + +msgid "'Your command, please?', asked the waiter." +msgstr "«Votre commande, s'il vous plait», dit le garçon." + +# Reverse the arguments. +#, javascript-format +msgid "%s is replaced by %s." +msgstr "%s remplace %s." +EOF + +tmpfiles="$tmpfiles fr.po.tmp fr.po.new" +: ${MSGMERGE=msgmerge} +${MSGMERGE} -q -o fr.po.tmp fr.po prog.pot +test $? = 0 || { rm -fr $tmpfiles; exit 1; } +LC_ALL=C tr -d '\r' < fr.po.tmp > fr.po.new +test $? = 0 || { rm -fr $tmpfiles; exit 1; } + +: ${DIFF=diff} +${DIFF} fr.po fr.po.new || exit 1 + +tmpfiles="$tmpfiles fr" +test -d fr || mkdir fr +test -d fr/LC_MESSAGES || mkdir fr/LC_MESSAGES + +: ${MSGFMT=msgfmt} +${MSGFMT} -o fr/LC_MESSAGES/prog.mo fr.po + +# Test for presence of gjs. +(gjs -h) >/dev/null 2>/dev/null +test $? -le 1 \ + || { echo "Skipping test: gjs not found"; rm -fr $tmpfiles; exit 77; } + +# Test which of the fr_FR locales are installed. +: ${LOCALE_FR=fr_FR} +: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +if test $LOCALE_FR != none; then + LC_ALL=$LOCALE_FR ./testlocale + case $? in + 0) ;; + 77) LOCALE_FR=none;; + *) exit 1;; + esac +fi +if test $LOCALE_FR_UTF8 != none; then + LC_ALL=$LOCALE_FR_UTF8 ./testlocale + case $? in + 0) ;; + 77) LOCALE_FR_UTF8=none;; + *) exit 1;; + esac +fi +if test $LOCALE_FR = none && test $LOCALE_FR_UTF8 = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no french locale is installed" + else + echo "Skipping test: no french locale is supported" + fi + rm -fr $tmpfiles; exit 77 +fi + +tmpfiles="$tmpfiles prog.ok prog.oku prog.out" +: ${DIFF=diff} +cat <<\EOF > prog.ok +«Votre commande, s'il vous plait», dit le garçon. +FF remplace EUR. +EOF +cat <<\EOF > prog.oku +«Votre commande, s'il vous plait», dit le garçon. +FF remplace EUR. 
+EOF + +: ${LOCALE_FR=fr_FR} +: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +if test $LOCALE_FR != none; then + LANGUAGE= LC_ALL=$LOCALE_FR gjs prog.js > prog.out || exit 1 + ${DIFF} prog.ok prog.out || exit 1 +fi +if test $LOCALE_FR_UTF8 != none; then + LANGUAGE= LC_ALL=$LOCALE_FR_UTF8 gjs prog.js > prog.out || exit 1 + ${DIFF} -u prog.oku prog.out || exit 1 +fi + +rm -fr $tmpfiles + +exit 0 diff --git a/gettext-tools/tests/xgettext-javascript-1 b/gettext-tools/tests/xgettext-javascript-1 new file mode 100755 index 000000000..986265626 --- /dev/null +++ b/gettext-tools/tests/xgettext-javascript-1 @@ -0,0 +1,64 @@ +#!/bin/sh + +# Test of JavaScript support. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles xg-js-1.js" +cat <<\EOF > xg-js-1.js +var s1 = "Simple string, no gettext needed", + s2 = _("Extract this first string"); +function foo(a) { + var s3 = "Prefix _(" + _("Extract this second string") + ") Postfix"; +} +if (document.getElementsById("foo")[0].innerHTML == _("Extract this thirth string")) { + /* _("This is a comment and must not be extracted!") */ +} +EOF + +tmpfiles="$tmpfiles xg-js-1.err xg-js-1.tmp xg-js-1.pot" +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-js-1.tmp xg-js-1.js 2>xg-js-1.err +test $? = 0 || { cat xg-js-1.err; rm -fr $tmpfiles; exit 1; } +# Don't simplify this to "grep ... < xg-js-1.tmp", otherwise OpenBSD 4.0 grep +# only outputs "Binary file (standard input) matches". +cat xg-js-1.tmp | grep -v 'POT-Creation-Date' | LC_ALL=C tr -d '\r' > xg-js-1.pot + +tmpfiles="$tmpfiles xg-js-1.ok" +cat <<\EOF > xg-js-1.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "Extract this first string" +msgstr "" + +msgid "Extract this second string" +msgstr "" + +msgid "Extract this thirth string" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-js-1.ok xg-js-1.pot +result=$? + +rm -fr $tmpfiles + +exit $result diff --git a/gettext-tools/tests/xgettext-javascript-2 b/gettext-tools/tests/xgettext-javascript-2 new file mode 100755 index 000000000..da1e34a9a --- /dev/null +++ b/gettext-tools/tests/xgettext-javascript-2 @@ -0,0 +1,107 @@ +#!/bin/sh + +# Test of JavaScript support. +# Playing with regex and division operator + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles xg-js-2.js" +cat <<\EOF > xg-js-2.js +// RegExp literals containing string quotes must not desync the parser +var d = 1 / 2 / 4; +var s = " x " + /^\d/.match("0815").replace(/[a-z]/g, '@'); +var s1 = /"/.match(_("RegExp test string #1")); +var s2 = /'/.match(_("RegExp test string #2")); +var s3 = /['a-b]/.match(_('RegExp test string #3')); +var s4 = /["a-b]/.match(_('RegExp test string #4')); +var s5 = /[a-b']/.match(_('RegExp test string #5')); +var s6 = /[a-b"]/.match(_('RegExp test string #6')); +var c = 35 / 2 / 8 + _("RegExp test string #7").length / 32.0; +var sizestr = Math.round(size/1024*factor)/factor+_("RegExp test string #8"); +var cssClassType = attr.type.replace(/^.*\//, _('RegExp test string #9')).replace(/\./g, '-'); +var lookup = lookuptable[idx]/factor+_("RegExp test string #10"); +function doit() { + return /\./.match(_("RegExp test string #11")); +} +if (false) + /foo/.match(_("RegExp test string #12")); +else + /foo/.match(_("RegExp test string #13")); +EOF + +tmpfiles="$tmpfiles xg-js-2.err 
xg-js-2.tmp xg-js-2.pot" +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-js-2.tmp xg-js-2.js 2>xg-js-2.err +test $? = 0 || { cat xg-js-2.err; rm -fr $tmpfiles; exit 1; } +# Don't simplify this to "grep ... < xg-js-2.tmp", otherwise OpenBSD 4.0 grep +# only outputs "Binary file (standard input) matches". +cat xg-js-2.tmp | grep -v 'POT-Creation-Date' | LC_ALL=C tr -d '\r' > xg-js-2.pot + +tmpfiles="$tmpfiles xg-js-2.ok" +cat <<\EOF > xg-js-2.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "RegExp test string #1" +msgstr "" + +msgid "RegExp test string #2" +msgstr "" + +msgid "RegExp test string #3" +msgstr "" + +msgid "RegExp test string #4" +msgstr "" + +msgid "RegExp test string #5" +msgstr "" + +msgid "RegExp test string #6" +msgstr "" + +msgid "RegExp test string #7" +msgstr "" + +msgid "RegExp test string #8" +msgstr "" + +msgid "RegExp test string #9" +msgstr "" + +msgid "RegExp test string #10" +msgstr "" + +msgid "RegExp test string #11" +msgstr "" + +msgid "RegExp test string #12" +msgstr "" + +msgid "RegExp test string #13" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-js-2.ok xg-js-2.pot +result=$? + +rm -fr $tmpfiles + +exit $result diff --git a/gettext-tools/tests/xgettext-javascript-3 b/gettext-tools/tests/xgettext-javascript-3 new file mode 100755 index 000000000..2d758f066 --- /dev/null +++ b/gettext-tools/tests/xgettext-javascript-3 @@ -0,0 +1,63 @@ +#!/bin/sh + +# Test of JavaScript support. 
+# Playing with concatenation of string literals within the gettext function + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles xg-js-2.js" +cat <<\EOF > xg-js-2.js +// The usual way to concatenate strings is the plus '+' sign +var s1 = _("Concatenation #1 " + "- String part added"); +var s2 = _('Concatenation #2 ' + '- String part added'); +var s3 = _("This" + " whole " + + "string" + + ' should' + " be " + 'extracted'); +EOF + +tmpfiles="$tmpfiles xg-js-2.err xg-js-2.tmp xg-js-2.pot" +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-js-2.tmp xg-js-2.js 2>xg-js-2.err +test $? = 0 || { cat xg-js-2.err; rm -fr $tmpfiles; exit 1; } +# Don't simplify this to "grep ... < xg-js-2.tmp", otherwise OpenBSD 4.0 grep +# only outputs "Binary file (standard input) matches". +cat xg-js-2.tmp | grep -v 'POT-Creation-Date' | LC_ALL=C tr -d '\r' > xg-js-2.pot + +tmpfiles="$tmpfiles xg-js-2.ok" +cat <<\EOF > xg-js-2.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "Concatenation #1 - String part added" +msgstr "" + +msgid "Concatenation #2 - String part added" +msgstr "" + +msgid "This whole string should be extracted" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-js-2.ok xg-js-2.pot +result=$? 
+ +rm -fr $tmpfiles + +exit $result diff --git a/gettext-tools/tests/xgettext-javascript-4 b/gettext-tools/tests/xgettext-javascript-4 new file mode 100755 index 000000000..7272fb58d --- /dev/null +++ b/gettext-tools/tests/xgettext-javascript-4 @@ -0,0 +1,55 @@ +#!/bin/sh + +# Test of JavaScript Unicode support. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles xg-js-1.js" +cat <<\EOF > xg-js-1.js +var s1 = _("Unicode escape \u3042"); +var s2 = _("Surrogate pair \uD835\uDC9C"); +EOF + +tmpfiles="$tmpfiles xg-js-1.err xg-js-1.tmp xg-js-1.pot" +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-js-1.tmp xg-js-1.js 2>xg-js-1.err +test $? = 0 || { cat xg-js-1.err; rm -fr $tmpfiles; exit 1; } +# Don't simplify this to "grep ... < xg-js-1.tmp", otherwise OpenBSD 4.0 grep +# only outputs "Binary file (standard input) matches". +cat xg-js-1.tmp | grep -v 'POT-Creation-Date' | LC_ALL=C tr -d '\r' > xg-js-1.pot + +tmpfiles="$tmpfiles xg-js-1.ok" +cat <<\EOF > xg-js-1.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "Unicode escape あ" +msgstr "" + +msgid "Surrogate pair 𝒜" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-js-1.ok xg-js-1.pot +result=$? 
+ +rm -fr $tmpfiles + +exit $result diff --git a/gettext-tools/woe32dll/gettextsrc-exports.c b/gettext-tools/woe32dll/gettextsrc-exports.c index 2b0cb3331..e37b17831 100644 --- a/gettext-tools/woe32dll/gettextsrc-exports.c +++ b/gettext-tools/woe32dll/gettextsrc-exports.c @@ -30,6 +30,7 @@ VARIABLE(formatstring_elisp) VARIABLE(formatstring_gcc_internal) VARIABLE(formatstring_gfc_internal) VARIABLE(formatstring_java) +VARIABLE(formatstring_javascript) VARIABLE(formatstring_kde) VARIABLE(formatstring_librep) VARIABLE(formatstring_lisp)