From: Bruno Haible Date: Sun, 7 Apr 2019 23:54:50 +0000 (+0200) Subject: xgettext: Recognize ECMAScript-262 6th edition template literals. X-Git-Tag: v0.20~69 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=db8e4b76206ec179112b8171b92270e025c62a20;p=thirdparty%2Fgettext.git xgettext: Recognize ECMAScript-262 6th edition template literals. Reported by Philip Chimento. * gettext-tools/src/x-javascript.c (P7_TEMPLATE_START_OF_EXPRESSION): New macro. (enum token_type_ty): Add token_type_template, token_type_ltemplate, token_type_mtemplate, token_type_rtemplate. (free_token): Treat token_type_template like token_type_string. (phase7_getuc): Add support for backquote quote_char. (brace_depth, template_literal_depth): New variables. (phase5_get): Parse template literals. (x_javascript_lex): Treat token_type_template like token_type_string. Handle the combination of symbol followed by template. (extract_balanced): Treat token_type_template like token_type_string, and token_type_ltemplate, token_type_mtemplate, token_type_rtemplate like token_type_other. (extract_javascript): Initialize brace_depth, template_literal_depth. * gettext-tools/tests/xgettext-javascript-7: New file. * gettext-tools/tests/Makefile.am (TESTS): Add it. * gettext-tools/doc/gettext.texi (JavaScript): Mention the template literal syntax for strings. * NEWS: Mention the improvement. --- diff --git a/NEWS b/NEWS index f8e9f1155..d80445b66 100644 --- a/NEWS +++ b/NEWS @@ -50,6 +50,8 @@ Free Pascal compiler version 3.0.0 or newer. - Vala: xgettext now parses escape sequences in strings more accurately. + - JavaScript: + xgettext now parses template literals correctly. * Runtime behaviour: - The interpretation of the language preferences on macOS has been fixed. 
diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index 40e0d1ff8..a07701a0e 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -12070,6 +12070,8 @@ gjs @item @code{'abc'} +@item @code{`abc`} + @end itemize @item gettext shorthand diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c index 88bc55349..dd19b372e 100644 --- a/gettext-tools/src/x-javascript.c +++ b/gettext-tools/src/x-javascript.c @@ -633,6 +633,7 @@ phase3_ungetc (int c) /* Return value of phase7_getuc when EOF is reached. */ #define P7_EOF (-1) #define P7_STRING_END (-2) +#define P7_TEMPLATE_START_OF_EXPRESSION (-3) /* ${ */ /* Convert an UTF-16 or UTF-32 code point to a return value that can be distinguished from a single-byte return value. */ @@ -663,6 +664,10 @@ enum token_type_ty token_type_operator, /* - * / % . < > = ~ ! | & ? : ^ */ token_type_equal, /* = */ token_type_string, /* "abc", 'abc' */ + token_type_template, /* `abc` */ + token_type_ltemplate, /* left part of template: `abc${ */ + token_type_mtemplate, /* middle part of template: }abc${ */ + token_type_rtemplate, /* right part of template: }abc` */ token_type_keyword, /* return, else */ token_type_symbol, /* symbol, number */ token_type_other /* misc. 
operator */ @@ -674,8 +679,8 @@ struct token_ty { token_type_ty type; char *string; /* for token_type_symbol, token_type_keyword */ - mixed_string_ty *mixed_string; /* for token_type_string */ - refcounted_string_list_ty *comment; /* for token_type_string */ + mixed_string_ty *mixed_string; /* for token_type_string, token_type_template */ + refcounted_string_list_ty *comment; /* for token_type_string, token_type_template */ int line_number; }; @@ -686,7 +691,7 @@ free_token (token_ty *tp) { if (tp->type == token_type_symbol || tp->type == token_type_keyword) free (tp->string); - if (tp->type == token_type_string) + if (tp->type == token_type_string || tp->type == token_type_template) { mixed_string_free (tp->mixed_string); drop_reference (tp->comment); @@ -695,7 +700,7 @@ free_token (token_ty *tp) /* JavaScript provides strings with either double or single quotes: - "abc" or 'abc' + "abc" or 'abc' or `abc` Both may contain special sequences after a backslash: \', \", \\, \b, \f, \n, \r, \t, \v Special characters can be entered using hexadecimal escape @@ -723,14 +728,39 @@ phase7_getuc (int quote_char) if (c == quote_char) return P7_STRING_END; + if (c == '$' && quote_char == '`') + { + int c1 = phase2_getc (); + + if (c1 == '{') + return P7_TEMPLATE_START_OF_EXPRESSION; + phase2_ungetc (c1); + } + if (c == '\n') { - phase2_ungetc (c); - error_with_progname = false; - error (0, 0, _("%s:%d: warning: unterminated string"), - logical_file_name, line_number); - error_with_progname = true; - return P7_STRING_END; + if (quote_char == '`') + return UNICODE ('\n'); + else + { + phase2_ungetc (c); + error_with_progname = false; + error (0, 0, _("%s:%d: warning: unterminated string"), + logical_file_name, line_number); + error_with_progname = true; + return P7_STRING_END; + } + } + + if (c == '\r' && quote_char == '`') + { + /* Line terminators inside template literals are normalized to \n, + says . 
*/ + int c1 = phase2_getc (); + + if (c1 == '\n') + return UNICODE ('\n'); + phase2_ungetc (c1); } if (c != '\\') @@ -899,6 +929,12 @@ phase5_scan_regexp (void) phase2_ungetc (c); } +/* Number of open '{' tokens. */ +static int brace_depth; + +/* Number of open template literals `...${ */ +static int template_literal_depth; + /* Number of open XML elements. */ static int xml_element_depth; static bool inside_embedded_js_in_xml; @@ -1162,6 +1198,52 @@ phase5_get (token_ty *tp) return; } + case '`': + /* Template literals. */ + { + struct mixed_string_buffer msb; + + lexical_context = lc_string; + /* Start accumulating the string. */ + mixed_string_buffer_init (&msb, lexical_context, + logical_file_name, line_number); + for (;;) + { + int uc = phase7_getuc ('`'); + + /* Keep line_number in sync. */ + msb.line_number = line_number; + + if (uc == P7_EOF || uc == P7_STRING_END) + { + tp->mixed_string = mixed_string_buffer_result (&msb); + tp->comment = add_reference (savable_comment); + tp->type = last_token_type = token_type_template; + break; + } + + if (uc == P7_TEMPLATE_START_OF_EXPRESSION) + { + mixed_string_buffer_destroy (&msb); + tp->type = last_token_type = token_type_ltemplate; + template_literal_depth++; + break; + } + + if (IS_UNICODE (uc)) + { + assert (UNICODE_VALUE (uc) >= 0 + && UNICODE_VALUE (uc) < 0x110000); + mixed_string_buffer_append_unicode (&msb, + UNICODE_VALUE (uc)); + } + else + mixed_string_buffer_append_char (&msb, uc); + } + lexical_context = lc_outside; + return; + } + case '+': tp->type = last_token_type = token_type_plus; return; @@ -1274,12 +1356,38 @@ phase5_get (token_ty *tp) case '{': if (xml_element_depth > 0 && !inside_embedded_js_in_xml) inside_embedded_js_in_xml = true; + else + brace_depth++; tp->type = last_token_type = token_type_other; return; case '}': if (xml_element_depth > 0 && inside_embedded_js_in_xml) inside_embedded_js_in_xml = false; + else if (brace_depth > 0) + brace_depth--; + else if (template_literal_depth > 0) + 
{ + /* Middle or right part of template literal. */ + for (;;) + { + int uc = phase7_getuc ('`'); + + if (uc == P7_EOF || uc == P7_STRING_END) + { + tp->type = last_token_type = token_type_rtemplate; + template_literal_depth--; + break; + } + + if (uc == P7_TEMPLATE_START_OF_EXPRESSION) + { + tp->type = last_token_type = token_type_mtemplate; + break; + } + } + return; + } tp->type = last_token_type = token_type_other; return; @@ -1326,13 +1434,14 @@ phase5_unget (token_ty *tp) } -/* String concatenation with '+'. */ +/* String concatenation with '+'. + Handling of tagged template literals. */ static void x_javascript_lex (token_ty *tp) { phase5_get (tp); - if (tp->type == token_type_string) + if (tp->type == token_type_string || tp->type == token_type_template) { mixed_string_ty *sum = tp->mixed_string; @@ -1346,7 +1455,8 @@ x_javascript_lex (token_ty *tp) token_ty token3; phase5_get (&token3); - if (token3.type == token_type_string) + if (token3.type == token_type_string + || token3.type == token_type_template) { sum = mixed_string_concat_free1 (sum, token3.mixed_string); @@ -1361,6 +1471,24 @@ x_javascript_lex (token_ty *tp) } tp->mixed_string = sum; } + else if (tp->type == token_type_symbol) + { + token_ty token2; + + phase5_get (&token2); + if (token2.type == token_type_template) + { + /* The value of + tag `abc` + is the value of the function call + tag (["abc"]) + We don't know anything about this value. Therefore, don't + let the extractor see this template literal. 
*/ + free_token (&token2); + } + else + phase5_unget (&token2); + } } @@ -1500,6 +1628,7 @@ extract_balanced (message_list_ty *mlp, continue; case token_type_string: + case token_type_template: { lex_pos_ty pos; @@ -1528,6 +1657,9 @@ extract_balanced (message_list_ty *mlp, arglist_parser_done (argparser, arg); return true; + case token_type_ltemplate: + case token_type_mtemplate: + case token_type_rtemplate: case token_type_keyword: case token_type_plus: case token_type_regexp: @@ -1563,6 +1695,8 @@ extract_javascript (FILE *f, last_comment_line = -1; last_non_comment_line = -1; + brace_depth = 0; + template_literal_depth = 0; xml_element_depth = 0; inside_embedded_js_in_xml = false; diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index b12851b16..083adebae 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -1,5 +1,5 @@ ## Makefile for the gettext-tools/tests subdirectory of GNU gettext -## Copyright (C) 1995-1997, 2001-2010, 2012-2016, 2018 Free Software Foundation, Inc. +## Copyright (C) 1995-1997, 2001-2010, 2012-2016, 2018-2019 Free Software Foundation, Inc. ## ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -118,6 +118,7 @@ TESTS = gettext-1 gettext-2 \ xgettext-lua-1 xgettext-lua-2 \ xgettext-javascript-1 xgettext-javascript-2 xgettext-javascript-3 \ xgettext-javascript-4 xgettext-javascript-5 xgettext-javascript-6 \ + xgettext-javascript-7 \ xgettext-vala-1 xgettext-vala-2 \ xgettext-gsettings-1 \ xgettext-desktop-1 \ diff --git a/gettext-tools/tests/xgettext-javascript-7 b/gettext-tools/tests/xgettext-javascript-7 new file mode 100755 index 000000000..c9516ec91 --- /dev/null +++ b/gettext-tools/tests/xgettext-javascript-7 @@ -0,0 +1,74 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test of JavaScript template literal support. 
+ +cat <<\EOF > xg-js-7.js +var s0 = _(`A template literal without substitutions`); +var s1 = _(`A template literal with +embedded +newlines`); +var s2 = _(`A template literal with ${n} substitutions`); +var s3 = _(`A template literal with several substitutions: ${a} and ${b} and ${c} and so on`); +var s4 = `/${looks_like_regex}`; +var s5 = _('not part of a regex'); +var s6 = `that's a valid string. ` + _('This too'); +var s7 = _(tag`A template literal with a tag`); +var s8 = `a${`b${`c`+d}`}e`; +var s9 = _("a normal string"); +var s10 = `abc${foo({},_('should be extracted'))}xyz`; +EOF + +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-js-7.tmp xg-js-7.js 2>xg-js-7.err +test $? = 0 || { cat xg-js-7.err; Exit 1; } +# Don't simplify this to "grep ... < xg-js-7.tmp", otherwise OpenBSD 4.0 grep +# only outputs "Binary file (standard input) matches". +cat xg-js-7.tmp | grep -v 'POT-Creation-Date' | LC_ALL=C tr -d '\r' > xg-js-7.pot + +cat <<\EOF > xg-js-7.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "A template literal without substitutions" +msgstr "" + +msgid "" +"A template literal with\n" +"embedded\n" +"newlines" +msgstr "" + +msgid "not part of a regex" +msgstr "" + +msgid "This too" +msgstr "" + +msgid "a normal string" +msgstr "" + +msgid "should be extracted" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-js-7.ok xg-js-7.pot +result=$? + +exit $result