git.ipfire.org Git - thirdparty/gettext.git/commitdiff
xgettext: Recognize ECMAScript-262 6th edition template literals.
author Bruno Haible <bruno@clisp.org>
Sun, 7 Apr 2019 23:54:50 +0000 (01:54 +0200)
committer Bruno Haible <bruno@clisp.org>
Mon, 8 Apr 2019 00:00:51 +0000 (02:00 +0200)
Reported by Philip Chimento <philip.chimento@gmail.com>
in <https://savannah.gnu.org/bugs/index.php?50920>.

* gettext-tools/src/x-javascript.c (P7_TEMPLATE_START_OF_EXPRESSION): New macro.
(enum token_type_ty): Add token_type_template, token_type_ltemplate,
token_type_mtemplate, token_type_rtemplate.
(free_token): Treat token_type_template like token_type_string.
(phase7_getuc): Add support for backquote quote_char.
(brace_depth, template_literal_depth): New variables.
(phase5_get): Parse template literals.
(x_javascript_lex): Treat token_type_template like token_type_string. Handle the
combination of symbol followed by template.
(extract_balanced): Treat token_type_template like token_type_string, and
token_type_ltemplate, token_type_mtemplate, token_type_rtemplate like
token_type_other.
(extract_javascript): Initialize brace_depth, template_literal_depth.
* gettext-tools/tests/xgettext-javascript-7: New file.
* gettext-tools/tests/Makefile.am (TESTS): Add it.
* gettext-tools/doc/gettext.texi (JavaScript): Mention the template literal
syntax for strings.
* NEWS: Mention the improvement.

NEWS
gettext-tools/doc/gettext.texi
gettext-tools/src/x-javascript.c
gettext-tools/tests/Makefile.am
gettext-tools/tests/xgettext-javascript-7 [new file with mode: 0755]

diff --git a/NEWS b/NEWS
index f8e9f1155d499183875b990f3900890cb8b69e76..d80445b6600489056a54e51968c9682e779fe3a4 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -50,6 +50,8 @@
     Free Pascal compiler version 3.0.0 or newer.
   - Vala:
     xgettext now parses escape sequences in strings more accurately.
+  - JavaScript:
+    xgettext now parses template literals correctly.
 
 * Runtime behaviour:
   - The interpretation of the language preferences on macOS has been fixed.
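diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi
index 40e0d1ff86c144be4c15f98f330d4e73434ad7cf..a07701a0ef1a2571e38a27f81d008322d894addd 100644 (file)
--- a/gettext-tools/doc/gettext.texi
+++ b/gettext-tools/doc/gettext.texi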
@@ -12070,6 +12070,8 @@ gjs
 
 @item @code{'abc'}
 
+@item @code{`abc`}
+
 @end itemize
 
 @item gettext shorthand
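diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c
index 88bc55349ff08f36b12e8e9dfd34c47c38fa4c40..dd19b372e46c4e601bb9e4681cc9d0f32e598176 100644 (file)
--- a/gettext-tools/src/x-javascript.c
+++ b/gettext-tools/src/x-javascript.c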
@@ -633,6 +633,7 @@ phase3_ungetc (int c)
 /* Return value of phase7_getuc when EOF is reached.  */
 #define P7_EOF (-1)
 #define P7_STRING_END (-2)
+#define P7_TEMPLATE_START_OF_EXPRESSION (-3) /* ${ */
 
 /* Convert an UTF-16 or UTF-32 code point to a return value that can be
    distinguished from a single-byte return value.  */
@@ -663,6 +664,10 @@ enum token_type_ty
   token_type_operator,          /* - * / % . < > = ~ ! | & ? : ^ */
   token_type_equal,             /* = */
   token_type_string,            /* "abc", 'abc' */
+  token_type_template,          /* `abc` */
+  token_type_ltemplate,         /* left part of template: `abc${ */
+  token_type_mtemplate,         /* middle part of template: }abc${ */
+  token_type_rtemplate,         /* right part of template: }abc` */
   token_type_keyword,           /* return, else */
   token_type_symbol,            /* symbol, number */
   token_type_other              /* misc. operator */
@@ -674,8 +679,8 @@ struct token_ty
 {
   token_type_ty type;
   char *string;                  /* for token_type_symbol, token_type_keyword */
-  mixed_string_ty *mixed_string;        /* for token_type_string */
-  refcounted_string_list_ty *comment;   /* for token_type_string */
+  mixed_string_ty *mixed_string;        /* for token_type_string, token_type_template */
+  refcounted_string_list_ty *comment;   /* for token_type_string, token_type_template */
   int line_number;
 };
 
@@ -686,7 +691,7 @@ free_token (token_ty *tp)
 {
   if (tp->type == token_type_symbol || tp->type == token_type_keyword)
     free (tp->string);
-  if (tp->type == token_type_string)
+  if (tp->type == token_type_string || tp->type == token_type_template)
     {
       mixed_string_free (tp->mixed_string);
       drop_reference (tp->comment);
@@ -695,7 +700,7 @@ free_token (token_ty *tp)
 
 
 /* JavaScript provides strings with either double or single quotes:
-     "abc" or 'abc'
+     "abc" or 'abc' or `abc`
    Both may contain special sequences after a backslash:
      \', \", \\, \b, \f, \n, \r, \t, \v
    Special characters can be entered using hexadecimal escape
@@ -723,14 +728,39 @@ phase7_getuc (int quote_char)
       if (c == quote_char)
         return P7_STRING_END;
 
+      if (c == '$' && quote_char == '`')
+        {
+          int c1 = phase2_getc ();
+
+          if (c1 == '{')
+            return P7_TEMPLATE_START_OF_EXPRESSION;
+          phase2_ungetc (c1);
+        }
+
       if (c == '\n')
         {
-          phase2_ungetc (c);
-          error_with_progname = false;
-          error (0, 0, _("%s:%d: warning: unterminated string"),
-                 logical_file_name, line_number);
-          error_with_progname = true;
-          return P7_STRING_END;
+          if (quote_char == '`')
+            return UNICODE ('\n');
+          else
+            {
+              phase2_ungetc (c);
+              error_with_progname = false;
+              error (0, 0, _("%s:%d: warning: unterminated string"),
+                     logical_file_name, line_number);
+              error_with_progname = true;
+              return P7_STRING_END;
+            }
+        }
+
+      if (c == '\r' && quote_char == '`')
+        {
+          /* Line terminators inside template literals are normalized to \n,
+             says <http://exploringjs.com/es6/ch_template-literals.html>.  */
+          int c1 = phase2_getc ();
+
+          if (c1 == '\n')
+            return UNICODE ('\n');
+          phase2_ungetc (c1);
         }
 
       if (c != '\\')
@@ -899,6 +929,12 @@ phase5_scan_regexp (void)
       phase2_ungetc (c);
 }
 
+/* Number of open '{' tokens.  */
+static int brace_depth;
+
+/* Number of open template literals `...${  */
+static int template_literal_depth;
+
 /* Number of open XML elements.  */
 static int xml_element_depth;
 static bool inside_embedded_js_in_xml;
@@ -1162,6 +1198,52 @@ phase5_get (token_ty *tp)
             return;
           }
 
+        case '`':
+          /* Template literals.  */
+          {
+            struct mixed_string_buffer msb;
+
+            lexical_context = lc_string;
+            /* Start accumulating the string.  */
+            mixed_string_buffer_init (&msb, lexical_context,
+                                      logical_file_name, line_number);
+            for (;;)
+              {
+                int uc = phase7_getuc ('`');
+
+                /* Keep line_number in sync.  */
+                msb.line_number = line_number;
+
+                if (uc == P7_EOF || uc == P7_STRING_END)
+                  {
+                    tp->mixed_string = mixed_string_buffer_result (&msb);
+                    tp->comment = add_reference (savable_comment);
+                    tp->type = last_token_type = token_type_template;
+                    break;
+                  }
+
+                if (uc == P7_TEMPLATE_START_OF_EXPRESSION)
+                  {
+                    mixed_string_buffer_destroy (&msb);
+                    tp->type = last_token_type = token_type_ltemplate;
+                    template_literal_depth++;
+                    break;
+                  }
+
+                if (IS_UNICODE (uc))
+                  {
+                    assert (UNICODE_VALUE (uc) >= 0
+                            && UNICODE_VALUE (uc) < 0x110000);
+                    mixed_string_buffer_append_unicode (&msb,
+                                                        UNICODE_VALUE (uc));
+                  }
+                else
+                  mixed_string_buffer_append_char (&msb, uc);
+              }
+            lexical_context = lc_outside;
+            return;
+          }
+
         case '+':
           tp->type = last_token_type = token_type_plus;
           return;
@@ -1274,12 +1356,38 @@ phase5_get (token_ty *tp)
         case '{':
           if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
             inside_embedded_js_in_xml = true;
+          else
+            brace_depth++;
           tp->type = last_token_type = token_type_other;
           return;
 
         case '}':
           if (xml_element_depth > 0 && inside_embedded_js_in_xml)
             inside_embedded_js_in_xml = false;
+          else if (brace_depth > 0)
+            brace_depth--;
+          else if (template_literal_depth > 0)
+            {
+              /* Middle or right part of template literal.  */
+              for (;;)
+                {
+                  int uc = phase7_getuc ('`');
+
+                  if (uc == P7_EOF || uc == P7_STRING_END)
+                    {
+                      tp->type = last_token_type = token_type_rtemplate;
+                      template_literal_depth--;
+                      break;
+                    }
+
+                  if (uc == P7_TEMPLATE_START_OF_EXPRESSION)
+                    {
+                      tp->type = last_token_type = token_type_mtemplate;
+                      break;
+                    }
+                }
+              return;
+            }
           tp->type = last_token_type = token_type_other;
           return;
 
@@ -1326,13 +1434,14 @@ phase5_unget (token_ty *tp)
 }
 
 
-/* String concatenation with '+'.  */
+/* String concatenation with '+'.
+   Handling of tagged template literals.  */
 
 static void
 x_javascript_lex (token_ty *tp)
 {
   phase5_get (tp);
-  if (tp->type == token_type_string)
+  if (tp->type == token_type_string || tp->type == token_type_template)
     {
       mixed_string_ty *sum = tp->mixed_string;
 
@@ -1346,7 +1455,8 @@ x_javascript_lex (token_ty *tp)
               token_ty token3;
 
               phase5_get (&token3);
-              if (token3.type == token_type_string)
+              if (token3.type == token_type_string
+                  || token3.type == token_type_template)
                 {
                   sum = mixed_string_concat_free1 (sum, token3.mixed_string);
 
@@ -1361,6 +1471,24 @@ x_javascript_lex (token_ty *tp)
         }
       tp->mixed_string = sum;
     }
+  else if (tp->type == token_type_symbol)
+    {
+      token_ty token2;
+
+      phase5_get (&token2);
+      if (token2.type == token_type_template)
+        {
+          /* The value of
+               tag `abc`
+             is the value of the function call
+               tag (["abc"])
+             We don't know anything about this value.  Therefore, don't
+             let the extractor see this template literal.  */
+          free_token (&token2);
+        }
+      else
+        phase5_unget (&token2);
+    }
 }
 
 
@@ -1500,6 +1628,7 @@ extract_balanced (message_list_ty *mlp,
           continue;
 
         case token_type_string:
+        case token_type_template:
           {
             lex_pos_ty pos;
 
@@ -1528,6 +1657,9 @@ extract_balanced (message_list_ty *mlp,
           arglist_parser_done (argparser, arg);
           return true;
 
+        case token_type_ltemplate:
+        case token_type_mtemplate:
+        case token_type_rtemplate:
         case token_type_keyword:
         case token_type_plus:
         case token_type_regexp:
@@ -1563,6 +1695,8 @@ extract_javascript (FILE *f,
   last_comment_line = -1;
   last_non_comment_line = -1;
 
+  brace_depth = 0;
+  template_literal_depth = 0;
   xml_element_depth = 0;
   inside_embedded_js_in_xml = false;
 
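diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am
index b12851b16a069b61564790f0f3957edc755c13af..083adebaeb8c62c800233b0a3d37c9588f1d0ab1 100644 (file)
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am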
@@ -1,5 +1,5 @@
 ## Makefile for the gettext-tools/tests subdirectory of GNU gettext
-## Copyright (C) 1995-1997, 2001-2010, 2012-2016, 2018 Free Software Foundation, Inc.
+## Copyright (C) 1995-1997, 2001-2010, 2012-2016, 2018-2019 Free Software Foundation, Inc.
 ##
 ## This program is free software: you can redistribute it and/or modify
 ## it under the terms of the GNU General Public License as published by
@@ -118,6 +118,7 @@ TESTS = gettext-1 gettext-2 \
        xgettext-lua-1 xgettext-lua-2 \
        xgettext-javascript-1 xgettext-javascript-2 xgettext-javascript-3 \
        xgettext-javascript-4 xgettext-javascript-5 xgettext-javascript-6 \
+       xgettext-javascript-7 \
        xgettext-vala-1 xgettext-vala-2 \
        xgettext-gsettings-1 \
        xgettext-desktop-1 \
diff --git a/gettext-tools/tests/xgettext-javascript-7 b/gettext-tools/tests/xgettext-javascript-7
new file mode 100755 (executable)
index 0000000..c9516ec
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of JavaScript template literal support.
+
+cat <<\EOF > xg-js-7.js
+var s0 = _(`A template literal without substitutions`);
+var s1 = _(`A template literal with
+embedded
+newlines`);
+var s2 = _(`A template literal with ${n} substitutions`);
+var s3 = _(`A template literal with several substitutions: ${a} and ${b} and ${c} and so on`);
+var s4 = `/${looks_like_regex}`;
+var s5 = _('not part of a regex');
+var s6 = `that's a valid string. ` + _('This too');
+var s7 = _(tag`A template literal with a tag`);
+var s8 = `a${`b${`c`+d}`}e`;
+var s9 = _("a normal string");
+var s10 = `abc${foo({},_('should be extracted'))}xyz`;
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-js-7.tmp xg-js-7.js 2>xg-js-7.err
+test $? = 0 || { cat xg-js-7.err; Exit 1; }
+# Don't simplify this to "grep ... < xg-js-7.tmp", otherwise OpenBSD 4.0 grep
+# only outputs "Binary file (standard input) matches".
+cat xg-js-7.tmp | grep -v 'POT-Creation-Date' | LC_ALL=C tr -d '\r' > xg-js-7.pot
+
+cat <<\EOF > xg-js-7.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "A template literal without substitutions"
+msgstr ""
+
+msgid ""
+"A template literal with\n"
+"embedded\n"
+"newlines"
+msgstr ""
+
+msgid "not part of a regex"
+msgstr ""
+
+msgid "This too"
+msgstr ""
+
+msgid "a normal string"
+msgstr ""
+
+msgid "should be extracted"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-js-7.ok xg-js-7.pot
+result=$?
+
+exit $result