Improved javascript template string expression extracting (#939)

author Johannes Wilm <johanneswilm@gmail.com>

Fri, 6 Jan 2023 20:18:35 +0000 (21:18 +0100)

committer GitHub <noreply@github.com>

Fri, 6 Jan 2023 20:18:35 +0000 (22:18 +0200)
author Johannes Wilm <johanneswilm@gmail.com>
Fri, 6 Jan 2023 20:18:35 +0000 (21:18 +0100)
committer GitHub <noreply@github.com>
Fri, 6 Jan 2023 20:18:35 +0000 (22:18 +0200)
diff --git a/babel/messages/extract.py b/babel/messages/extract.py

index 4f0f649b35bd13136778b094e2221b6e51959b2b..c19dd5af2722de739f06f858642e13eab7f90e49 100644 (file)
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -16,9 +16,10 @@
      :license: BSD, see LICENSE for more details.
  """
  import ast
+import io
  import os
-from os.path import relpath
  import sys
+from os.path import relpath
  from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
  
  from babel.util import parse_encoding, parse_future_flags, pathmatch
@@ -532,7 +533,7 @@ def _parse_python_string(value, encoding, future_flags):
      return None
  
  
-def extract_javascript(fileobj, keywords, comment_tags, options):
+def extract_javascript(fileobj, keywords, comment_tags, options, lineno=1):
      """Extract messages from JavaScript source code.
  
      :param fileobj: the seekable, file-like object the messages should be
@@ -544,7 +545,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
      :param options: a dictionary of additional options (optional)
                      Supported options are:
                      * `jsx` -- set to false to disable JSX/E4X support.
-                    * `template_string` -- set to false to disable ES6 template string support.
+                    * `template_string` -- if `True`, supports gettext(`key`)
+                    * `parse_template_string` -- if `True` will parse the
+                                                 contents of javascript
+                                                 template strings.
+    :param lineno: line number offset (for parsing embedded fragments)
      """
      from babel.messages.jslexer import Token, tokenize, unquote_string
      funcname = message_lineno = None
@@ -556,12 +561,12 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
      last_token = None
      call_stack = -1
      dotted = any('.' in kw for kw in keywords)
-
      for token in tokenize(
          fileobj.read().decode(encoding),
          jsx=options.get("jsx", True),
          template_string=options.get("template_string", True),
-        dotted=dotted
+        dotted=dotted,
+        lineno=lineno
      ):
          if (  # Turn keyword`foo` expressions into keyword("foo") calls:
              funcname and  # have a keyword...
@@ -573,7 +578,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
              call_stack = 0
              token = Token('operator', ')', token.lineno)
  
-        if token.type == 'operator' and token.value == '(':
+        if options.get('parse_template_string') and not funcname and token.type == 'template_string':
+            for item in parse_template_string(token.value, keywords, comment_tags, options, token.lineno):
+                yield item
+
+        elif token.type == 'operator' and token.value == '(':
              if funcname:
                  message_lineno = token.lineno
                  call_stack += 1
@@ -665,3 +674,41 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
              funcname = token.value
  
          last_token = token
+
+
+def parse_template_string(template_string, keywords, comment_tags, options, lineno=1):
+    """Extract messages from the `${...}` expressions of a JavaScript template string.
+
+    :param template_string: the template string token, backticks included
+    :param keywords: function names to recognize as translation functions
+    :param comment_tags: translator comment tags to include in the results
+    :param options: a dictionary of additional options (optional)
+    :param lineno: line number the template string starts on (optional)
+    :return: an iterator over the items `extract_javascript` yields
+    """
+    from babel.messages.jslexer import line_re
+    body = template_string[1:-1]
+    level = 0  # brace depth inside the current `${...}`; 0 means literal text
+    quote = None  # delimiter of the JS string being skipped, if any
+    start = counted = 0  # offsets: current expression / line breaks counted so far
+    prev_character = None
+    for index, character in enumerate(body):
+        if prev_character == '\\':
+            character = None  # escaped: can neither open nor close anything
+        elif quote:
+            quote = None if character == quote else quote
+        elif level and character in ('"', "'", '`'):
+            quote = character  # quotes in literal text (level 0) are plain text
+        elif character == '{' and (level or prev_character == '$'):
+            start = start if level else index + 1  # only a level-0 `${` starts one
+            level += 1  # nested braces (object literals, blocks) must balance too
+        elif level and character == '}':
+            level -= 1
+            if not level:
+                # Count every line break since the last expression, literal text included.
+                lineno += len(line_re.findall(body[counted:start]))
+                counted = start
+                fake_file_obj = io.BytesIO(body[start:index].encode())
+                for item in extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno):
+                    yield item
+        prev_character = character
diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py

index 1264b2dbc29fd1693c88fba3bc9b76f99129535e..886f69d20129a1bf8cad612bf419fc25f2b1adb3 100644 (file)
--- a/babel/messages/jslexer.py
+++ b/babel/messages/jslexer.py
@@ -151,17 +151,17 @@ def unquote_string(string):
      return u''.join(result)
  
  
-def tokenize(source, jsx=True, dotted=True, template_string=True):
+def tokenize(source, jsx=True, dotted=True, template_string=True, lineno=1):
      """
      Tokenize JavaScript/JSX source.  Returns a generator of tokens.
  
      :param jsx: Enable (limited) JSX parsing.
      :param dotted: Read dotted names as single name token.
      :param template_string: Support ES6 template strings
+    :param lineno: starting line number (optional)
      """
      may_divide = False
      pos = 0
-    lineno = 1
      end = len(source)
      rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)
  
diff --git a/tests/messages/test_js_extract.py b/tests/messages/test_js_extract.py

index 72c521144ce0b1ae0786b9b7958f2955877902a7..95985c0f7b90dd5b0b459fc165a949f790fa9a54 100644 (file)
--- a/tests/messages/test_js_extract.py
+++ b/tests/messages/test_js_extract.py
@@ -150,3 +150,42 @@ def test_template_string_tag_usage():
      )
  
      assert messages == [(1, 'Tag template, wow', [], None)]
+
+
+def test_inside_template_string():
+    """A gettext call inside a `${...}` expression is extracted when enabled."""
+    source = BytesIO(b"const msg = `${gettext('Hello')} ${user.name}`")
+    opts = {'parse_template_string': True}
+    found = list(extract.extract('javascript', source, {"gettext": None}, [], opts))
+
+    assert found == [(1, 'Hello', [], None)]
+
+
+def test_inside_template_string_with_linebreaks():
+    """Line numbers stay correct when `${...}` expressions span several lines."""
+    source = BytesIO(b"""\
const userName = gettext('Username')
const msg = `${
gettext('Hello')
} ${userName} ${
gettext('Are you having a nice day?')
}`
const msg2 = `${
gettext('Howdy')
} ${userName} ${
gettext('Are you doing ok?')
}`
+""")
+    opts = {'parse_template_string': True}
+    found = list(extract.extract('javascript', source, {"gettext": None}, [], opts))
+
+    assert found == [(1, 'Username', [], None), (3, 'Hello', [], None), (5, 'Are you having a nice day?', [], None), (8, 'Howdy', [], None), (10, 'Are you doing ok?', [], None)]
+
+
+def test_inside_nested_template_string():
+    """Messages inside nested template strings are extracted as well."""
+    source = BytesIO(b"const msg = `${gettext('Greetings!')} ${ evening ? `${user.name}: ${gettext('This is a lovely evening.')}` : `${gettext('The day is really nice!')} ${user.name}`}`")
+    opts = {'parse_template_string': True}
+    found = list(extract.extract('javascript', source, {"gettext": None}, [], opts))
+
+    assert found == [(1, 'Greetings!', [], None), (1, 'This is a lovely evening.', [], None), (1, 'The day is really nice!', [], None)]
author	Johannes Wilm <johanneswilm@gmail.com>
	Fri, 6 Jan 2023 20:18:35 +0000 (21:18 +0100)
committer	GitHub <noreply@github.com>
	Fri, 6 Jan 2023 20:18:35 +0000 (22:18 +0200)
babel/messages/extract.py		patch \| blob \| blame \| history
babel/messages/jslexer.py		patch \| blob \| blame \| history
tests/messages/test_js_extract.py		patch \| blob \| blame \| history