JavaScript: Add JSX/E4X tag lexing

author Aarni Koskela <akx@iki.fi>
Mon, 18 Jan 2016 15:09:08 +0000 (17:09 +0200)

committer Aarni Koskela <akx@iki.fi>
Sun, 7 Feb 2016 11:39:10 +0000 (13:39 +0200)
diff --git a/babel/messages/extract.py b/babel/messages/extract.py

index c2dcd5b99c2728dff51e8b6bee4f46553dee96b1..153fe5116abecd5d4762da352035cdd268a04d38 100644 (file)
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -506,6 +506,8 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
      :param comment_tags: a list of translator tags to search for and include
                           in the results
      :param options: a dictionary of additional options (optional)
+                    Supported options are:
+                    * `jsx` -- set to false to disable JSX/E4X support.
      """
      from babel.messages.jslexer import tokenize, unquote_string
      funcname = message_lineno = None
@@ -517,7 +519,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
      last_token = None
      call_stack = -1
  
-    for token in tokenize(fileobj.read().decode(encoding)):
+    for token in tokenize(fileobj.read().decode(encoding), jsx=options.get("jsx", True)):
          if token.type == 'operator' and token.value == '(':
              if funcname:
                  message_lineno = token.lineno
diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py

index 282d294f36c840026bf7d01ec6df25cd70cef0cd..32500676aa28a5fe3d8e64a572b4fac21c248e16 100644 (file)
--- a/babel/messages/jslexer.py
+++ b/babel/messages/jslexer.py
@@ -31,7 +31,7 @@ name_re = re.compile(r'(\$+\w*|[^\W\d]\w*)(?u)')
  
  Token = namedtuple('Token', 'type value lineno')
  
-rules = [
+_rules = [
      (None, re.compile(r'\s+(?u)')),
      (None, re.compile(r'<!--.*')),
      ('linecomment', re.compile(r'//.*')),
@@ -43,6 +43,7 @@ rules = [
          ([eE][-+]?\d+)? |
          (0x[a-fA-F0-9]+)
      )''')),
+    ('jsx_tag', re.compile(r'<(?:/?)\w+.+?>', re.I)),  # May be mangled in `get_rules`
      ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
      ('string', re.compile(r'''(?xs)(
          '(?:[^'\\]*(?:\\.[^'\\]*)*)'  |
@@ -51,6 +52,20 @@ rules = [
  ]
  
  
+def get_rules(jsx):
+    """
+    Get a tokenization rule list given the passed syntax options.
+
+    Internal to this module.
+    """
+    rules = []
+    for token_type, rule in _rules:
+        if not jsx and token_type and 'jsx' in token_type:
+            continue
+        rules.append((token_type, rule))
+    return rules
+
+
  def indicates_division(token):
      """A helper function that helps the tokenizer to decide if the current
      token may be followed by a division operator.
@@ -116,13 +131,17 @@ def unquote_string(string):
      return u''.join(result)
  
  
-def tokenize(source):
-    """Tokenize a JavaScript source.  Returns a generator of tokens.
+def tokenize(source, jsx=True):
+    """
+    Tokenize JavaScript/JSX source.  Returns a generator of tokens.
+
+    :param jsx: Enable (limited) JSX parsing.
      """
      may_divide = False
      pos = 0
      lineno = 1
      end = len(source)
+    rules = get_rules(jsx=jsx)
  
      while pos < end:
          # handle regular rules first
diff --git a/tests/messages/test_js_extract.py b/tests/messages/test_js_extract.py

index 299240e21591a6b1bdbc1df62740f59ff0a4debc..ae6d277b37357d9be227885ee241de9d4bd08f06 100644 (file)
--- a/tests/messages/test_js_extract.py
+++ b/tests/messages/test_js_extract.py
@@ -1,4 +1,5 @@
  # -- encoding: UTF-8 --
+import pytest
  from babel._compat import BytesIO
  from babel.messages import extract
  
@@ -97,3 +98,27 @@ _('no comment here')
      assert messages[1][3] == [u'NOTE: this will show up', 'too.']
      assert messages[2][2] == u'no comment here'
      assert messages[2][3] == []
+
+
+JSX_SOURCE = b"""
+class Foo {
+    render() {
+        const value = gettext("hello");
+        return (
+            <option value="val1">{ i18n._('String1') }</option>
+            <option value="val2">{ i18n._('String 2') }</option>
+            <option value="val3">{ i18n._('String 3') }</option>
+        );
+    }
+"""
+EXPECTED_JSX_MESSAGES = ["hello", "String1", "String 2", "String 3"]
+
+
+@pytest.mark.parametrize("jsx_enabled", (False, True))
+def test_jsx_extraction(jsx_enabled):
+    buf = BytesIO(JSX_SOURCE)
+    messages = [m[2] for m in extract.extract_javascript(buf, ('_', 'gettext'), [], {"jsx": jsx_enabled})]
+    if jsx_enabled:
+        assert messages == EXPECTED_JSX_MESSAGES
+    else:
+        assert messages != EXPECTED_JSX_MESSAGES
author	Aarni Koskela <akx@iki.fi>
	Mon, 18 Jan 2016 15:09:08 +0000 (17:09 +0200)
committer	Aarni Koskela <akx@iki.fi>
	Sun, 7 Feb 2016 11:39:10 +0000 (13:39 +0200)
babel/messages/extract.py		patch | blob | blame | history
babel/messages/jslexer.py		patch | blob | blame | history
tests/messages/test_js_extract.py		patch | blob | blame | history