Extract plural rule tokenization function and add tests

author benselme <benselme@gmail.com>
Thu, 8 Jan 2015 22:07:11 +0000 (17:07 -0500)
committer benselme <benselme@gmail.com>
Thu, 8 Jan 2015 22:07:11 +0000 (17:07 -0500)
diff --git a/babel/plural.py b/babel/plural.py
index 9114757507014ee18bf8fb93e3c2b02ce09b9390..e2eb88b05e690bcaea7731f5ac861b0d06028f91 100644
--- a/babel/plural.py
+++ b/babel/plural.py
@@ -255,11 +255,56 @@ def cldr_modulo(a, b):
  class RuleError(Exception):
      """Raised if a rule is malformed."""
  
+_RULES = [
+    (None, re.compile(r'\s+(?u)')),
+    ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[nivwft])\b')),
+    ('value', re.compile(r'\d+')),
+    ('symbol', re.compile(r'%|,|!=|=')),
+    ('ellipsis', re.compile(r'\.\.'))
+]
+
+
+def tokenize_rule(s):
+    s = s.split('@')[0]
+    result = []
+    pos = 0
+    end = len(s)
+    while pos < end:
+        for tok, rule in _RULES:
+            match = rule.match(s, pos)
+            if match is not None:
+                pos = match.end()
+                if tok:
+                    result.append((tok, match.group()))
+                break
+        else:
+            raise RuleError('malformed CLDR pluralization rule.  '
+                            'Got unexpected %r' % s[pos])
+    return result[::-1]
+
  
  class _Parser(object):
      """Internal parser.  This class can translate a single rule into an abstract
      tree of tuples. It implements the following grammar::
  
+        condition     = and_condition ('or' and_condition)*
+                        ('@integer' samples)?
+                        ('@decimal' samples)?
+        and_condition = relation ('and' relation)*
+        relation      = is_relation | in_relation | within_relation
+        is_relation   = expr 'is' ('not')? value
+        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
+        within_relation = expr ('not')? 'within' range_list
+        expr          = operand (('mod' | '%') value)?
+        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
+        range_list    = (range | value) (',' range_list)*
+        value         = digit+
+        digit         = 0|1|2|3|4|5|6|7|8|9
+        range         = value'..'value
+        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
+        sampleRange   = decimalValue '~' decimalValue
+        decimalValue  = value ('.' value)?
+
          condition     = and_condition ('or' and_condition)*
          and_condition = relation ('and' relation)*
          relation      = is_relation | in_relation | within_relation | 'n' <EOL>
@@ -283,32 +328,8 @@ class _Parser(object):
      called `ast`.
      """
  
-    _rules = [
-        (None, re.compile(r'\s+(?u)')),
-        ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|n)\b')),
-        ('value', re.compile(r'\d+')),
-        ('comma', re.compile(r',')),
-        ('ellipsis', re.compile(r'\.\.'))
-    ]
-
      def __init__(self, string):
-        string = string.lower()
-        result = []
-        pos = 0
-        end = len(string)
-        while pos < end:
-            for tok, rule in self._rules:
-                match = rule.match(string, pos)
-                if match is not None:
-                    pos = match.end()
-                    if tok:
-                        result.append((tok, match.group()))
-                    break
-            else:
-                raise RuleError('malformed CLDR pluralization rule.  '
-                                'Got unexpected %r' % string[pos])
-        self.tokens = result[::-1]
-
+        self.tokens = tokenize_rule(string)
          self.ast = self.condition()
          if self.tokens:
              raise RuleError('Expected end of rule, got %r' %
diff --git a/tests/test_plural.py b/tests/test_plural.py
index 7f31fd98f60ef596295b88cdf0789c82ef506d47..ad6da7030b10866575e0c644b014b63c572e056f 100644
--- a/tests/test_plural.py
+++ b/tests/test_plural.py
@@ -13,6 +13,7 @@
  
  import doctest
  import unittest
+import pytest
  
  from babel import plural
  
@@ -98,3 +99,30 @@ def test_locales_with_no_plural_rules_have_default():
      assert aa_plural(1) == 'other'
      assert aa_plural(2) == 'other'
      assert aa_plural(15) == 'other'
+
+
+WELL_FORMED_TOKEN_TESTS = (
+    ('', []),
+    ('n = 1', [('value', '1'), ('symbol', '='), ('word', 'n'), ]),
+    ('n = 1 @integer 1', [('value', '1'), ('symbol', '='), ('word', 'n'), ]),
+    ('n is 1', [('value', '1'), ('word', 'is'), ('word', 'n'), ]),
+    ('n % 100 = 3..10', [('value', '10'), ('ellipsis', '..'), ('value', '3'),
+                         ('symbol', '='),  ('value', '100'), ('symbol', '%'),
+                         ('word', 'n'), ]),
+)
+
+
+@pytest.mark.parametrize('rule_text,tokens', WELL_FORMED_TOKEN_TESTS)
+def test_tokenize_well_formed(rule_text, tokens):
+    assert plural.tokenize_rule(rule_text) == tokens
+
+
+MALFORMED_TOKEN_TESTS = (
+    ('a = 1'), ('n ! 2'),
+)
+
+
+@pytest.mark.parametrize('rule_text', MALFORMED_TOKEN_TESTS)
+def test_tokenize_malformed(rule_text):
+    with pytest.raises(plural.RuleError):
+        plural.tokenize_rule(rule_text)
author	benselme <benselme@gmail.com>
	Thu, 8 Jan 2015 22:07:11 +0000 (17:07 -0500)
committer	benselme <benselme@gmail.com>
	Thu, 8 Jan 2015 22:07:11 +0000 (17:07 -0500)
babel/plural.py		patch | blob | blame | history
tests/test_plural.py		patch | blob | blame | history