From: benselme
Date: Thu, 8 Jan 2015 22:07:11 +0000 (-0500)
Subject: Extract plural rule tokenization function and add tests
X-Git-Tag: dev-2a51c9b95d06~51^2~18
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dd64686bdabddd155d77146e7af25d6a88f2d189;p=thirdparty%2Fbabel.git

Extract plural rule tokenization function and add tests
---

diff --git a/babel/plural.py b/babel/plural.py
index 91147575..e2eb88b0 100644
--- a/babel/plural.py
+++ b/babel/plural.py
@@ -255,11 +255,56 @@ def cldr_modulo(a, b):
 class RuleError(Exception):
     """Raised if a rule is malformed."""
 
+_RULES = [
+    (None, re.compile(r'\s+(?u)')),
+    ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[nivwft])\b')),
+    ('value', re.compile(r'\d+')),
+    ('symbol', re.compile(r'%|,|!=|=')),
+    ('ellipsis', re.compile(r'\.\.'))
+]
+
+
+def tokenize_rule(s):
+    s = s.split('@')[0]
+    result = []
+    pos = 0
+    end = len(s)
+    while pos < end:
+        for tok, rule in _RULES:
+            match = rule.match(s, pos)
+            if match is not None:
+                pos = match.end()
+                if tok:
+                    result.append((tok, match.group()))
+                break
+        else:
+            raise RuleError('malformed CLDR pluralization rule. '
+                            'Got unexpected %r' % s[pos])
+    return result[::-1]
+
 
 class _Parser(object):
     """Internal parser. This class can translate a single rule into an abstract
     tree of tuples. It implements the following grammar::
 
+        condition     = and_condition ('or' and_condition)*
+                        ('@integer' samples)?
+                        ('@decimal' samples)?
+        and_condition = relation ('and' relation)*
+        relation      = is_relation | in_relation | within_relation
+        is_relation   = expr 'is' ('not')? value
+        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
+        within_relation = expr ('not')? 'within' range_list
+        expr          = operand (('mod' | '%') value)?
+        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
+        range_list    = (range | value) (',' range_list)*
+        value         = digit+
+        digit         = 0|1|2|3|4|5|6|7|8|9
+        range         = value'..'value
+        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
+        sampleRange   = decimalValue '~' decimalValue
+        decimalValue  = value ('.' value)?
+
         condition     = and_condition ('or' and_condition)*
         and_condition = relation ('and' relation)*
         relation      = is_relation | in_relation | within_relation | 'n'
@@ -283,32 +328,8 @@ class _Parser(object):
     called `ast`.
     """
 
-    _rules = [
-        (None, re.compile(r'\s+(?u)')),
-        ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|n)\b')),
-        ('value', re.compile(r'\d+')),
-        ('comma', re.compile(r',')),
-        ('ellipsis', re.compile(r'\.\.'))
-    ]
-
     def __init__(self, string):
-        string = string.lower()
-        result = []
-        pos = 0
-        end = len(string)
-        while pos < end:
-            for tok, rule in self._rules:
-                match = rule.match(string, pos)
-                if match is not None:
-                    pos = match.end()
-                    if tok:
-                        result.append((tok, match.group()))
-                    break
-            else:
-                raise RuleError('malformed CLDR pluralization rule. '
-                                'Got unexpected %r' % string[pos])
-        self.tokens = result[::-1]
-
+        self.tokens = tokenize_rule(string)
         self.ast = self.condition()
         if self.tokens:
             raise RuleError('Expected end of rule, got %r' %
diff --git a/tests/test_plural.py b/tests/test_plural.py
index 7f31fd98..ad6da703 100644
--- a/tests/test_plural.py
+++ b/tests/test_plural.py
@@ -13,6 +13,7 @@
 
 import doctest
 import unittest
+import pytest
 
 from babel import plural
 
@@ -98,3 +99,30 @@ def test_locales_with_no_plural_rules_have_default():
     assert aa_plural(1) == 'other'
     assert aa_plural(2) == 'other'
     assert aa_plural(15) == 'other'
+
+
+WELL_FORMED_TOKEN_TESTS = (
+    ('', []),
+    ('n = 1', [('value', '1'), ('symbol', '='), ('word', 'n'), ]),
+    ('n = 1 @integer 1', [('value', '1'), ('symbol', '='), ('word', 'n'), ]),
+    ('n is 1', [('value', '1'), ('word', 'is'), ('word', 'n'), ]),
+    ('n % 100 = 3..10', [('value', '10'), ('ellipsis', '..'), ('value', '3'),
+                         ('symbol', '='), ('value', '100'), ('symbol', '%'),
+                         ('word', 'n'), ]),
+)
+
+
+@pytest.mark.parametrize('rule_text,tokens', WELL_FORMED_TOKEN_TESTS)
+def test_tokenize_well_formed(rule_text, tokens):
+    assert plural.tokenize_rule(rule_text) == tokens
+
+
+MALFORMED_TOKEN_TESTS = (
+    ('a = 1'), ('n ! 2'),
+)
+
+
+@pytest.mark.parametrize('rule_text', MALFORMED_TOKEN_TESTS)
+def test_tokenize_malformed(rule_text):
+    with pytest.raises(plural.RuleError):
+        plural.tokenize_rule(rule_text)