Started work on supporting CLDR 24

author Armin Ronacher <armin.ronacher@active-4.com>

Wed, 20 Nov 2013 19:30:22 +0000 (19:30 +0000)

committer Armin Ronacher <armin.ronacher@active-4.com>

Wed, 20 Nov 2013 19:30:22 +0000 (19:30 +0000)
author Armin Ronacher <armin.ronacher@active-4.com>
Wed, 20 Nov 2013 19:30:22 +0000 (19:30 +0000)
committer Armin Ronacher <armin.ronacher@active-4.com>
Wed, 20 Nov 2013 19:30:22 +0000 (19:30 +0000)
diff --git a/babel/numbers.py b/babel/numbers.py

index 2f7fe16219c46351b4ee2b6d386f253f1e63668d..c2c782ee178f2f340ddc86b6cd47677dc4ee18df 100644 (file)
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -35,7 +35,7 @@ def get_currency_name(currency, count=None, locale=LC_NUMERIC):
  
      >>> get_currency_name('USD', locale='en_US')
      u'US Dollar'
-    
+
      .. versionadded:: 0.9.4
  
      :param currency: the currency code
diff --git a/babel/plural.py b/babel/plural.py

index 144a0dc020123f320f5a5b544b94cd813d8b0073..983f9fba6d0a0e5e4f97864fb8c2640baea7d996 100644 (file)
--- a/babel/plural.py
+++ b/babel/plural.py
@@ -10,6 +10,7 @@
  """
  
  import re
+import decimal
  
  
  _plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
@@ -50,6 +51,9 @@ class PluralRule(object):
          found = set()
          self.abstract = []
          for key, expr in sorted(list(rules)):
+            # The 'other' rule only contains samples; nothing to parse
+            if key == 'other':
+                continue
              if key not in _plural_tags:
                  raise ValueError('unknown tag %r' % key)
              elif key in found:
@@ -102,7 +106,22 @@ class PluralRule(object):
      def __call__(self, n):
          if not hasattr(self, '_func'):
              self._func = to_python(self)
-        return self._func(n)
+        if isinstance(n, float):
+            int_n = int(n)
+            if int_n == n:
+                n = int_n
+            else:
+                n = decimal.Decimal(n)
+        if isinstance(n, decimal.Decimal):
+            str_n = str(n).lstrip('+-')
+            trailing = '.' in str_n and str_n.split('.', 1)[1] or ''
+            v = len(trailing)
+            w = len(trailing.rstrip('0'))
+            f = int(trailing or 0)
+            t = int(trailing.rstrip('0') or 0)
+        else:
+            v = w = f = t = 0
+        return self._func(abs(n), v, w, f, t)
  
  
  def to_javascript(rule):
@@ -153,7 +172,10 @@ def to_python(rule):
          'MOD':      cldr_modulo
      }
      to_python = _PythonCompiler().compile
-    result = ['def evaluate(n):']
+    result = [
+        'def evaluate(n, v=0, w=0, f=0, t=0):',
+        ' i = int(n)',
+    ]
      for tag, ast in PluralRule.parse(rule).abstract:
          # the str() call is to coerce the tag to the native string.  It's
          # a limited ascii restricted set of tags anyways so that is fine.
@@ -259,16 +281,24 @@ class _Parser(object):
      tree of tuples. It implements the following grammar::
  
          condition     = and_condition ('or' and_condition)*
+                        ('@integer' samples)?
+                        ('@decimal' samples)?
          and_condition = relation ('and' relation)*
-        relation      = is_relation | in_relation | within_relation | 'n' <EOL>
+        relation      = is_relation | in_relation | within_relation
          is_relation   = expr 'is' ('not')? value
-        in_relation   = expr ('not')? 'in' range_list
+        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
          within_relation = expr ('not')? 'within' range_list
-        expr          = 'n' ('mod' value)?
+        expr          = operand (('mod' | '%') value)?
+        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
          range_list    = (range | value) (',' range_list)*
          value         = digit+
          digit         = 0|1|2|3|4|5|6|7|8|9
          range         = value'..'value
+        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
+        sampleRange   = decimalValue '~' decimalValue
+        decimalValue  = value ('.' value)?
+
+    (Samples are currently entirely ignored)
  
      - Whitespace can occur between or around any of the above tokens.
      - Rules should be mutually exclusive; for a given numeric value, only one
@@ -283,14 +313,15 @@ class _Parser(object):
  
      _rules = [
          (None, re.compile(r'\s+(?u)')),
-        ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|n)\b')),
+        ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[nivwft])\b')),
          ('value', re.compile(r'\d+')),
-        ('comma', re.compile(r',')),
+        ('symbol', re.compile(r'%|,|!=|=')),
          ('ellipsis', re.compile(r'\.\.'))
      ]
+    _vars = tuple('nivwft')
  
      def __init__(self, string):
-        string = string.lower()
+        string = string.lower().split('@')[0]
          result = []
          pos = 0
          end = len(string)
@@ -352,30 +383,50 @@ class _Parser(object):
          if self.skip('word', 'within'):
              method = 'within'
          else:
-            self.expect('word', 'in', term="'within' or 'in'")
+            if not self.skip('word', 'in'):
+                if negated:
+                    raise RuleError('Cannot negate operator based rules.')
+                return self.newfangled_relation(left)
          rv = 'relation', (method, left, self.range_list())
          if negated:
              rv = 'not', (rv,)
          return rv
  
+    def newfangled_relation(self, left):
+        if self.skip('symbol', '='):
+            negated = False
+        elif self.skip('symbol', '!='):
+            negated = True
+        else:
+            raise RuleError('Expected "=" or "!=" or legacy relation')
+        rv = 'relation', ('in', left, self.range_list())
+        if negated:
+            rv = 'not', (rv,)
+        return rv
+
      def range_or_value(self):
          left = self.value()
          if self.skip('ellipsis'):
-            return((left, self.value()))
+            return (left, self.value())
          else:
-            return((left, left))
+            return (left, left)
  
      def range_list(self):
          range_list = [self.range_or_value()]
-        while self.skip('comma'):
+        while self.skip('symbol', ','):
              range_list.append(self.range_or_value())
          return 'range_list', range_list
  
      def expr(self):
-        self.expect('word', 'n')
+        word = self.skip('word')
+        if word is None or word[1] not in self._vars:
+            raise RuleError('Expected identifier variable')
+        name = word[1]
          if self.skip('word', 'mod'):
-            return 'mod', (('n', ()), self.value())
-        return 'n', ()
+            return 'mod', ((name, ()), self.value())
+        elif self.skip('symbol', '%'):
+            return 'mod', ((name, ()), self.value())
+        return name, ()
  
      def value(self):
          return 'value', (int(self.expect('value')[1]),)
@@ -401,6 +452,11 @@ class _Compiler(object):
          return getattr(self, 'compile_' + op)(*args)
  
      compile_n = lambda x: 'n'
+    compile_i = lambda x: 'i'
+    compile_v = lambda x: 'v'
+    compile_w = lambda x: 'w'
+    compile_f = lambda x: 'f'
+    compile_t = lambda x: 't'
      compile_value = lambda x, v: str(v)
      compile_and = _binary_compiler('(%s && %s)')
      compile_or = _binary_compiler('(%s || %s)')
@@ -455,18 +511,30 @@ class _GettextCompiler(_Compiler):
  class _JavaScriptCompiler(_GettextCompiler):
      """Compiles the expression to plain of JavaScript."""
  
+    # XXX: presently the JavaScript compiler does not support any of the
+    # fraction operands (v, w, f, t) and only deals with integers.
+    compile_i = lambda x: 'parseInt(n, 10)'
+    compile_v = lambda x: '0'
+    compile_w = lambda x: '0'
+    compile_f = lambda x: '0'
+    compile_t = lambda x: '0'
+
      def compile_relation(self, method, expr, range_list):
          code = _GettextCompiler.compile_relation(
              self, method, expr, range_list)
          if method == 'in':
              expr = self.compile(expr)
-            code = '(parseInt(%s) == %s && %s)' % (expr, expr, code)
+            code = '(parseInt(%s, 10) == %s && %s)' % (expr, expr, code)
          return code
  
  
  class _UnicodeCompiler(_Compiler):
      """Returns a unicode pluralization rule again."""
  
+    # XXX: this currently spits out the old syntax instead of the new
+    # one.  We can change that, but it will break a whole bunch of stuff
+    # for users I suppose.
+
      compile_is = _binary_compiler('%s is %s')
      compile_isnot = _binary_compiler('%s is not %s')
      compile_and = _binary_compiler('%s and %s')
diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py

index 9c82fc88ac4aeace85152c4771c896f4c8a048e9..fe01053393814c740a5258ef5403a6665d239fe5 100755 (executable)
--- a/scripts/download_import_cldr.py
+++ b/scripts/download_import_cldr.py
@@ -13,9 +13,9 @@ except ImportError:
      from urllib import urlretrieve
  
  
-URL = 'http://unicode.org/Public/cldr/23.1/core.zip'
-FILENAME = 'core-23.1.zip'
-FILESUM = 'd44ff35f9b9160becbb3a575468d8a5a'
+URL = 'http://unicode.org/Public/cldr/24/core.zip'
+FILENAME = 'core-24.zip'
+FILESUM = 'cd2e8f31baf65c96bfc7e5377b3b793f'
  BLKSIZE = 131072
  
  
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py

index 3a2f1217ce3072e74b405c06849e6dbc93410c97..02a65324ec2ab8eac1304bdcbed198fe034728ed 100755 (executable)
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -186,6 +186,8 @@ def main():
              # pass our parser anyways.
              if '-' in alias.attrib['type']:
                  continue
+            if 'replacement' not in alias.attrib:
+                continue
              language_aliases[alias.attrib['type']] = alias.attrib['replacement']
  
          # Territory aliases
@@ -574,7 +576,8 @@ def main():
              if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                      and elem.attrib.get('type') in currency_formats:
                  continue
-            pattern = text_type(elem.findtext('currencyFormat/pattern'))
+            pattern = text_type(elem.findtext(
+                'currencyFormat[@type="standard"]/pattern'))
              currency_formats[elem.attrib.get('type')] = \
                  numbers.parse_pattern(pattern)
  
diff --git a/tests/test_numbers.py b/tests/test_numbers.py

index 99e0d1bda1dfc296b1060ce3f4dd498cfef95abe..fae6c71d097d3927db42496e624bb925f1dd6b38 100644 (file)
--- a/tests/test_numbers.py
+++ b/tests/test_numbers.py
@@ -175,11 +175,11 @@ class NumberParsingTestCase(unittest.TestCase):
  
  
  def test_get_currency_name():
-    assert numbers.get_currency_name('USD', 'en_US') == u'US dollars'
+    assert numbers.get_currency_name('USD', locale='en_US') == u'US Dollar'
  
  
  def test_get_currency_symbol():
-    assert numbers.get_currency_symbol('USD', 'en_US') == u'$'
+    assert numbers.get_currency_symbol('USD', locale='en_US') == u'$'
  
  
  def test_get_territory_currencies():
author	Armin Ronacher <armin.ronacher@active-4.com>
	Wed, 20 Nov 2013 19:30:22 +0000 (19:30 +0000)
committer	Armin Ronacher <armin.ronacher@active-4.com>
	Wed, 20 Nov 2013 19:30:22 +0000 (19:30 +0000)
babel/numbers.py		patch \| blob \| blame \| history
babel/plural.py		patch \| blob \| blame \| history
scripts/download_import_cldr.py		patch \| blob \| blame \| history
scripts/import_cldr.py		patch \| blob \| blame \| history
tests/test_numbers.py		patch \| blob \| blame \| history