Made babel work with the latest version of the CLDR (23)

author Armin Ronacher <armin.ronacher@active-4.com>

Thu, 4 Jul 2013 16:19:51 +0000 (18:19 +0200)

committer Armin Ronacher <armin.ronacher@active-4.com>

Thu, 4 Jul 2013 16:19:51 +0000 (18:19 +0200)
author Armin Ronacher <armin.ronacher@active-4.com>
Thu, 4 Jul 2013 16:19:51 +0000 (18:19 +0200)
committer Armin Ronacher <armin.ronacher@active-4.com>
Thu, 4 Jul 2013 16:19:51 +0000 (18:19 +0200)
diff --git a/.gitignore b/.gitignore

index 7f71705bd5aaeb4e49316bb624673a2b7dc4d38d..612f6bc801b25e7753e9076196098eab694df573 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
  *.so
-doc/_build
+docs/_build
  *.pyc
  *.pyo
  *.egg-info
diff --git a/babel/global.dat b/babel/global.dat

index 4eb6099013c1596ef7015be21d47fbe725a7f33e..8ddf2b9446822402bab837d264e3b10040e1b977 100644 (file)

Binary files a/babel/global.dat and b/babel/global.dat differ
diff --git a/babel/plural.py b/babel/plural.py

index 6ac4d051a43cc9863a80009483f1dfd0d6521856..378d81fd285b8a1ccc52d24c52617dd7506f781d 100644 (file)
--- a/babel/plural.py
+++ b/babel/plural.py
@@ -148,14 +148,19 @@ def to_python(rule):
      'one'
      >>> func(3)
      'few'
+    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
+    >>> func(11)
+    'one'
+    >>> func(15)
+    'few'
  
      :param rule: the rules as list or dict, or a `PluralRule` object
      :return: a corresponding Python function
      :raise RuleError: if the expression is malformed
      """
      namespace = {
-        'IN':       in_range,
-        'WITHIN':   within_range,
+        'IN':       in_range_list,
+        'WITHIN':   within_range_list,
          'MOD':      cldr_modulo
      }
      to_python = _PythonCompiler().compile
@@ -191,36 +196,44 @@ def to_gettext(rule):
      return ''.join(result)
  
  
-def in_range(num, min, max):
-    """Integer range test.  This is the callback for the "in" operator
+def in_range_list(num, range_list):
+    """Integer range list test.  This is the callback for the "in" operator
      of the UTS #35 pluralization rule language:
  
-    >>> in_range(1, 1, 3)
+    >>> in_range_list(1, [(1, 3)])
+    True
+    >>> in_range_list(3, [(1, 3)])
      True
-    >>> in_range(3, 1, 3)
+    >>> in_range_list(3, [(1, 3), (5, 8)])
      True
-    >>> in_range(1.2, 1, 4)
+    >>> in_range_list(1.2, [(1, 4)])
      False
-    >>> in_range(10, 1, 4)
+    >>> in_range_list(10, [(1, 4)])
+    False
+    >>> in_range_list(10, [(1, 4), (6, 8)])
      False
      """
-    return num == int(num) and within_range(num, min, max)
+    return num == int(num) and within_range_list(num, range_list)
  
  
-def within_range(num, min, max):
+def within_range_list(num, range_list):
      """Float range test.  This is the callback for the "within" operator
      of the UTS #35 pluralization rule language:
  
-    >>> within_range(1, 1, 3)
+    >>> within_range_list(1, [(1, 3)])
+    True
+    >>> within_range_list(1.0, [(1, 3)])
      True
-    >>> within_range(1.0, 1, 3)
+    >>> within_range_list(1.2, [(1, 4)])
      True
-    >>> within_range(1.2, 1, 4)
+    >>> within_range_list(8.8, [(1, 4), (7, 15)])
      True
-    >>> within_range(10, 1, 4)
+    >>> within_range_list(10, [(1, 4)])
+    False
+    >>> within_range_list(10.5, [(1, 4), (20, 30)])
      False
      """
-    return num >= min and num <= max
+    return any(num >= min_ and num <= max_ for min_, max_ in range_list)
  
  
  def cldr_modulo(a, b):
@@ -254,21 +267,24 @@ class _Parser(object):
      """Internal parser.  This class can translate a single rule into an abstract
      tree of tuples. It implements the following grammar::
  
-        condition   = and_condition ('or' and_condition)*
+        condition     = and_condition ('or' and_condition)*
          and_condition = relation ('and' relation)*
-        relation    = is_relation | in_relation | within_relation | 'n' <EOL>
-        is_relation = expr 'is' ('not')? value
-        in_relation = expr ('not')? 'in' range
-        within_relation = expr ('not')? 'within' range
-        expr        = 'n' ('mod' value)?
-        value       = digit+
-        digit       = 0|1|2|3|4|5|6|7|8|9
-        range       = value'..'value
+        relation      = is_relation | in_relation | within_relation | 'n' <EOL>
+        is_relation   = expr 'is' ('not')? value
+        in_relation   = expr ('not')? 'in' range_list
+        within_relation = expr ('not')? 'within' range_list
+        expr          = 'n' ('mod' value)?
+        range_list    = (range | value) (',' range_list)*
+        value         = digit+
+        digit         = 0|1|2|3|4|5|6|7|8|9
+        range         = value'..'value
  
      - Whitespace can occur between or around any of the above tokens.
      - Rules should be mutually exclusive; for a given numeric value, only one
        rule should apply (i.e. the condition should only be true for one of
-      the plural rule elements.
+      the plural rule elements).
+    - The in and within relations can take comma-separated lists, such as:
+      'n in 3,5,7..15'.
  
      The translator parses the expression on instanciation into an attribute
      called `ast`.
@@ -278,6 +294,7 @@ class _Parser(object):
          (None, re.compile(r'\s+(?u)')),
          ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|n)\b')),
          ('value', re.compile(r'\d+')),
+        ('comma', re.compile(r',')),
          ('ellipsis', re.compile(r'\.\.'))
      ]
  
@@ -345,15 +362,23 @@ class _Parser(object):
              method = 'within'
          else:
              self.expect('word', 'in', term="'within' or 'in'")
-        rv = 'relation', (method, left, self.range())
+        rv = 'relation', (method, left, self.range_list())
          if negated:
              rv = 'not', (rv,)
          return rv
  
-    def range(self):
+    def range_or_value(self):
          left = self.value()
-        self.expect('ellipsis')
-        return 'range', (left, self.value())
+        if self.skip('ellipsis'):
+            return((left, self.value()))
+        else:
+            return((left, left))
+
+    def range_list(self):
+        range_list = [self.range_or_value()]
+        while self.skip('comma'):
+            range_list.append(self.range_or_value())
+        return 'range_list', range_list
  
      def expr(self):
          self.expect('word', 'n')
@@ -392,9 +417,12 @@ class _Compiler(object):
      compile_is = _binary_compiler('(%s == %s)')
      compile_isnot = _binary_compiler('(%s != %s)')
  
-    def compile_relation(self, method, expr, range):
-        range = '%s, %s' % tuple(map(self.compile, range[1]))
-        return '%s(%s, %s)' % (method.upper(), self.compile(expr), range)
+    def compile_relation(self, method, expr, range_list):
+        compile_range_list = '[%s]' % ','.join(
+            ['(%s, %s)' % tuple(map(self.compile, range_))
+             for range_ in range_list[1]])
+        return '%s(%s, %s)' % (method.upper(), self.compile(expr),
+                               compile_range_list)
  
  
  class _PythonCompiler(_Compiler):
diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py

index 27e37ac94a5eda8d750419653ba9cc8e703b16d6..da4c1efb6bcf98b39b91c006a1ee7beb9d09047e 100755 (executable)
--- a/scripts/download_import_cldr.py
+++ b/scripts/download_import_cldr.py
@@ -9,9 +9,9 @@ import urllib
  import subprocess
  
  
-URL = 'http://unicode.org/Public/cldr/1.9.1/core.zip'
-FILENAME = 'core-1.9.1.zip'
-FILESUM = '1c506cd7a30bf5b4f3cbb8a5b382d96c'
+URL = 'http://unicode.org/Public/cldr/23/core.zip'
+FILENAME = 'core-23.zip'
+FILESUM = '800373f275df21bb2c569ddee2d05dd5'
  BLKSIZE = 131072
  
  
@@ -72,7 +72,7 @@ def main():
          print
      common_path = os.path.join(cldr_path, 'common')
  
-    if changed:
+    if changed or not os.path.isdir(common_path):
          if os.path.isdir(common_path):
              log('Deleting old CLDR checkout in \'%s\'', cldr_path)
              shutil.rmtree(common_path)
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py

index b5504942552bafc4d5f9b30048f61581d68d1fae..67f82652a79376892f489a58e83feb8935e4599a 100755 (executable)
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -103,6 +103,9 @@ def main():
                             '..', 'babel')
  
      sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml')
+    bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml'))
+    sup_windows_zones = parse(os.path.join(srcdir, 'supplemental',
+                                           'windowsZones.xml'))
      sup = parse(sup_filename)
  
      # Import global data from the supplemental files
@@ -112,13 +115,27 @@ def main():
          territory_zones = global_data.setdefault('territory_zones', {})
          zone_aliases = global_data.setdefault('zone_aliases', {})
          zone_territories = global_data.setdefault('zone_territories', {})
-        for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
-            tzid = elem.attrib['type']
-            territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
-            zone_territories[tzid] = elem.attrib['territory']
-            if 'aliases' in elem.attrib:
-                for alias in elem.attrib['aliases'].split():
-                    zone_aliases[alias] = tzid
+
+         # Create an auxiliary zone->territory map from the windows zones. (We don't
+         # set the 'zone_territories' map directly here, because some zone aliases
+         # are listed and we defer the decision of which ones to choose to the
+         # 'bcp47' data.)
+        _zone_territory_map = {}
+        for map_zone in sup_windows_zones.findall('.//windowsZones/mapTimezones/mapZone'):
+            for tzid in map_zone.attrib['type'].split():
+                _zone_territory_map[tzid] = map_zone.attrib['territory']
+
+        for key_elem in bcp47_timezone.findall('.//keyword/key'):
+            if key_elem.attrib['name'] == 'tz':
+                for elem in key_elem.findall('type'):
+                    aliases = elem.attrib['alias'].split()
+                    tzid = aliases.pop(0)
+                    territory = _zone_territory_map.get(tzid, '001')
+                    territory_zones.setdefault(territory, []).append(tzid)
+                    zone_territories[tzid] = territory
+                    for alias in aliases:
+                        zone_aliases[alias] = tzid
+                break
  
          # Import Metazone mapping
          meta_zones = global_data.setdefault('meta_zones', {})
@@ -273,6 +290,11 @@ def main():
                  zone_formats['fallback'] = unicode(elem.text) \
                      .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                  break
+        for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'):
+            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
+                zone_formats['fallback_region'] = unicode(elem.text) \
+                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
+                break
  
          time_zones = data.setdefault('time_zones', {})
          for elem in tree.findall('.//timeZoneNames/zone'):
@@ -380,16 +402,13 @@ def main():
  
              # AM/PM
              periods = data.setdefault('periods', {})
-            for elem in calendar.findall('am'):
-                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
-                        and elem.tag in periods:
-                    continue
-                periods[elem.tag] = unicode(elem.text)
-            for elem in calendar.findall('pm'):
-                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
-                        and elem.tag in periods:
-                    continue
-                periods[elem.tag] = unicode(elem.text)
+            for day_period_width in calendar.findall(
+                'dayPeriods/dayPeriodContext/dayPeriodWidth'):
+                if day_period_width.attrib['type'] == 'wide':
+                    for day_period in day_period_width.findall('dayPeriod'):
+                        if 'alt' not in day_period.attrib:
+                            periods[day_period.attrib['type']] = unicode(
+                                day_period.text)
  
              date_formats = data.setdefault('date_formats', {})
              for format in calendar.findall('dateFormats'):
@@ -455,7 +474,9 @@ def main():
              if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                      and elem.attrib.get('type') in decimal_formats:
                  continue
-            pattern = unicode(elem.findtext('decimalFormat/pattern'))
+            pattern = unicode(elem.findtext('./decimalFormat/pattern'))
+            if pattern == 'None':
+                continue
              decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
  
          scientific_formats = data.setdefault('scientific_formats', {})
author	Armin Ronacher <armin.ronacher@active-4.com>
	Thu, 4 Jul 2013 16:19:51 +0000 (18:19 +0200)
committer	Armin Ronacher <armin.ronacher@active-4.com>
	Thu, 4 Jul 2013 16:19:51 +0000 (18:19 +0200)
.gitignore		patch \| blob \| blame \| history
babel/global.dat		patch \| blob \| blame \| history
babel/plural.py		patch \| blob \| blame \| history
scripts/download_import_cldr.py		patch \| blob \| blame \| history
scripts/import_cldr.py		patch \| blob \| blame \| history