* Reduce size of locale data pickles by only storing the data provided by each locale itself; inherited data is merged in when the locale is loaded.

author Christopher Lenz <cmlenz@gmail.com>

Sun, 3 Jun 2007 15:27:27 +0000 (15:27 +0000)

committer Christopher Lenz <cmlenz@gmail.com>

Sun, 3 Jun 2007 15:27:27 +0000 (15:27 +0000)
author Christopher Lenz <cmlenz@gmail.com>
Sun, 3 Jun 2007 15:27:27 +0000 (15:27 +0000)
committer Christopher Lenz <cmlenz@gmail.com>
Sun, 3 Jun 2007 15:27:27 +0000 (15:27 +0000)
diff --git a/babel/catalog/pofile.py b/babel/catalog/pofile.py

index f557b04261928c86ef4d78c77777d23d1e694f9e..ba19a4e93adba8ea47e3b8f7f4b42e1433fc101f 100644 (file)
--- a/babel/catalog/pofile.py
+++ b/babel/catalog/pofile.py
@@ -199,7 +199,6 @@ def normalize(string, width=76):
      :param string: the string to normalize
      :param width: the maximum line width; use `None`, 0, or a negative number
                    to completely disable line wrapping
-    :param charset: the encoding to use for `unicode` strings
      :return: the normalized string
      :rtype: `unicode`
      """
diff --git a/babel/core.py b/babel/core.py

index c9c4e63d276d9e05e88b223a1e4c3d368e557004..62642b4e45d1744e0b6e22b2c3e42e963f2e79b5 100644 (file)
--- a/babel/core.py
+++ b/babel/core.py
@@ -13,12 +13,7 @@
  
  """Core locale representation and locale data access gateway."""
  
-import os
-import pickle
-try:
-    import threading
-except ImportError:
-    import dummy_threading as threading
+from babel import localedata
  
  __all__ = ['Locale', 'negotiate', 'parse']
  __docformat__ = 'restructuredtext en'
@@ -47,35 +42,6 @@ class Locale(object):
      
      :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
      """
-    _cache = {}
-    _cache_lock = threading.Lock()
-
-    def __new__(cls, language, territory=None, variant=None):
-        """Create new locale object, or load it from the cache if it had already
-        been instantiated.
-        
-        >>> l1 = Locale('en')
-        >>> l2 = Locale('en')
-        >>> l1 is l2
-        True
-        
-        :param language: the language code
-        :param territory: the territory (country or region) code
-        :param variant: the variant code
-        :return: new or existing `Locale` instance
-        :rtype: `Locale`
-        """
-        key = (language, territory, variant)
-        cls._cache_lock.acquire()
-        try:
-            self = cls._cache.get(key)
-            if self is None:
-                self = super(Locale, cls).__new__(cls, language, territory,
-                                                  variant)
-                cls._cache[key] = self
-            return self
-        finally:
-            self._cache_lock.release()
  
      def __init__(self, language, territory=None, variant=None):
          """Initialize the locale object from the given identifier components.
@@ -93,7 +59,7 @@ class Locale(object):
          self.language = language
          self.territory = territory
          self.variant = variant
-        self.__data = None
+        self._data = localedata.load(str(self))
  
      def parse(cls, identifier, sep='_'):
          """Create a `Locale` instance for the given locale identifier.
@@ -127,18 +93,6 @@ class Locale(object):
          return '_'.join(filter(None, [self.language, self.territory,
                                        self.variant]))
  
-    def _data(self):
-        if self.__data is None:
-            filename = os.path.join(os.path.dirname(__file__),
-                                    'localedata/%s.dat' % self)
-            fileobj = open(filename, 'rb')
-            try:
-                self.__data = pickle.load(fileobj)
-            finally:
-                fileobj.close()
-        return self.__data
-    _data = property(_data)
-
      def display_name(self):
          retval = self.languages.get(self.language)
          if self.territory:
@@ -209,6 +163,32 @@ class Locale(object):
  
      #{ Number Formatting
  
+    def currencies(self):
+        return self._data['currency_names']
+    currencies = property(currencies, doc="""\
+        Mapping of currency codes to translated currency names.
+        
+        >>> Locale('en').currencies['COP']
+        u'Colombian Peso'
+        >>> Locale('de', 'DE').currencies['COP']
+        u'Kolumbianischer Peso'
+        
+        :type: `dict`
+        """)
+
+    def currency_symbols(self):
+        return self._data['currency_symbols']
+    currency_symbols = property(currency_symbols, doc="""\
+        Mapping of currency codes to symbols.
+        
+        >>> Locale('en').currency_symbols['USD']
+        u'US$'
+        >>> Locale('en', 'US').currency_symbols['USD']
+        u'$'
+        
+        :type: `dict`
+        """)
+
      def number_symbols(self):
          return self._data['number_symbols']
      number_symbols = property(number_symbols, doc="""\
diff --git a/babel/localedata.py b/babel/localedata.py

new file mode 100644 (file)

index 0000000..95e4d58
--- /dev/null
+++ b/babel/localedata.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Low-level locale data access.
+
+:note: The `Locale` class, which uses this module under the hood, provides a
+       more convenient interface for accessing the locale data.
+"""
+
+import copy
+import os
+import pickle
+try:
+    import threading
+except ImportError:
+    import dummy_threading as threading
+
+__all__ = ['load']
+__docformat__ = 'restructuredtext en'
+
+_cache = {}
+_cache_lock = threading.RLock()
+
+def load(name):
+    """Load the locale data for the given locale.
+    
+    The locale data is a dictionary that contains much of the data defined by
+    the Common Locale Data Repository (CLDR). This data is stored as a
+    collection of pickle files inside the ``babel`` package.
+    
+    >>> d = load('en_US')
+    >>> d['languages']['sv']
+    u'Swedish'
+    
+    Note that the results are cached, and subsequent requests for the same
+    locale return the same dictionary:
+    
+    >>> d1 = load('en_US')
+    >>> d2 = load('en_US')
+    >>> d1 is d2
+    True
+    
+    :param name: the locale identifier string (or "root")
+    :return: the locale data
+    :rtype: `dict`
+    :raise `IOError`: if no locale data file is found for the given locale
+                      identifier, or one of the locales it inherits from
+    """
+    _cache_lock.acquire()
+    try:
+        data = _cache.get(name)
+        if not data:
+            # Load inherited data
+            if name == 'root':
+                data = {}
+            else:
+                parts = name.split('_')
+                if len(parts) == 1:
+                    parent = 'root'
+                else:
+                    parent = '_'.join(parts[:-1])
+                data = load(parent).copy()
+            filename = os.path.join(os.path.dirname(__file__),
+                                    'localedata/%s.dat' % name)
+            fileobj = open(filename, 'rb')
+            try:
+                if name != 'root':
+                    merge(data, pickle.load(fileobj))
+                else:
+                    data = pickle.load(fileobj)
+                _cache[name] = data
+            finally:
+                fileobj.close()
+        return data
+    finally:
+        _cache_lock.release()
+
+def merge(dict1, dict2):
+    """Merge the data from `dict2` into the `dict1` dictionary, making copies
+    of nested dictionaries.
+    
+    :param dict1: the dictionary to merge into
+    :param dict2: the dictionary containing the data that should be merged
+    """
+    for key, value in dict2.items():
+        if value:
+            if type(value) is dict:
+                dict1[key] = dict1.get(key, {}).copy()
+                merge(dict1[key], value)
+            else:
+                dict1[key] = value
diff --git a/babel/numbers.py b/babel/numbers.py

index bb5dbefe2a9dfcc58f08eca7d50ab59e89e6a482..1ef6325e2a78f73920de13561cad030975afff28 100644 (file)
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -106,20 +106,20 @@ def format_decimal(number, format=None, locale=LC_NUMERIC):
          pattern = parse_pattern(format)
      return pattern.apply(number, locale)
  
-def format_currency(value, locale=LC_NUMERIC):
+def format_currency(number, locale=LC_NUMERIC):
      """Returns formatted currency value.
      
      >>> format_currency(1099.98, locale='en_US')
      u'1,099.98'
      
-    :param value: the number to format
+    :param number: the number to format
      :param locale: the `Locale` object or locale identifier
      :return: the formatted currency value
      :rtype: `unicode`
      """
-    return format_decimal(value, locale=locale)
+    return format_decimal(number, locale=locale)
  
-def format_percent(value, format=None, locale=LC_NUMERIC):
+def format_percent(number, format=None, locale=LC_NUMERIC):
      """Returns formatted percent value for a specific locale.
      
      >>> format_percent(0.34, locale='en_US')
@@ -139,9 +139,9 @@ def format_percent(value, format=None, locale=LC_NUMERIC):
      pattern = locale.percent_formats.get(format)
      if not pattern:
          pattern = parse_pattern(format)
-    return pattern.apply(value, locale)
+    return pattern.apply(number, locale)
  
-def format_scientific(value, locale=LC_NUMERIC):
+def format_scientific(number, locale=LC_NUMERIC):
      raise NotImplementedError
  
  def parse_number(string, locale=LC_NUMERIC):
diff --git a/babel/tests/__init__.py b/babel/tests/__init__.py

index 13a680bd162a2960a50e6b0bdc40540b50f8a2fa..af3f4e61d1dcc78a12d1c1d4ed6e9387cacf10a6 100644 (file)
--- a/babel/tests/__init__.py
+++ b/babel/tests/__init__.py
@@ -14,11 +14,12 @@
  import unittest
  
  def suite():
-    from babel.tests import core, dates, numbers, util
+    from babel.tests import core, dates, localedata, numbers, util
      from babel.catalog import tests as catalog
      suite = unittest.TestSuite()
      suite.addTest(core.suite())
      suite.addTest(dates.suite())
+    suite.addTest(localedata.suite())
      suite.addTest(numbers.suite())
      suite.addTest(util.suite())
      suite.addTest(catalog.suite())
diff --git a/babel/tests/localedata.py b/babel/tests/localedata.py

new file mode 100644 (file)

index 0000000..cc06908
--- /dev/null
+++ b/babel/tests/localedata.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+import doctest
+import unittest
+
+from babel import localedata
+
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(doctest.DocTestSuite(localedata))
+    return suite
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py

index e88edfd157e71a84183a456a149445dea921efa3..328a491c596a2f0a9b2ce8917cfb63fcd334da9e 100755 (executable)
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -33,13 +33,6 @@ except NameError:
      def any(iterable):
          return filter(None, list(iterable))
  
-def _parent(locale):
-    parts = locale.split('_')
-    if len(parts) == 1:
-        return 'root'
-    else:
-        return '_'.join(parts[:-1])
-
  def _text(elem):
      buf = [elem.text or '']
      for child in elem:
@@ -63,7 +56,6 @@ def main():
      regions = {}
      for elem in sup.findall('//territoryContainment/group'):
          regions[elem.attrib['type']] = elem.attrib['contains'].split()
-    from pprint import pprint
  
      # Resolve territory containment
      territory_containment = {}
@@ -89,10 +81,8 @@ def main():
          if ext != '.xml':
              continue
  
-        data = {}
-        if stem != 'root':
-            data.update(copy.deepcopy(dicts[_parent(stem)]))
          tree = parse(os.path.join(srcdir, 'main', filename))
+        data = {}
  
          language = None
          elem = tree.find('//identity/language')
@@ -229,7 +219,7 @@ def main():
                      date_formats[elem.attrib.get('type')] = \
                          dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
                  except ValueError, e:
-                    print e
+                    print>>sys.stderr, 'ERROR: %s' % e
  
              time_formats = data.setdefault('time_formats', {})
              for elem in calendar.findall('timeFormats/timeFormatLength'):
@@ -239,7 +229,7 @@ def main():
                      time_formats[elem.attrib.get('type')] = \
                          dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
                  except ValueError, e:
-                    print e
+                    print>>sys.stderr, 'ERROR: %s' % e
  
          # <numbers>
  
@@ -251,32 +241,39 @@ def main():
          for elem in tree.findall('//decimalFormats/decimalFormatLength'):
              if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
                  continue
-            decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('decimalFormat/pattern')))
+            pattern = unicode(elem.findtext('decimalFormat/pattern'))
+            decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
  
          scientific_formats = data.setdefault('scientific_formats', {})
          for elem in tree.findall('//scientificFormats/scientificFormatLength'):
              if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
                  continue
+            # FIXME: should use numbers.parse_pattern
              scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern'))
  
          currency_formats = data.setdefault('currency_formats', {})
          for elem in tree.findall('//currencyFormats/currencyFormatLength'):
              if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
                  continue
+            # FIXME: should use numbers.parse_pattern
              currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern'))
  
          percent_formats = data.setdefault('percent_formats', {})
          for elem in tree.findall('//percentFormats/percentFormatLength'):
              if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
                  continue
-            percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('percentFormat/pattern')))
+            pattern = unicode(elem.findtext('percentFormat/pattern'))
+            percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
  
-        currencies = data.setdefault('currencies', {})
+        currency_names = data.setdefault('currency_names', {})
+        currency_symbols = data.setdefault('currency_symbols', {})
          for elem in tree.findall('//currencies/currency'):
-            currencies[elem.attrib['type']] = {
-                'display_name': unicode(elem.findtext('displayName')),
-                'symbol': unicode(elem.findtext('symbol'))
-            }
+            name = elem.findtext('displayName')
+            if name:
+                currency_names[elem.attrib['type']] = unicode(name)
+            symbol = elem.findtext('symbol')
+            if symbol:
+                currency_symbols[elem.attrib['type']] = unicode(symbol)
  
          dicts[stem] = data
          outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
author	Christopher Lenz <cmlenz@gmail.com>
	Sun, 3 Jun 2007 15:27:27 +0000 (15:27 +0000)
committer	Christopher Lenz <cmlenz@gmail.com>
	Sun, 3 Jun 2007 15:27:27 +0000 (15:27 +0000)
babel/catalog/pofile.py		patch \| blob \| blame \| history
babel/core.py		patch \| blob \| blame \| history
babel/localedata.py	[new file with mode: 0644]	patch \| blob
babel/numbers.py		patch \| blob \| blame \| history
babel/tests/__init__.py		patch \| blob \| blame \| history
babel/tests/localedata.py	[new file with mode: 0644]	patch \| blob
scripts/import_cldr.py		patch \| blob \| blame \| history