* Move locale data loading from `babel.core` into a separate `babel.localedata` module.
* Add currency names and symbols to locale data.
:param string: the string to normalize
:param width: the maximum line width; use `None`, 0, or a negative number
to completely disable line wrapping
- :param charset: the encoding to use for `unicode` strings
:return: the normalized string
:rtype: `unicode`
"""
"""Core locale representation and locale data access gateway."""
-import os
-import pickle
-try:
- import threading
-except ImportError:
- import dummy_threading as threading
+from babel import localedata
__all__ = ['Locale', 'negotiate', 'parse']
__docformat__ = 'restructuredtext en'
:see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
"""
- _cache = {}
- _cache_lock = threading.Lock()
-
- def __new__(cls, language, territory=None, variant=None):
- """Create new locale object, or load it from the cache if it had already
- been instantiated.
-
- >>> l1 = Locale('en')
- >>> l2 = Locale('en')
- >>> l1 is l2
- True
-
- :param language: the language code
- :param territory: the territory (country or region) code
- :param variant: the variant code
- :return: new or existing `Locale` instance
- :rtype: `Locale`
- """
- key = (language, territory, variant)
- cls._cache_lock.acquire()
- try:
- self = cls._cache.get(key)
- if self is None:
- self = super(Locale, cls).__new__(cls, language, territory,
- variant)
- cls._cache[key] = self
- return self
- finally:
- self._cache_lock.release()
def __init__(self, language, territory=None, variant=None):
"""Initialize the locale object from the given identifier components.
self.language = language
self.territory = territory
self.variant = variant
- self.__data = None
+ self._data = localedata.load(str(self))
def parse(cls, identifier, sep='_'):
"""Create a `Locale` instance for the given locale identifier.
return '_'.join(filter(None, [self.language, self.territory,
self.variant]))
- def _data(self):
- if self.__data is None:
- filename = os.path.join(os.path.dirname(__file__),
- 'localedata/%s.dat' % self)
- fileobj = open(filename, 'rb')
- try:
- self.__data = pickle.load(fileobj)
- finally:
- fileobj.close()
- return self.__data
- _data = property(_data)
-
def display_name(self):
retval = self.languages.get(self.language)
if self.territory:
#{ Number Formatting
+ def currencies(self):
+ return self._data['currency_names']
+ currencies = property(currencies, doc="""\
+ Mapping of currency codes to translated currency names.
+
+ >>> Locale('en').currencies['COP']
+ u'Colombian Peso'
+ >>> Locale('de', 'DE').currencies['COP']
+ u'Kolumbianischer Peso'
+
+ :type: `dict`
+ """)
+
+ def currency_symbols(self):
+ return self._data['currency_symbols']
+ currency_symbols = property(currency_symbols, doc="""\
+ Mapping of currency codes to symbols.
+
+ >>> Locale('en').currency_symbols['USD']
+ u'US$'
+ >>> Locale('en', 'US').currency_symbols['USD']
+ u'$'
+
+ :type: `dict`
+ """)
+
def number_symbols(self):
return self._data['number_symbols']
number_symbols = property(number_symbols, doc="""\
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Low-level locale data access.
+
+:note: The `Locale` class, which uses this module under the hood, provides a
+ more convenient interface for accessing the locale data.
+"""
+
+import copy
+import os
+import pickle
+try:
+ import threading
+except ImportError:
+ import dummy_threading as threading
+
+__all__ = ['load']
+__docformat__ = 'restructuredtext en'
+
+_cache = {}
+_cache_lock = threading.RLock()
+
+def load(name):
+ """Load the locale data for the given locale.
+
+ The locale data is a dictionary that contains much of the data defined by
+ the Common Locale Data Repository (CLDR). This data is stored as a
+ collection of pickle files inside the ``babel`` package.
+
+ >>> d = load('en_US')
+ >>> d['languages']['sv']
+ u'Swedish'
+
+ Note that the results are cached, and subsequent requests for the same
+ locale return the same dictionary:
+
+ >>> d1 = load('en_US')
+ >>> d2 = load('en_US')
+ >>> d1 is d2
+ True
+
+ :param name: the locale identifier string (or "root")
+ :return: the locale data
+ :rtype: `dict`
+ :raise `IOError`: if no locale data file is found for the given locale
+ identifier, or one of the locales it inherits from
+ """
+ # The lock is held for the whole lookup. `_cache_lock` is a re-entrant
+ # lock (`threading.RLock`), so the recursive `load(parent)` call below
+ # can acquire it again from the same thread.
+ _cache_lock.acquire()
+ try:
+ data = _cache.get(name)
+ # Truthiness test: a missing entry and a cached-but-empty dictionary
+ # are both treated as a cache miss.
+ if not data:
+ # Load inherited data
+ if name == 'root':
+ data = {}
+ else:
+ # The parent is the identifier with its last '_'-separated
+ # component dropped; a single-component identifier inherits
+ # from 'root'.
+ parts = name.split('_')
+ if len(parts) == 1:
+ parent = 'root'
+ else:
+ parent = '_'.join(parts[:-1])
+ # Shallow copy: the parent's cached top-level dictionary is not
+ # modified; `merge` copies nested dictionaries before writing
+ # into them.
+ data = load(parent).copy()
+ filename = os.path.join(os.path.dirname(__file__),
+ 'localedata/%s.dat' % name)
+ fileobj = open(filename, 'rb')
+ try:
+ # Non-root locales overlay their own pickled data onto the
+ # inherited data; for 'root' the unpickled object replaces the
+ # empty dictionary created above.
+ if name != 'root':
+ merge(data, pickle.load(fileobj))
+ else:
+ data = pickle.load(fileobj)
+ _cache[name] = data
+ finally:
+ fileobj.close()
+ return data
+ finally:
+ _cache_lock.release()
+
+def merge(dict1, dict2):
+ """Merge the data from `dict2` into the `dict1` dictionary, making copies
+ of nested dictionaries.
+
+ `dict1` is modified in place; nothing is returned.
+
+ :param dict1: the dictionary to merge into
+ :param dict2: the dictionary containing the data that should be merged
+ """
+ for key, value in dict2.items():
+ # Falsy values in `dict2` (e.g. `None`, empty strings or empty
+ # dictionaries) are skipped, leaving any existing `dict1` entry as is.
+ if value:
+ # Exact type check: only plain `dict` values are merged recursively;
+ # anything else (including dict subclasses) is assigned as is.
+ if type(value) is dict:
+ # Replace the nested dictionary with a copy before recursing, so
+ # the object `dict1[key]` originally referred to is not mutated.
+ # NOTE(review): assumes an existing `dict1[key]` is itself a
+ # dictionary (it must provide `.copy()`) -- confirm.
+ dict1[key] = dict1.get(key, {}).copy()
+ merge(dict1[key], value)
+ else:
+ dict1[key] = value
pattern = parse_pattern(format)
return pattern.apply(number, locale)
-def format_currency(value, locale=LC_NUMERIC):
+def format_currency(number, locale=LC_NUMERIC):
"""Returns formatted currency value.
>>> format_currency(1099.98, locale='en_US')
u'1,099.98'
- :param value: the number to format
+ :param number: the number to format
:param locale: the `Locale` object or locale identifier
:return: the formatted currency value
:rtype: `unicode`
"""
- return format_decimal(value, locale=locale)
+ return format_decimal(number, locale=locale)
-def format_percent(value, format=None, locale=LC_NUMERIC):
+def format_percent(number, format=None, locale=LC_NUMERIC):
"""Returns formatted percent value for a specific locale.
>>> format_percent(0.34, locale='en_US')
pattern = locale.percent_formats.get(format)
if not pattern:
pattern = parse_pattern(format)
- return pattern.apply(value, locale)
+ return pattern.apply(number, locale)
-def format_scientific(value, locale=LC_NUMERIC):
+def format_scientific(number, locale=LC_NUMERIC):
raise NotImplementedError
def parse_number(string, locale=LC_NUMERIC):
import unittest
def suite():
- from babel.tests import core, dates, numbers, util
+ from babel.tests import core, dates, localedata, numbers, util
from babel.catalog import tests as catalog
suite = unittest.TestSuite()
suite.addTest(core.suite())
suite.addTest(dates.suite())
+ suite.addTest(localedata.suite())
suite.addTest(numbers.suite())
suite.addTest(util.suite())
suite.addTest(catalog.suite())
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+import doctest
+import unittest
+
+from babel import localedata
+
+# Build the test suite for this module: it collects the doctests found in
+# `babel.localedata`.
+def suite():
+ suite = unittest.TestSuite()
+ suite.addTest(doctest.DocTestSuite(localedata))
+ return suite
+
+if __name__ == '__main__':
+ unittest.main(defaultTest='suite')
def any(iterable):
return filter(None, list(iterable))
-def _parent(locale):
- parts = locale.split('_')
- if len(parts) == 1:
- return 'root'
- else:
- return '_'.join(parts[:-1])
-
def _text(elem):
buf = [elem.text or '']
for child in elem:
regions = {}
for elem in sup.findall('//territoryContainment/group'):
regions[elem.attrib['type']] = elem.attrib['contains'].split()
- from pprint import pprint
# Resolve territory containment
territory_containment = {}
if ext != '.xml':
continue
- data = {}
- if stem != 'root':
- data.update(copy.deepcopy(dicts[_parent(stem)]))
tree = parse(os.path.join(srcdir, 'main', filename))
+ data = {}
language = None
elem = tree.find('//identity/language')
date_formats[elem.attrib.get('type')] = \
dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
except ValueError, e:
- print e
+ print>>sys.stderr, 'ERROR: %s' % e
time_formats = data.setdefault('time_formats', {})
for elem in calendar.findall('timeFormats/timeFormatLength'):
time_formats[elem.attrib.get('type')] = \
dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
except ValueError, e:
- print e
+ print>>sys.stderr, 'ERROR: %s' % e
# <numbers>
for elem in tree.findall('//decimalFormats/decimalFormatLength'):
if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
continue
- decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('decimalFormat/pattern')))
+ pattern = unicode(elem.findtext('decimalFormat/pattern'))
+ decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
scientific_formats = data.setdefault('scientific_formats', {})
for elem in tree.findall('//scientificFormats/scientificFormatLength'):
if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
continue
+ # FIXME: should use numbers.parse_pattern
scientific_formats[elem.attrib.get('type')] = unicode(elem.findtext('scientificFormat/pattern'))
currency_formats = data.setdefault('currency_formats', {})
for elem in tree.findall('//currencyFormats/currencyFormatLength'):
if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
continue
+ # FIXME: should use numbers.parse_pattern
currency_formats[elem.attrib.get('type')] = unicode(elem.findtext('currencyFormat/pattern'))
percent_formats = data.setdefault('percent_formats', {})
for elem in tree.findall('//percentFormats/percentFormatLength'):
if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
continue
- percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(unicode(elem.findtext('percentFormat/pattern')))
+ pattern = unicode(elem.findtext('percentFormat/pattern'))
+ percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
- currencies = data.setdefault('currencies', {})
+ currency_names = data.setdefault('currency_names', {})
+ currency_symbols = data.setdefault('currency_symbols', {})
for elem in tree.findall('//currencies/currency'):
- currencies[elem.attrib['type']] = {
- 'display_name': unicode(elem.findtext('displayName')),
- 'symbol': unicode(elem.findtext('symbol'))
- }
+ name = elem.findtext('displayName')
+ if name:
+ currency_names[elem.attrib['type']] = unicode(name)
+ symbol = elem.findtext('symbol')
+ if symbol:
+ currency_symbols[elem.attrib['type']] = unicode(symbol)
dicts[stem] = data
outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')