import threading
except ImportError:
import dummy_threading as threading
+from UserDict import DictMixin
-__all__ = ['exists', 'load']
+__all__ = ['exists', 'list', 'load']
__docformat__ = 'restructuredtext en'
_cache = {}
_cache_lock = threading.RLock()
_dirname = os.path.join(os.path.dirname(__file__), 'localedata')
+
def exists(name):
"""Check whether locale data is available for the given locale.
return True
return os.path.exists(os.path.join(_dirname, '%s.dat' % name))
+
def list():
"""Return a list of all locale identifiers for which locale data is
available.
os.path.splitext(filename) for filename in os.listdir(_dirname)
] if extension == '.dat' and stem != 'root']
-def load(name):
+
+def load(name, merge_inherited=True):
"""Load the locale data for the given locale.
The locale data is a dictionary that contains much of the data defined by
True
:param name: the locale identifier string (or "root")
+ :param merge_inherited: whether the inherited data should be merged into
+ the data of the requested locale
:return: the locale data
:rtype: `dict`
:raise `IOError`: if no locale data file is found for the given locale
data = _cache.get(name)
if not data:
# Load inherited data
- if name == 'root':
+ if name == 'root' or not merge_inherited:
data = {}
else:
parts = name.split('_')
filename = os.path.join(_dirname, '%s.dat' % name)
fileobj = open(filename, 'rb')
try:
- if name != 'root':
+ if name != 'root' and merge_inherited:
merge(data, pickle.load(fileobj))
else:
data = pickle.load(fileobj)
finally:
_cache_lock.release()
+
def merge(dict1, dict2):
"""Merge the data from `dict2` into the `dict1` dictionary, making copies
of nested dictionaries.
+ >>> d = {1: 'foo', 3: 'baz'}
+ >>> merge(d, {1: 'Foo', 2: 'Bar'})
+ >>> d
+ {1: 'Foo', 2: 'Bar', 3: 'baz'}
+
:param dict1: the dictionary to merge into
:param dict2: the dictionary containing the data that should be merged
"""
- for key, value in dict2.items():
- if value is not None:
- if type(value) is dict:
- dict1[key] = dict1.get(key, {}).copy()
- merge(dict1[key], value)
+ for key, val2 in dict2.items():
+ if val2 is not None:
+ val1 = dict1.get(key)
+ if isinstance(val2, dict):
+ if val1 is None:
+ val1 = {}
+ if isinstance(val1, Alias):
+ val1 = (val1, val2)
+ elif isinstance(val1, tuple):
+ alias, others = val1
+ others = others.copy()
+ merge(others, val2)
+ val1 = (alias, others)
+ else:
+ val1 = val1.copy()
+ merge(val1, val2)
else:
- dict1[key] = value
+ val1 = val2
+ dict1[key] = val1
+
+
+class Alias(object):
+ """Representation of an alias in the locale data.
+
+ An alias is a value that refers to some other part of the locale data,
+ as specified by the `keys`.
+ """
+
+ def __init__(self, keys):
+ self.keys = tuple(keys)
+
+ def __repr__(self):
+ return '<%s %r>' % (type(self).__name__, self.keys)
+
+ def resolve(self, data):
+ """Resolve the alias based on the given data.
+
+ This is done recursively, so if one alias resolves to a second alias,
+ that second alias will also be resolved.
+
+ :param data: the locale data
+ :type data: `dict`
+ """
+ base = data
+ for key in self.keys:
+ data = data[key]
+ if isinstance(data, Alias):
+ data = data.resolve(base)
+ return data
+
+
+class LocaleDataDict(DictMixin, dict):
+ """Dictionary wrapper that automatically resolves aliases to the actual
+ values.
+ """
+
+ def __init__(self, data, base=None):
+ dict.__init__(self, data)
+ if base is None:
+ base = self
+ self.base = base
+
+ def __getitem__(self, key):
+ val = dict.__getitem__(self, key)
+ if isinstance(val, Alias): # resolve an alias
+ val = val.resolve(self.base)
+ if isinstance(val, tuple): # Merge a partial dict with an alias
+ alias, others = val
+ val = alias.resolve(self.base).copy()
+ merge(val, others)
+ if isinstance(val, dict): # Return a nested alias-resolving dict
+ val = LocaleDataDict(val, base=self.base)
+ return val
+
+ def copy(self):
+ return LocaleDataDict(dict.copy(self), base=self.base)
from optparse import OptionParser
import os
import pickle
+import re
import sys
try:
from xml.etree.ElementTree import parse
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
from babel import dates, numbers
+from babel.localedata import Alias
weekdays = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5,
'sun': 6}
def any(iterable):
return filter(None, list(iterable))
+
def _text(elem):
buf = [elem.text or '']
for child in elem:
buf.append(elem.tail or '')
return u''.join(filter(None, buf)).strip()
+
+NAME_RE = re.compile(r"^\w+$")
+TYPE_ATTR_RE = re.compile(r"^\w+\[@type='(.*?)'\]$")
+
+NAME_MAP = {
+ 'dateFormats': 'date_formats',
+ 'dateTimeFormats': 'datetime_formats',
+ 'eraAbbr': 'abbreviated',
+ 'eraNames': 'wide',
+ 'eraNarrow': 'narrow',
+ 'timeFormats': 'time_formats'
+}
+
+def _translate_alias(ctxt, path):
+ parts = path.split('/')
+ keys = ctxt[:]
+ for part in parts:
+ if part == '..':
+ keys.pop()
+ else:
+ match = TYPE_ATTR_RE.match(part)
+ if match:
+ keys.append(match.group(1))
+ else:
+ assert NAME_RE.match(part)
+ keys.append(NAME_MAP.get(part, part))
+ return keys
+
+
def main():
parser = OptionParser(usage='%prog path/to/cldr')
options, args = parser.parse_args()
stem, ext = os.path.splitext(filename)
if ext != '.xml':
continue
+ #if stem != 'root':
+ # break
tree = parse(os.path.join(srcdir, 'main', filename))
data = {}
territories = data.setdefault('territories', {})
for elem in tree.findall('//territories/territory'):
- if 'draft' in elem.attrib and elem.attrib['type'] in territories:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib['type'] in territories:
continue
territories[elem.attrib['type']] = _text(elem)
languages = data.setdefault('languages', {})
for elem in tree.findall('//languages/language'):
- if 'draft' in elem.attrib and elem.attrib['type'] in languages:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib['type'] in languages:
continue
languages[elem.attrib['type']] = _text(elem)
variants = data.setdefault('variants', {})
for elem in tree.findall('//variants/variant'):
- if 'draft' in elem.attrib and elem.attrib['type'] in variants:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib['type'] in variants:
continue
variants[elem.attrib['type']] = _text(elem)
scripts = data.setdefault('scripts', {})
for elem in tree.findall('//scripts/script'):
- if 'draft' in elem.attrib and elem.attrib['type'] in scripts:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib['type'] in scripts:
continue
scripts[elem.attrib['type']] = _text(elem)
zone_formats = data.setdefault('zone_formats', {})
for elem in tree.findall('//timeZoneNames/gmtFormat'):
- if 'draft' not in elem.attrib:
+ if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
break
for elem in tree.findall('//timeZoneNames/regionFormat'):
- if 'draft' not in elem.attrib:
+ if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
break
for elem in tree.findall('//timeZoneNames/fallbackFormat'):
- if 'draft' not in elem.attrib:
+ if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
zone_formats['fallback'] = unicode(elem.text) \
.replace('{0}', '%(0)s').replace('{1}', '%(1)s')
break
months = data.setdefault('months', {})
for ctxt in calendar.findall('months/monthContext'):
- ctxts = months.setdefault(ctxt.attrib['type'], {})
+ ctxt_type = ctxt.attrib['type']
+ ctxts = months.setdefault(ctxt_type, {})
for width in ctxt.findall('monthWidth'):
- widths = ctxts.setdefault(width.attrib['type'], {})
- for elem in width.findall('month'):
- if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
- continue
- widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+ width_type = width.attrib['type']
+ widths = ctxts.setdefault(width_type, {})
+ for elem in width.getiterator():
+ if elem.tag == 'month':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and int(elem.attrib['type']) in widths:
+ continue
+ widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+ elif elem.tag == 'alias':
+ ctxts[width_type] = Alias(
+ _translate_alias(['months', ctxt_type, width_type],
+ elem.attrib['path'])
+ )
days = data.setdefault('days', {})
for ctxt in calendar.findall('days/dayContext'):
- ctxts = days.setdefault(ctxt.attrib['type'], {})
+ ctxt_type = ctxt.attrib['type']
+ ctxts = days.setdefault(ctxt_type, {})
for width in ctxt.findall('dayWidth'):
- widths = ctxts.setdefault(width.attrib['type'], {})
- for elem in width.findall('day'):
- dtype = weekdays[elem.attrib['type']]
- if 'draft' in elem.attrib and dtype in widths:
- continue
- widths[dtype] = unicode(elem.text)
+ width_type = width.attrib['type']
+ widths = ctxts.setdefault(width_type, {})
+ for elem in width.getiterator():
+ if elem.tag == 'day':
+ dtype = weekdays[elem.attrib['type']]
+ if ('draft' in elem.attrib or 'alt' not in elem.attrib) \
+ and dtype in widths:
+ continue
+ widths[dtype] = unicode(elem.text)
+ elif elem.tag == 'alias':
+ ctxts[width_type] = Alias(
+ _translate_alias(['days', ctxt_type, width_type],
+ elem.attrib['path'])
+ )
quarters = data.setdefault('quarters', {})
for ctxt in calendar.findall('quarters/quarterContext'):
+ ctxt_type = ctxt.attrib['type']
ctxts = quarters.setdefault(ctxt.attrib['type'], {})
for width in ctxt.findall('quarterWidth'):
- widths = ctxts.setdefault(width.attrib['type'], {})
- for elem in width.findall('quarter'):
- if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
- continue
- widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+ width_type = width.attrib['type']
+ widths = ctxts.setdefault(width_type, {})
+ for elem in width.getiterator():
+ if elem.tag == 'quarter':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and int(elem.attrib['type']) in widths:
+ continue
+ widths[int(elem.attrib['type'])] = unicode(elem.text)
+ elif elem.tag == 'alias':
+ ctxts[width_type] = Alias(
+ _translate_alias(['quarters', ctxt_type, width_type],
+ elem.attrib['path'])
+ )
eras = data.setdefault('eras', {})
for width in calendar.findall('eras/*'):
- ewidth = {
- 'eraAbbr': 'abbreviated',
- 'eraNames': 'wide',
- 'eraNarrow': 'narrow',
- }[width.tag]
- widths = eras.setdefault(ewidth, {})
- for elem in width.findall('era'):
- if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
- continue
- widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+ width_type = NAME_MAP[width.tag]
+ widths = eras.setdefault(width_type, {})
+ for elem in width.getiterator():
+ if elem.tag == 'era':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and int(elem.attrib['type']) in widths:
+ continue
+ widths[int(elem.attrib.get('type'))] = unicode(elem.text)
+ elif elem.tag == 'alias':
+ eras[width_type] = Alias(
+ _translate_alias(['eras', width_type],
+ elem.attrib['path'])
+ )
# AM/PM
periods = data.setdefault('periods', {})
for elem in calendar.findall('am'):
- if 'draft' in elem.attrib and elem.tag in periods:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.tag in periods:
continue
periods[elem.tag] = unicode(elem.text)
for elem in calendar.findall('pm'):
- if 'draft' in elem.attrib and elem.tag in periods:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.tag in periods:
continue
periods[elem.tag] = unicode(elem.text)
date_formats = data.setdefault('date_formats', {})
- for elem in calendar.findall('dateFormats/dateFormatLength'):
- if 'draft' in elem.attrib and elem.attrib.get('type') in date_formats:
- continue
- try:
- date_formats[elem.attrib.get('type')] = \
- dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
- except ValueError, e:
- print>>sys.stderr, 'ERROR: %s' % e
+ for format in calendar.findall('dateFormats'):
+ for elem in format.getiterator():
+ if elem.tag == 'dateFormatLength':
+ if 'draft' in elem.attrib and \
+ elem.attrib.get('type') in date_formats:
+ continue
+ try:
+ date_formats[elem.attrib.get('type')] = \
+ dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
+ except ValueError, e:
+ print>>sys.stderr, 'ERROR: %s' % e
+ elif elem.tag == 'alias':
+ date_formats = Alias(_translate_alias(
+ ['date_formats'], elem.attrib['path'])
+ )
time_formats = data.setdefault('time_formats', {})
- for elem in calendar.findall('timeFormats/timeFormatLength'):
- if 'draft' in elem.attrib and elem.attrib.get('type') in time_formats:
- continue
- try:
- time_formats[elem.attrib.get('type')] = \
- dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
- except ValueError, e:
- print>>sys.stderr, 'ERROR: %s' % e
+ for format in calendar.findall('timeFormats'):
+ for elem in format.getiterator():
+ if elem.tag == 'timeFormatLength':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in time_formats:
+ continue
+ try:
+ time_formats[elem.attrib.get('type')] = \
+ dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
+ except ValueError, e:
+ print>>sys.stderr, 'ERROR: %s' % e
+ elif elem.tag == 'alias':
+ time_formats = Alias(_translate_alias(
+ ['time_formats'], elem.attrib['path'])
+ )
datetime_formats = data.setdefault('datetime_formats', {})
- for elem in calendar.findall('dateTimeFormats/dateTimeFormatLength'):
- if 'draft' in elem.attrib and elem.attrib.get('type') in datetime_formats:
- continue
- try:
- datetime_formats[elem.attrib.get('type')] = \
- unicode(elem.findtext('dateTimeFormat/pattern'))
- except ValueError, e:
- print>>sys.stderr, 'ERROR: %s' % e
+ for format in calendar.findall('dateTimeFormats'):
+ for elem in format.getiterator():
+ if elem.tag == 'dateTimeFormatLength':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in datetime_formats:
+ continue
+ try:
+ datetime_formats[elem.attrib.get('type')] = \
+ unicode(elem.findtext('dateTimeFormat/pattern'))
+ except ValueError, e:
+ print>>sys.stderr, 'ERROR: %s' % e
+ elif elem.tag == 'alias':
+ datetime_formats = Alias(_translate_alias(
+ ['datetime_formats'], elem.attrib['path'])
+ )
# <numbers>
decimal_formats = data.setdefault('decimal_formats', {})
for elem in tree.findall('//decimalFormats/decimalFormatLength'):
- if 'draft' in elem.attrib and elem.attrib.get('type') in decimal_formats:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in decimal_formats:
continue
pattern = unicode(elem.findtext('decimalFormat/pattern'))
decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
scientific_formats = data.setdefault('scientific_formats', {})
for elem in tree.findall('//scientificFormats/scientificFormatLength'):
- if 'draft' in elem.attrib and elem.attrib.get('type') in scientific_formats:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in scientific_formats:
continue
pattern = unicode(elem.findtext('scientificFormat/pattern'))
scientific_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
currency_formats = data.setdefault('currency_formats', {})
for elem in tree.findall('//currencyFormats/currencyFormatLength'):
- if 'draft' in elem.attrib and elem.attrib.get('type') in currency_formats:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in currency_formats:
continue
pattern = unicode(elem.findtext('currencyFormat/pattern'))
currency_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
percent_formats = data.setdefault('percent_formats', {})
for elem in tree.findall('//percentFormats/percentFormatLength'):
- if 'draft' in elem.attrib and elem.attrib.get('type') in percent_formats:
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in percent_formats:
continue
pattern = unicode(elem.findtext('percentFormat/pattern'))
percent_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
finally:
outfile.close()
+
if __name__ == '__main__':
main()