return Locale.parse(identifier, sep=sep)
@classmethod
- def parse(cls, identifier, sep='_'):
+ def parse(cls, identifier, sep='_', resolve_likely_subtags=True):
"""Create a `Locale` instance for the given locale identifier.
>>> l = Locale.parse('de-DE', sep='-')
>>> Locale.parse(l)
Locale('de', territory='DE')
+ If no locale data exists for the identifier as given, likely subtags
+ are resolved by default to find a matching locale.
+
+ `None` is passed through as `None`, and an existing `Locale` object is
+ returned unchanged.
+
:param identifier: the locale identifier string
:param sep: optional component separator
+ :param resolve_likely_subtags: if true (the default), a locale will
+ have its likely subtags resolved if the
+ locale otherwise does not exist. For
+ instance ``zh_TW`` by itself is not a
+ locale that exists but Babel can
+ automatically expand it to the full
+ form of ``zh_Hant_TW``. Note that this
+ expansion only takes place if no
+ locale exists otherwise. For instance
+ there is a locale ``en`` that can exist
+ by itself.
:return: a corresponding `Locale` instance
:rtype: `Locale`
:raise `ValueError`: if the string does not appear to be a valid locale
identifier
+ :raise `TypeError`: if the identifier is neither a string, a `Locale`
+ object nor `None`
:raise `UnknownLocaleError`: if no locale data is available for the
requested locale
:see: `parse_locale`
"""
- if isinstance(identifier, string_types):
- return cls(*parse_locale(identifier, sep=sep))
- return identifier
+ if identifier is None:
+ return None
+ elif isinstance(identifier, Locale):
+ return identifier
+ elif not isinstance(identifier, string_types):
+ raise TypeError('Unexpected value for identifier: %r' % (identifier,))
+
+ parts = parse_locale(identifier, sep=sep)
+
+ def _make_id(language, territory, script, variant):
+ # Join the non-empty components in language_script_territory_variant
+ # order.
+ return '_'.join(filter(None, [language, script,
+ territory, variant]))
+
+ # Remember the identifier as requested so that errors report what the
+ # caller asked for, not an alias-substituted form.
+ input_id = _make_id(*parts)
+
+ def _try_load(parts):
+ # Return the locale for ``parts`` or None if no data exists for it.
+ try:
+ return cls(*parts)
+ except UnknownLocaleError:
+ return None
+
+ locale = _try_load(parts)
+ if locale is not None:
+ return locale
+ if not resolve_likely_subtags:
+ raise UnknownLocaleError(input_id)
+
+ # From here onwards is some very bad likely subtag resolving. This
+ # whole logic is not entirely correct but good enough (tm) for the
+ # time being. This has been added so that zh_TW does not cause
+ # errors for people when they upgrade. Later we should properly
+ # implement ICU like fuzzy locale objects and provide a way to
+ # maximize and minimize locale tags.
+
+ language, territory, script, variant = parts
+ language = get_global('language_aliases').get(language, language)
+ # Territory alias replacements are stored as lists of codes (one alias
+ # can have several replacements); use the first one so that the
+ # territory stays a plain string.
+ territory_repl = get_global('territory_aliases').get(territory)
+ if territory_repl:
+ territory = territory_repl[0]
+ script = get_global('script_aliases').get(script, script)
+ variant = get_global('variant_aliases').get(variant, variant)
+
+ # 'ZZ' and 'Zzzz' are treated as "no territory" / "no script".
+ if territory == 'ZZ':
+ territory = None
+ if script == 'Zzzz':
+ script = None
+
+ parts = language, territory, script, variant
+
+ new_id = _make_id(*parts)
+ likely_subtag = get_global('likely_subtags').get(new_id)
+ if likely_subtag is None:
+ raise UnknownLocaleError(input_id)
+
+ parts2 = parse_locale(likely_subtag)
+
+ # Success on first hit, return it.
+ locale = _try_load(parts2)
+ if locale is not None:
+ return locale
+
+ # Now try without script and variant
+ locale = _try_load(parts2[:2])
+ if locale is not None:
+ return locale
+
+ # Give up.
+ raise UnknownLocaleError(input_id)
def __eq__(self, other):
for key in ('language', 'territory', 'script', 'variant'):
bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml'))
sup_windows_zones = parse(os.path.join(srcdir, 'supplemental',
'windowsZones.xml'))
+ sup_metadata = parse(os.path.join(srcdir, 'supplemental',
+ 'supplementalMetadata.xml'))
+ sup_likely = parse(os.path.join(srcdir, 'supplemental',
+ 'likelySubtags.xml'))
sup = parse(sup_filename)
# Import global data from the supplemental files
zone_aliases = global_data.setdefault('zone_aliases', {})
zone_territories = global_data.setdefault('zone_territories', {})
win_mapping = global_data.setdefault('windows_zone_mapping', {})
-
- # create auxiliary zone->territory map from the windows zones (we don't set
- # the 'zones_territories' map directly here, because there are some zones
- # aliases listed and we defer the decision of which ones to choose to the
- # 'bcp47' data
+ language_aliases = global_data.setdefault('language_aliases', {})
+ territory_aliases = global_data.setdefault('territory_aliases', {})
+ script_aliases = global_data.setdefault('script_aliases', {})
+ variant_aliases = global_data.setdefault('variant_aliases', {})
+ likely_subtags = global_data.setdefault('likely_subtags', {})
+
+ # create auxiliary zone->territory map from the windows zones (we don't set
+ # the 'zone_territories' map directly here, because there are some zone
+ # aliases listed and we defer the decision of which ones to choose to the
+ # 'bcp47' data)
_zone_territory_map = {}
for map_zone in sup_windows_zones.findall('.//windowsZones/mapTimezones/mapZone'):
if map_zone.attrib.get('territory') == '001':
if 'to' not in child.attrib: # FIXME: support old mappings
meta_zones[elem.attrib['type']] = child.attrib['mzone']
+ # Language aliases: map each alias 'type' to its 'replacement'
+ for alias in sup_metadata.findall('.//alias/languageAlias'):
+ # Skip alias types that contain '-': we don't have a use for those
+ # at the moment, and they don't pass our parser anyways.
+ if '-' in alias.attrib['type']:
+ continue
+ language_aliases[alias.attrib['type']] = alias.attrib['replacement']
+
+ # Territory aliases: 'replacement' is split on whitespace, so each value is a list
+ for alias in sup_metadata.findall('.//alias/territoryAlias'):
+ territory_aliases[alias.attrib['type']] = alias.attrib['replacement'].split()
+
+ # Script aliases: map each alias 'type' to its 'replacement' string
+ for alias in sup_metadata.findall('.//alias/scriptAlias'):
+ script_aliases[alias.attrib['type']] = alias.attrib['replacement']
+
+ # Variant aliases: only recorded when a non-empty 'replacement' is present
+ for alias in sup_metadata.findall('.//alias/variantAlias'):
+ repl = alias.attrib.get('replacement')
+ if repl:
+ variant_aliases[alias.attrib['type']] = repl
+
+ # Likely subtags: map each 'from' identifier to its 'to' identifier
+ for likely_subtag in sup_likely.findall('.//likelySubtags/likelySubtag'):
+ likely_subtags[likely_subtag.attrib['from']] = likely_subtag.attrib['to']
+
outfile = open(global_path, 'wb')
try:
pickle.dump(global_data, outfile, 2)