From: Bruno Haible Date: Thu, 10 Dec 2020 16:01:43 +0000 (+0100) Subject: intl: Support the AIX 7 locale names. X-Git-Tag: v0.21.1~34 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c891359466d0df034923021d45fd3b6a7f7af340;p=thirdparty%2Fgettext.git intl: Support the AIX 7 locale names. * gettext-runtime/intl/explodename.c (_nl_find_language): Remove function. (_nl_explode_name): Inline it here. On AIX, lowercase the language and map a script identifier to a modifier. * NEWS: Mention it. --- diff --git a/NEWS b/NEWS index f2764d4ee..cdbb16746 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,11 @@ Version 0.21.1 - October 2022 +* Runtime behaviour: + - On AIX, locale names with a script or with an uppercase language are now + supported. + For example, sr_Cyrl_RS.UTF-8 is treated like sr_RS.UTF-8@cyrillic, and + EN_US.UTF-8 is treated like en_US.UTF-8. + * The base Unicode standard is now updated to 14.0.0. * Portability: diff --git a/gettext-runtime/intl/explodename.c b/gettext-runtime/intl/explodename.c index 57c3e4f2d..661266ea3 100644 --- a/gettext-runtime/intl/explodename.c +++ b/gettext-runtime/intl/explodename.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995-2016 Free Software Foundation, Inc. +/* Copyright (C) 1995-2016, 2020 Free Software Foundation, Inc. Contributed by Ulrich Drepper , 1995. This program is free software: you can redistribute it and/or modify @@ -35,20 +35,6 @@ /* @@ end of prolog @@ */ -/* Split a locale name NAME into a leading language part and all the - rest. Return a pointer to the first character after the language, - i.e. to the first byte of the rest. */ -static char *_nl_find_language (const char *name); - -static char * -_nl_find_language (const char *name) -{ - while (name[0] != '\0' && name[0] != '_' && name[0] != '@' && name[0] != '.') - ++name; - - return (char *) name; -} - int _nl_explode_name (char *name, @@ -64,23 +50,84 @@ _nl_explode_name (char *name, *codeset = NULL; *normalized_codeset = NULL; - /* Now we determine the single parts of the locale name. First - look for the language. Termination symbols are `_', '.', and `@'. */ + /* Determine the individual parts of the locale name. + Accept the XPG syntax + + language[_territory][.codeset][@modifier] + + On AIX systems, also accept the same syntax with an uppercased language, + and a syntax similar to RFC 5646: + + language[_script]_territory[.codeset] + + where script is a four-letter code for a script, per ISO 15924. + */ + mask = 0; - *language = cp = name; - cp = _nl_find_language (*language); - if (*language == cp) + /* First look for the language. Termination symbols are `_', '.', and `@'. */ + *language = name; + + cp = name; + while (cp[0] != '\0' && cp[0] != '_' && cp[0] != '@' && cp[0] != '.') + ++cp; + + if (cp == name) /* This does not make sense: language has to be specified. Use this entry as it is without exploding. Perhaps it is an alias. */ - cp = strchr (*language, '\0'); + cp = strchr (name, '\0'); else { if (cp[0] == '_') { + *cp++ = '\0'; +#if defined _AIX + /* Lowercase the language. */ + { + char *lcp; + + for (lcp = name; lcp < cp; lcp++) + if (*lcp >= 'A' && *lcp <= 'Z') + *lcp += 'a' - 'A'; + } + + /* Next is the script or the territory. It depends on whether + there is another '_'. */ + char *next = cp; + + while (cp[0] != '\0' && cp[0] != '_' && cp[0] != '@' && cp[0] != '.') + ++cp; + + if (cp[0] == '_') + { + *cp++ = '\0'; + + /* Next is the script. Translate the script to a modifier. + We don't need to support all of ISO 15924 here, only those + scripts that actually occur: + Latn -> latin + Cyrl -> cyrillic + Guru -> gurmukhi + Hans -> (omitted, redundant with the territory CN or SG) + Hant -> (omitted, redundant with the territory TW or HK) */ + if (strcmp (next, "Latn") == 0) + *modifier = "latin"; + else if (strcmp (next, "Cyrl") == 0) + *modifier = "cyrillic"; + else if (strcmp (next, "Guru") == 0) + *modifier = "gurmukhi"; + else if (!(strcmp (next, "Hans") == 0 + || strcmp (next, "Hant") == 0)) + *modifier = next; + if (*modifier != NULL && (*modifier)[0] != '\0') + mask |= XPG_MODIFIER; + } + else + cp = next; +#endif + /* Next is the territory. */ - cp[0] = '\0'; - *territory = ++cp; + *territory = cp; while (cp[0] != '\0' && cp[0] != '.' && cp[0] != '@') ++cp; @@ -91,8 +138,8 @@ _nl_explode_name (char *name, if (cp[0] == '.') { /* Next is the codeset. */ - cp[0] = '\0'; - *codeset = ++cp; + *cp++ = '\0'; + *codeset = cp; while (cp[0] != '\0' && cp[0] != '@') ++cp; @@ -111,16 +158,16 @@ _nl_explode_name (char *name, mask |= XPG_NORM_CODESET; } } - } - if (cp[0] == '@') - { - /* Next is the modifier. */ - cp[0] = '\0'; - *modifier = ++cp; + if (cp[0] == '@') + { + /* Next is the modifier. */ + *cp++ = '\0'; + *modifier = cp; - if (cp[0] != '\0') - mask |= XPG_MODIFIER; + if (cp[0] != '\0') + mask |= XPG_MODIFIER; + } } if (*territory != NULL && (*territory)[0] == '\0')