Canonicalize locale names on MacOS X.

author Bruno Haible <bruno@clisp.org>

Wed, 16 Mar 2005 13:27:16 +0000 (13:27 +0000)

committer Bruno Haible <bruno@clisp.org>

Tue, 23 Jun 2009 10:12:26 +0000 (12:12 +0200)
author Bruno Haible <bruno@clisp.org>
Wed, 16 Mar 2005 13:27:16 +0000 (13:27 +0000)
committer Bruno Haible <bruno@clisp.org>
Tue, 23 Jun 2009 10:12:26 +0000 (12:12 +0200)
diff --git a/gettext-runtime/intl/ChangeLog b/gettext-runtime/intl/ChangeLog

index eb191a284019819c4f8a923458139a3052568494..ec3a54aaa39c66e111e4bf02209eb44dab908469 100644 (file)
--- a/gettext-runtime/intl/ChangeLog
+++ b/gettext-runtime/intl/ChangeLog
@@ -1,3 +1,11 @@
+2005-03-16  Bruno Haible  <bruno@clisp.org>
+
+       Canonicalize locale names on MacOS X.
+       * localename.c (_nl_locale_name_canonicalize): New function.
+       (_nl_locale_name_default): Use it.
+       * langprefs.c (_nl_locale_name_canonicalize): New declaration.
+       (_nl_language_preferences_default): Use it.
+
  2005-03-14  Bruno Haible  <bruno@clisp.org>
  
         * gettext-0.14.3 released.
diff --git a/gettext-runtime/intl/langprefs.c b/gettext-runtime/intl/langprefs.c

index 8b013f440c90e9b5774d6c84df1e34f0ef5eccac..416398ded7bd5869dc0395644e9d373d739e969a 100644 (file)
--- a/gettext-runtime/intl/langprefs.c
+++ b/gettext-runtime/intl/langprefs.c
@@ -30,6 +30,7 @@
  # include <CFPropertyList.h>
  # include <CFArray.h>
  # include <CFString.h>
+extern void _nl_locale_name_canonicalize (char *name);
  #endif
  
  /* Determine the user's language preferences, as a colon separated list of
@@ -71,6 +72,7 @@ _nl_language_preferences_default (void)
                                            buf, sizeof (buf),
                                            kCFStringEncodingASCII))
                   {
+                   _nl_locale_name_canonicalize (buf);
                     size += strlen (buf) + 1;
                     /* Most GNU programs use msgids in English and don't ship
                        an en.mo message catalog.  Therefore when we see "en"
@@ -101,6 +103,7 @@ _nl_language_preferences_default (void)
                                                    buf, sizeof (buf),
                                                    kCFStringEncodingASCII))
                           {
+                           _nl_locale_name_canonicalize (buf);
                             strcpy (p, buf);
                             p += strlen (buf);
                             *p++ = ':';
diff --git a/gettext-runtime/intl/localename.c b/gettext-runtime/intl/localename.c

index 8abb2f1b6c4536fd519e54e986a770e28b3029c7..323c7323cbdd765fb25f40c55f27993a054776cd 100644 (file)
--- a/gettext-runtime/intl/localename.c
+++ b/gettext-runtime/intl/localename.c
@@ -694,6 +694,280 @@
  # endif
  #endif
  
+# if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE
+/* MacOS X 10.2 or newer */
+
+/* Canonicalize a MacOS X locale name to a Unix locale name, in place.
+   NAME must be a writable buffer with room for at least 13 bytes, since
+   the result can be longer than the input ("mn-Mong" -> "mn@mongolian").
+   On input it holds the MacOS X locale name, on output the Unix one.  */
+void
+_nl_locale_name_canonicalize (char *name)
+{
+  /* This conversion is based on a posting by
+     Deborah GoldSmith <goldsmit@apple.com> on 2005-03-08,
+     http://lists.apple.com/archives/carbon-dev/2005/Mar/msg00293.html */
+
+  /* Convert legacy (NeXTstep inherited) English names to Unix (ISO 639 and
+     ISO 3166) names.  Prior to MacOS X 10.3, there is no API for doing this.
+     Therefore we do it ourselves, using a table based on the results of the
+     MacOS X 10.3.8 function
+     CFLocaleCreateCanonicalLocaleIdentifierFromString().  */
+  typedef struct { const char legacy[21+1]; const char unixy[5+1]; }
+         legacy_entry;
+  static const legacy_entry legacy_table[] = { /* sorted in strcmp order */
+    { "Afrikaans",             "af" },
+    { "Albanian",              "sq" },
+    { "Amharic",               "am" },
+    { "Arabic",                "ar" },
+    { "Armenian",              "hy" },
+    { "Assamese",              "as" },
+    { "Aymara",                "ay" },
+    { "Azerbaijani",           "az" },
+    { "Basque",                "eu" },
+    { "Belarusian",            "be" },
+    { "Belorussian",           "be" },
+    { "Bengali",               "bn" },
+    { "Brazilian Portugese",   "pt_BR" },
+    { "Brazilian Portuguese",  "pt_BR" },
+    { "Breton",                "br" },
+    { "Bulgarian",             "bg" },
+    { "Burmese",               "my" },
+    { "Byelorussian",          "be" },
+    { "Catalan",               "ca" },
+    { "Chewa",                 "ny" },
+    { "Chichewa",              "ny" },
+    { "Chinese",               "zh" },
+    { "Chinese, Simplified",   "zh_CN" },
+    { "Chinese, Traditional",  "zh_TW" },
+    { "Chinese, Tradtional",   "zh_TW" },
+    { "Croatian",              "hr" },
+    { "Czech",                 "cs" },
+    { "Danish",                "da" },
+    { "Dutch",                 "nl" },
+    { "Dzongkha",              "dz" },
+    { "English",               "en" },
+    { "Esperanto",             "eo" },
+    { "Estonian",              "et" },
+    { "Faroese",               "fo" },
+    { "Farsi",                 "fa" },
+    { "Finnish",               "fi" },
+    { "Flemish",               "nl_BE" },
+    { "French",                "fr" },
+    { "Galician",              "gl" },
+    { "Gallegan",              "gl" },
+    { "Georgian",              "ka" },
+    { "German",                "de" },
+    { "Greek",                 "el" },
+    { "Greenlandic",           "kl" },
+    { "Guarani",               "gn" },
+    { "Gujarati",              "gu" },
+    { "Hawaiian",              "haw" }, /* Yes, "haw", not "cpe".  */
+    { "Hebrew",                "he" },
+    { "Hindi",                 "hi" },
+    { "Hungarian",             "hu" },
+    { "Icelandic",             "is" },
+    { "Indonesian",            "id" },
+    { "Inuktitut",             "iu" },
+    { "Irish",                 "ga" },
+    { "Italian",               "it" },
+    { "Japanese",              "ja" },
+    { "Javanese",              "jv" },
+    { "Kalaallisut",           "kl" },
+    { "Kannada",               "kn" },
+    { "Kashmiri",              "ks" },
+    { "Kazakh",                "kk" },
+    { "Khmer",                 "km" },
+    { "Kinyarwanda",           "rw" },
+    { "Kirghiz",               "ky" },
+    { "Korean",                "ko" },
+    { "Kurdish",               "ku" },
+    { "Latin",                 "la" },
+    { "Latvian",               "lv" },
+    { "Lithuanian",            "lt" },
+    { "Macedonian",            "mk" },
+    { "Malagasy",              "mg" },
+    { "Malay",                 "ms" },
+    { "Malayalam",             "ml" },
+    { "Maltese",               "mt" },
+    { "Manx",                  "gv" },
+    { "Marathi",               "mr" },
+    { "Moldavian",             "mo" },
+    { "Mongolian",             "mn" },
+    { "Nepali",                "ne" },
+    { "Norwegian",             "nb" }, /* Yes, "nb", not the obsolete "no".  */
+    { "Nyanja",                "ny" },
+    { "Nynorsk",               "nn" },
+    { "Oriya",                 "or" },
+    { "Oromo",                 "om" },
+    { "Panjabi",               "pa" },
+    { "Pashto",                "ps" },
+    { "Persian",               "fa" },
+    { "Polish",                "pl" },
+    { "Portuguese",            "pt" },
+    { "Portuguese, Brazilian", "pt_BR" },
+    { "Punjabi",               "pa" },
+    { "Pushto",                "ps" },
+    { "Quechua",               "qu" },
+    { "Romanian",              "ro" },
+    { "Ruanda",                "rw" },
+    { "Rundi",                 "rn" },
+    { "Russian",               "ru" },
+    { "Sami",                  "se_NO" }, /* Not just "se".  */
+    { "Sanskrit",              "sa" },
+    { "Scottish",              "gd" },
+    { "Serbian",               "sr" },
+    { "Simplified Chinese",    "zh_CN" },
+    { "Sindhi",                "sd" },
+    { "Sinhalese",             "si" },
+    { "Slovak",                "sk" },
+    { "Slovenian",             "sl" },
+    { "Somali",                "so" },
+    { "Spanish",               "es" },
+    { "Sundanese",             "su" },
+    { "Swahili",               "sw" },
+    { "Swedish",               "sv" },
+    { "Tagalog",               "tl" },
+    { "Tajik",                 "tg" },
+    { "Tajiki",                "tg" },
+    { "Tamil",                 "ta" },
+    { "Tatar",                 "tt" },
+    { "Telugu",                "te" },
+    { "Thai",                  "th" },
+    { "Tibetan",               "bo" },
+    { "Tigrinya",              "ti" },
+    { "Tongan",                "to" },
+    { "Traditional Chinese",   "zh_TW" },
+    { "Turkish",               "tr" },
+    { "Turkmen",               "tk" },
+    { "Uighur",                "ug" },
+    { "Ukrainian",             "uk" },
+    { "Urdu",                  "ur" },
+    { "Uzbek",                 "uz" },
+    { "Vietnamese",            "vi" },
+    { "Welsh",                 "cy" },
+    { "Yiddish",               "yi" }
+  };
+
+  /* Convert new-style locale names with language tags (ISO 639 and ISO 15924)
+     to Unix (ISO 639 and ISO 3166) names.  */
+  typedef struct { const char langtag[7+1]; const char unixy[12+1]; }
+         langtag_entry;
+  static const langtag_entry langtag_table[] = { /* sorted in strcmp order */
+    /* MacOS X has "az-Arab", "az-Cyrl", "az-Latn".
+       The default script for az on Unix is Latin.  */
+    { "az-Latn", "az" },
+    /* MacOS X has "ga-dots".  Does not yet exist on Unix.  */
+    { "ga-dots", "ga" },
+    /* MacOS X has "kk-Cyrl".  Does not yet exist on Unix.  */
+    /* MacOS X has "mn-Cyrl", "mn-Mong".
+       The default script for mn on Unix is Cyrillic.  */
+    { "mn-Cyrl", "mn" },
+    /* MacOS X has "ms-Arab", "ms-Latn".
+       The default script for ms on Unix is Latin.  */
+    { "ms-Latn", "ms" },
+    /* MacOS X has "tg-Cyrl".
+       The default script for tg on Unix is Cyrillic.  */
+    { "tg-Cyrl", "tg" },
+    /* MacOS X has "tk-Cyrl".  Does not yet exist on Unix.  */
+    /* MacOS X has "tt-Cyrl".
+       The default script for tt on Unix is Cyrillic.  */
+    { "tt-Cyrl", "tt" },
+    /* MacOS X has "zh-Hans", "zh-Hant".
+       Country codes are used to distinguish these on Unix.  */
+    { "zh-Hans", "zh_CN" },
+    { "zh-Hant", "zh_TW" }
+  };
+
+  /* Convert script names (ISO 15924) to Unix conventions.
+     See http://www.unicode.org/iso15924/iso15924-codes.html  */
+  typedef struct { const char script[4+1]; const char unixy[9+1]; }
+         script_entry;
+  static const script_entry script_table[] = { /* sorted in strcmp order */
+    { "Arab", "arabic" },
+    { "Cyrl", "cyrillic" },
+    { "Mong", "mongolian" }
+  };
+
+  /* Step 1: Binary search in legacy_table; all its keys start with A-Z.  */
+  if (name[0] >= 'A' && name[0] <= 'Z')
+    {
+      unsigned int i1, i2;
+      i1 = 0;
+      i2 = sizeof (legacy_table) / sizeof (legacy_entry);
+      while (i2 - i1 > 1)
+       {
+         /* At this point we know that if name occurs in legacy_table,
+            its index must be >= i1 and < i2.  */
+         unsigned int i = (i1 + i2) >> 1;
+         const legacy_entry *p = &legacy_table[i];
+         if (strcmp (name, p->legacy) < 0)
+           i2 = i;
+         else
+           i1 = i;
+       }
+      if (strcmp (name, legacy_table[i1].legacy) == 0)
+       {
+         strcpy (name, legacy_table[i1].unixy);
+         return;
+       }
+    }
+
+  /* Step 2: Convert "ll-Ssss" names using langtag_table and script_table.  */
+  if (strlen (name) == 7 && name[2] == '-')
+    {
+      unsigned int i1, i2;
+      i1 = 0;
+      i2 = sizeof (langtag_table) / sizeof (langtag_entry);
+      while (i2 - i1 > 1)
+       {
+         /* At this point we know that if name occurs in langtag_table,
+            its index must be >= i1 and < i2.  */
+         unsigned int i = (i1 + i2) >> 1;
+         const langtag_entry *p = &langtag_table[i];
+         if (strcmp (name, p->langtag) < 0)
+           i2 = i;
+         else
+           i1 = i;
+       }
+      if (strcmp (name, langtag_table[i1].langtag) == 0)
+       {
+         strcpy (name, langtag_table[i1].unixy);
+         return;
+       }
+
+      i1 = 0; /* No exact langtag match: look up the script part alone.  */
+      i2 = sizeof (script_table) / sizeof (script_entry);
+      while (i2 - i1 > 1)
+       {
+         /* At this point we know that if (name + 3) occurs in script_table,
+            its index must be >= i1 and < i2.  */
+         unsigned int i = (i1 + i2) >> 1;
+         const script_entry *p = &script_table[i];
+         if (strcmp (name + 3, p->script) < 0)
+           i2 = i;
+         else
+           i1 = i;
+       }
+      if (strcmp (name + 3, script_table[i1].script) == 0)
+       {
+         name[2] = '@'; /* "ll-Ssss" becomes "ll@script", which is longer.  */
+         strcpy (name + 3, script_table[i1].unixy);
+         return;
+       }
+    }
+
+  /* Step 3: Turn each new-style dash into a Unix underscore (en-US -> en_US). */
+  {
+    char *p;
+    for (p = name; *p != '\0'; p++)
+      if (*p == '-')
+       *p = '_';
+  }
+}
+
+#endif
+
  /* XPG3 defines the result of 'setlocale (category, NULL)' as:
     "Directs 'setlocale()' to query 'category' and return the current
      setting of 'local'."
@@ -777,7 +1051,10 @@ _nl_locale_name_default (void)
  
         if (CFStringGetCString (name, namebuf, sizeof(namebuf),
                                 kCFStringEncodingASCII))
-         cached_localename = strdup (namebuf);
+         {
+           _nl_locale_name_canonicalize (namebuf);
+           cached_localename = strdup (namebuf);
+         }
         CFRelease (locale);
  #  elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.2 or newer */
         CFTypeRef value =
@@ -787,7 +1064,10 @@ _nl_locale_name_default (void)
             && CFGetTypeID (value) == CFStringGetTypeID ()
             && CFStringGetCString ((CFStringRef)value, namebuf, sizeof(namebuf),
                                    kCFStringEncodingASCII))
-         cached_localename = strdup (namebuf);
+         {
+           _nl_locale_name_canonicalize (namebuf);
+           cached_localename = strdup (namebuf);
+         }
  #  endif
         if (cached_localename == NULL)
           cached_localename = "C";
author	Bruno Haible <bruno@clisp.org>
	Wed, 16 Mar 2005 13:27:16 +0000 (13:27 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Tue, 23 Jun 2009 10:12:26 +0000 (12:12 +0200)
gettext-runtime/intl/ChangeLog		patch \| blob \| blame \| history
gettext-runtime/intl/langprefs.c		patch \| blob \| blame \| history
gettext-runtime/intl/localename.c		patch \| blob \| blame \| history