intl: Support the AIX 7 locale names.

author Bruno Haible <bruno@clisp.org>

Thu, 10 Dec 2020 16:01:43 +0000 (17:01 +0100)

committer Bruno Haible <bruno@clisp.org>

Sun, 9 Oct 2022 07:30:42 +0000 (09:30 +0200)
author Bruno Haible <bruno@clisp.org>
Thu, 10 Dec 2020 16:01:43 +0000 (17:01 +0100)
committer Bruno Haible <bruno@clisp.org>
Sun, 9 Oct 2022 07:30:42 +0000 (09:30 +0200)
diff --git a/NEWS b/NEWS

index f2764d4eef1958ed9f5b2f8428252958f60a4ab8..cdbb16746c23555e70bb1e16917f5c349ce92d9e 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,11 @@
  Version 0.21.1 - October 2022
  
+* Runtime behaviour:
+  - On AIX, locale names with a script or with an uppercase language are now
+    supported.
+    For example, sr_Cyrl_RS.UTF-8 is treated like sr_RS.UTF-8@cyrillic, and
+    EN_US.UTF-8 is treated like en_US.UTF-8.
+
  * The base Unicode standard is now updated to 14.0.0.
  
  * Portability:
diff --git a/gettext-runtime/intl/explodename.c b/gettext-runtime/intl/explodename.c

index 57c3e4f2d9d5a442c4aa5ee1b9a83289888b7c5b..661266ea30e3b54ae89ea7a1eeee178664aeb82a 100644 (file)
--- a/gettext-runtime/intl/explodename.c
+++ b/gettext-runtime/intl/explodename.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995-2016 Free Software Foundation, Inc.
+/* Copyright (C) 1995-2016, 2020 Free Software Foundation, Inc.
     Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
  
     This program is free software: you can redistribute it and/or modify
@@ -35,20 +35,6 @@
  
  /* @@ end of prolog @@ */
  
-/* Split a locale name NAME into a leading language part and all the
-   rest.  Return a pointer to the first character after the language,
-   i.e. to the first byte of the rest.  */
-static char *_nl_find_language (const char *name);
-
-static char *
-_nl_find_language (const char *name)
-{
-  while (name[0] != '\0' && name[0] != '_' && name[0] != '@' && name[0] != '.')
-    ++name;
-
-  return (char *) name;
-}
-
  
  int
  _nl_explode_name (char *name,
@@ -64,23 +50,84 @@ _nl_explode_name (char *name,
    *codeset = NULL;
    *normalized_codeset = NULL;
  
-  /* Now we determine the single parts of the locale name.  First
-     look for the language.  Termination symbols are `_', '.', and `@'.  */
+  /* Determine the individual parts of the locale name.
+     Accept the XPG syntax
+
+             language[_territory][.codeset][@modifier]
+
+     On AIX systems, also accept the same syntax with an uppercased language,
+     and a syntax similar to RFC 5646:
+
+             language[_script]_territory[.codeset]
+
+     where script is a four-letter code for a script, per ISO 15924.
+   */
+
    mask = 0;
-  *language = cp = name;
-  cp = _nl_find_language (*language);
  
-  if (*language == cp)
+  /* First look for the language.  Termination symbols are `_', '.', and `@'.  */
+  *language = name;
+
+  cp = name;
+  while (cp[0] != '\0' && cp[0] != '_' && cp[0] != '@' && cp[0] != '.')
+    ++cp;
+
+  if (cp == name)
      /* This does not make sense: language has to be specified.  Use
         this entry as it is without exploding.  Perhaps it is an alias.  */
-    cp = strchr (*language, '\0');
+    cp = strchr (name, '\0');
    else
      {
        if (cp[0] == '_')
         {
+         *cp++ = '\0';
+#if defined _AIX
+         /* Lowercase the language.  */
+         {
+           char *lcp;
+
+           for (lcp = name; lcp < cp; lcp++)
+             if (*lcp >= 'A' && *lcp <= 'Z')
+               *lcp += 'a' - 'A';
+         }
+
+         /* Next is the script or the territory.  It depends on whether
+            there is another '_'.  */
+         char *next = cp;
+
+         while (cp[0] != '\0' && cp[0] != '_' && cp[0] != '@' && cp[0] != '.')
+           ++cp;
+
+         if (cp[0] == '_')
+           {
+             *cp++ = '\0';
+
+             /* Next is the script.  Translate the script to a modifier.
+                We don't need to support all of ISO 15924 here, only those
+                scripts that actually occur:
+                  Latn -> latin
+                  Cyrl -> cyrillic
+                  Guru -> gurmukhi
+                  Hans -> (omitted, redundant with the territory CN or SG)
+                  Hant -> (omitted, redundant with the territory TW or HK)  */
+             if (strcmp (next, "Latn") == 0)
+               *modifier = "latin";
+             else if (strcmp (next, "Cyrl") == 0)
+               *modifier = "cyrillic";
+             else if (strcmp (next, "Guru") == 0)
+               *modifier = "gurmukhi";
+             else if (!(strcmp (next, "Hans") == 0
+                        || strcmp (next, "Hant") == 0))
+               *modifier = next;
+             if (*modifier != NULL && (*modifier)[0] != '\0')
+               mask |= XPG_MODIFIER;
+           }
+         else
+           cp = next;
+#endif
+
           /* Next is the territory.  */
-         cp[0] = '\0';
-         *territory = ++cp;
+         *territory = cp;
  
           while (cp[0] != '\0' && cp[0] != '.' && cp[0] != '@')
             ++cp;
@@ -91,8 +138,8 @@ _nl_explode_name (char *name,
        if (cp[0] == '.')
         {
           /* Next is the codeset.  */
-         cp[0] = '\0';
-         *codeset = ++cp;
+         *cp++ = '\0';
+         *codeset = cp;
  
           while (cp[0] != '\0' && cp[0] != '@')
             ++cp;
@@ -111,16 +158,16 @@ _nl_explode_name (char *name,
                 mask |= XPG_NORM_CODESET;
             }
         }
-    }
  
-  if (cp[0] == '@')
-    {
-      /* Next is the modifier.  */
-      cp[0] = '\0';
-      *modifier = ++cp;
+      if (cp[0] == '@')
+       {
+         /* Next is the modifier.  */
+         *cp++ = '\0';
+         *modifier = cp;
  
-      if (cp[0] != '\0')
-       mask |= XPG_MODIFIER;
+         if (cp[0] != '\0')
+           mask |= XPG_MODIFIER;
+       }
      }
  
    if (*territory != NULL && (*territory)[0] == '\0')
author	Bruno Haible <bruno@clisp.org>
	Thu, 10 Dec 2020 16:01:43 +0000 (17:01 +0100)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 9 Oct 2022 07:30:42 +0000 (09:30 +0200)
NEWS		patch | blob | blame | history
gettext-runtime/intl/explodename.c		patch | blob | blame | history