/* locale.c - Miscellaneous internationalization functions. */
-/* Copyright (C) 1996 Free Software Foundation, Inc.
+/* Copyright (C) 1996-2009,2012 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne Again SHell.
- Bash is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 2, or (at your option) any later
- version.
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- Bash is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
- You should have received a copy of the GNU General Public License along
- with Bash; see the file COPYING. If not, write to the Free Software
- Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
#include "config.h"
# include <unistd.h>
#endif
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+
#include "bashintl.h"
#include "bashansi.h"
#include <stdio.h>
#include "chartypes.h"
+#include <errno.h>
#include "shell.h"
+#include "input.h" /* For bash_input */
+
+#ifndef errno
+extern int errno;
+#endif
+
+int locale_utf8locale; /* unused for now */
+int locale_mb_cur_max; /* value of MB_CUR_MAX for current locale (LC_CTYPE) */
+
+extern int dump_translatable_strings, dump_po_strings;
/* The current locale when the program begins */
static char *default_locale;
categories */
static char *lc_all;
+/* tracks the value of LC_ALL; used to provide defaults for locale
+ categories */
+static char *lang;
+
+/* Called to reset all of the locale variables to their appropriate values
+ if (and only if) LC_ALL has not been assigned a value. */
+static int reset_locale_vars __P((void));
+
+static void locale_setblanks __P((void));
+static int locale_isutf8 __P((char *));
+
/* Set the value of default_locale and make the current locale the
system default locale. This should be called very early in main(). */
void
if (default_locale)
default_locale = savestring (default_locale);
#endif /* HAVE_SETLOCALE */
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+
+ locale_mb_cur_max = MB_CUR_MAX;
}
-/* Set default values for LC_CTYPE, LC_COLLATE, and LC_MESSAGES if they
- are not specified in the environment, but LANG or LC_ALL is. This
+/* Set default values for LC_CTYPE, LC_COLLATE, LC_MESSAGES, LC_NUMERIC and
+ LC_TIME if they are not specified in the environment, but LC_ALL is. This
should be called from main() after parsing the environment. */
void
set_default_locale_vars ()
char *val;
#if defined (HAVE_SETLOCALE)
+
+# if defined (LC_CTYPE)
val = get_string_value ("LC_CTYPE");
if (val == 0 && lc_all && *lc_all)
- setlocale (LC_CTYPE, lc_all);
+ {
+ setlocale (LC_CTYPE, lc_all);
+ locale_setblanks ();
+ locale_mb_cur_max = MB_CUR_MAX;
+ u32reset ();
+ }
+# endif
# if defined (LC_COLLATE)
val = get_string_value ("LC_COLLATE");
setlocale (LC_NUMERIC, lc_all);
# endif /* LC_NUMERIC */
+# if defined (LC_TIME)
+ val = get_string_value ("LC_TIME");
+ if (val == 0 && lc_all && *lc_all)
+ setlocale (LC_TIME, lc_all);
+# endif /* LC_TIME */
+
#endif /* HAVE_SETLOCALE */
val = get_string_value ("TEXTDOMAIN");
{
FREE (default_domain);
default_domain = savestring (val);
- textdomain (default_domain);
+ if (default_dir && *default_dir)
+ bindtextdomain (default_domain, default_dir);
}
val = get_string_value ("TEXTDOMAINDIR");
{
FREE (default_dir);
default_dir = savestring (val);
- bindtextdomain (default_domain, default_dir);
+ if (default_domain && *default_domain)
+ bindtextdomain (default_domain, default_dir);
}
}
set_locale_var (var, value)
char *var, *value;
{
+ int r;
+ char *x;
+
+ x = "";
+ errno = 0;
if (var[0] == 'T' && var[10] == 0) /* TEXTDOMAIN */
{
FREE (default_domain);
default_domain = value ? savestring (value) : (char *)NULL;
- textdomain (default_domain);
+ if (default_dir && *default_dir)
+ bindtextdomain (default_domain, default_dir);
return (1);
}
else if (var[0] == 'T') /* TEXTDOMAINDIR */
{
FREE (default_dir);
default_dir = value ? savestring (value) : (char *)NULL;
- bindtextdomain (default_domain, default_dir);
+ if (default_domain && *default_domain)
+ bindtextdomain (default_domain, default_dir);
return (1);
}
FREE (lc_all);
if (value)
lc_all = savestring (value);
- else if (default_locale)
- lc_all = savestring (default_locale);
else
{
lc_all = (char *)xmalloc (1);
lc_all[0] = '\0';
}
#if defined (HAVE_SETLOCALE)
- return (setlocale (LC_ALL, lc_all) != 0);
+ r = *lc_all ? ((x = setlocale (LC_ALL, lc_all)) != 0) : reset_locale_vars ();
+ if (x == 0)
+ {
+ if (errno == 0)
+ internal_warning(_("setlocale: LC_ALL: cannot change locale (%s)"), lc_all);
+ else
+ internal_warning(_("setlocale: LC_ALL: cannot change locale (%s): %s"), lc_all, strerror (errno));
+ }
+ locale_setblanks ();
+ locale_mb_cur_max = MB_CUR_MAX;
+ u32reset ();
+ return r;
#else
return (1);
#endif
#if defined (HAVE_SETLOCALE)
else if (var[3] == 'C' && var[4] == 'T') /* LC_CTYPE */
{
+# if defined (LC_CTYPE)
if (lc_all == 0 || *lc_all == '\0')
- return (setlocale (LC_CTYPE, value ? value : "") != 0);
+ {
+ x = setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE"));
+ locale_setblanks ();
+ locale_mb_cur_max = MB_CUR_MAX;
+ u32reset ();
+ }
+# endif
}
else if (var[3] == 'C' && var[4] == 'O') /* LC_COLLATE */
{
# if defined (LC_COLLATE)
if (lc_all == 0 || *lc_all == '\0')
- return (setlocale (LC_COLLATE, value ? value : "") != 0);
+ x = setlocale (LC_COLLATE, get_locale_var ("LC_COLLATE"));
# endif /* LC_COLLATE */
}
else if (var[3] == 'M' && var[4] == 'E') /* LC_MESSAGES */
{
# if defined (LC_MESSAGES)
if (lc_all == 0 || *lc_all == '\0')
- return (setlocale (LC_MESSAGES, value ? value : "") != 0);
+ x = setlocale (LC_MESSAGES, get_locale_var ("LC_MESSAGES"));
# endif /* LC_MESSAGES */
}
else if (var[3] == 'N' && var[4] == 'U') /* LC_NUMERIC */
{
# if defined (LC_NUMERIC)
if (lc_all == 0 || *lc_all == '\0')
- return (setlocale (LC_NUMERIC, value ? value : "") != 0);
+ x = setlocale (LC_NUMERIC, get_locale_var ("LC_NUMERIC"));
# endif /* LC_NUMERIC */
}
+ else if (var[3] == 'T' && var[4] == 'I') /* LC_TIME */
+ {
+# if defined (LC_TIME)
+ if (lc_all == 0 || *lc_all == '\0')
+ x = setlocale (LC_TIME, get_locale_var ("LC_TIME"));
+# endif /* LC_TIME */
+ }
#endif /* HAVE_SETLOCALE */
+
+ if (x == 0)
+ {
+ if (errno == 0)
+ internal_warning(_("setlocale: %s: cannot change locale (%s)"), var, get_locale_var (var));
+ else
+ internal_warning(_("setlocale: %s: cannot change locale (%s): %s"), var, get_locale_var (var), strerror (errno));
+ }
- return (0);
+ return (x != 0);
}
-#if 0
-/* Called when LANG is assigned a value. Sets LC_ALL if that has not
- already been set. */
-#else
-/* This no longer does anything; we rely on the C library for correct
- behavior. */
-#endif
+/* Called when LANG is assigned a value. Tracks value in `lang'. Calls
+ reset_locale_vars() to reset any default values if LC_ALL is unset or
+ null. */
int
set_lang (var, value)
char *var, *value;
{
-#if 0
- return ((lc_all == 0) ? set_locale_var ("LC_ALL", value) : 0);
-#else
- return 0;
-#endif
+ FREE (lang);
+ if (value)
+ lang = savestring (value);
+ else
+ {
+ lang = (char *)xmalloc (1);
+ lang[0] = '\0';
+ }
+
+ return ((lc_all == 0 || *lc_all == 0) ? reset_locale_vars () : 0);
}
-/* Get the value of one of the locale variables (LC_MESSAGES, LC_CTYPE) */
+/* Set default values for LANG and LC_ALL. Default values for all other
+ locale-related variables depend on these. */
+void
+set_default_lang ()
+{
+ char *v;
+
+ v = get_string_value ("LC_ALL");
+ set_locale_var ("LC_ALL", v);
+
+ v = get_string_value ("LANG");
+ set_lang ("LANG", v);
+}
+
+/* Get the value of one of the locale variables (LC_MESSAGES, LC_CTYPE).
+ The precedence is as POSIX.2 specifies: LC_ALL has precedence over
+ the specific locale variables, and LANG, if set, is used as the default. */
char *
get_locale_var (var)
char *var;
locale = lc_all;
- if (locale == 0)
- locale = get_string_value (var);
- if (locale == 0)
- locale = default_locale;
-
+ if (locale == 0 || *locale == 0)
+ locale = get_string_value (var); /* XXX - mem leak? */
+ if (locale == 0 || *locale == 0)
+ locale = lang;
+ if (locale == 0 || *locale == 0)
+#if 0
+ locale = default_locale; /* system-dependent; not really portable. should it be "C"? */
+#else
+ locale = "";
+#endif
return (locale);
}
+/* Called to reset all of the locale variables to their appropriate values
+ if (and only if) LC_ALL has not been assigned a value. DO NOT CALL THIS
+ IF LC_ALL HAS BEEN ASSIGNED A VALUE. */
+static int
+reset_locale_vars ()
+{
+ char *t;
+#if defined (HAVE_SETLOCALE)
+ if (lang == 0 || *lang == '\0')
+ maybe_make_export_env (); /* trust that this will change environment for setlocale */
+ if (setlocale (LC_ALL, lang ? lang : "") == 0)
+ return 0;
+
+# if defined (LC_CTYPE)
+ t = setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE"));
+# endif
+# if defined (LC_COLLATE)
+ t = setlocale (LC_COLLATE, get_locale_var ("LC_COLLATE"));
+# endif
+# if defined (LC_MESSAGES)
+ t = setlocale (LC_MESSAGES, get_locale_var ("LC_MESSAGES"));
+# endif
+# if defined (LC_NUMERIC)
+ t = setlocale (LC_NUMERIC, get_locale_var ("LC_NUMERIC"));
+# endif
+# if defined (LC_TIME)
+ t = setlocale (LC_TIME, get_locale_var ("LC_TIME"));
+# endif
+
+ locale_setblanks ();
+ locale_mb_cur_max = MB_CUR_MAX;
+ u32reset ();
+
+#endif
+ return 1;
+}
+
/* Translate the contents of STRING, a $"..." quoted string, according
to the current locale. In the `C' or `POSIX' locale, or if gettext()
is not available, the passed string is returned unchanged. The
int len, *lenp;
{
char *locale, *t;
-#if defined (HAVE_GETTEXT)
char *translated;
int tlen;
-#endif
/* Don't try to translate null strings. */
if (string == 0 || *string == 0)
/* If we don't have setlocale() or the current locale is `C' or `POSIX',
just return the string. If we don't have gettext(), there's no use
doing anything else. */
-#if defined (HAVE_GETTEXT)
if (locale == 0 || locale[0] == '\0' ||
(locale[0] == 'C' && locale[1] == '\0') || STREQ (locale, "POSIX"))
-#endif
{
t = (char *)xmalloc (len + 1);
strcpy (t, string);
return (t);
}
-#if defined (HAVE_GETTEXT)
/* Now try to translate it. */
- translated = gettext (string);
+ if (default_domain && *default_domain)
+ translated = dgettext (default_domain, string);
+ else
+ translated = string;
+
if (translated == string) /* gettext returns its argument if untranslatable */
{
t = (char *)xmalloc (len + 1);
*lenp = tlen;
}
return (t);
-#endif /* HAVE_GETTEXT */
+}
+
+/* Change a bash string into a string suitable for inclusion in a `po' file.
+ This backslash-escapes `"' and `\' and changes newlines into \\\n"\n". */
+char *
+mk_msgstr (string, foundnlp)
+ char *string;
+ int *foundnlp;
+{
+ register int c, len;
+ char *result, *r, *s;
+
+ for (len = 0, s = string; s && *s; s++)
+ {
+ len++;
+ if (*s == '"' || *s == '\\')
+ len++;
+ else if (*s == '\n')
+ len += 5;
+ }
+
+ r = result = (char *)xmalloc (len + 3);
+ *r++ = '"';
+
+ for (s = string; s && (c = *s); s++)
+ {
+ if (c == '\n') /* <NL> -> \n"<NL>" */
+ {
+ *r++ = '\\';
+ *r++ = 'n';
+ *r++ = '"';
+ *r++ = '\n';
+ *r++ = '"';
+ if (foundnlp)
+ *foundnlp = 1;
+ continue;
+ }
+ if (c == '"' || c == '\\')
+ *r++ = '\\';
+ *r++ = c;
+ }
+
+ *r++ = '"';
+ *r++ = '\0';
+
+ return result;
+}
+
+/* $"..." -- Translate the portion of STRING between START and END
+ according to current locale using gettext (if available) and return
+ the result. The caller will take care of leaving the quotes intact.
+ The string will be left without the leading `$' by the caller.
+ If translation is performed, the translated string will be double-quoted
+ by the caller. The length of the translated string is returned in LENP,
+ if non-null. */
+char *
+localeexpand (string, start, end, lineno, lenp)
+ char *string;
+ int start, end, lineno, *lenp;
+{
+ int len, tlen, foundnl;
+ char *temp, *t, *t2;
+
+ temp = (char *)xmalloc (end - start + 1);
+ for (tlen = 0, len = start; len < end; )
+ temp[tlen++] = string[len++];
+ temp[tlen] = '\0';
+
+ /* If we're just dumping translatable strings, don't do anything with the
+ string itself, but if we're dumping in `po' file format, convert it into
+ a form more palatable to gettext(3) and friends by quoting `"' and `\'
+ with backslashes and converting <NL> into `\n"<NL>"'. If we find a
+ newline in TEMP, we first output a `msgid ""' line and then the
+ translated string; otherwise we output the `msgid' and translated
+ string all on one line. */
+ if (dump_translatable_strings)
+ {
+ if (dump_po_strings)
+ {
+ foundnl = 0;
+ t = mk_msgstr (temp, &foundnl);
+ t2 = foundnl ? "\"\"\n" : "";
+
+ printf ("#: %s:%d\nmsgid %s%s\nmsgstr \"\"\n",
+ yy_input_name (), lineno, t2, t);
+ free (t);
+ }
+ else
+ printf ("\"%s\"\n", temp);
+
+ if (lenp)
+ *lenp = tlen;
+ return (temp);
+ }
+ else if (*temp)
+ {
+ t = localetrans (temp, tlen, &len);
+ free (temp);
+ if (lenp)
+ *lenp = len;
+ return (t);
+ }
+ else
+ {
+ if (lenp)
+ *lenp = 0;
+ return (temp);
+ }
+}
+
+/* Set every character in the <blank> character class to be a shell break
+ character for the lexical analyzer when the locale changes. */
+static void
+locale_setblanks ()
+{
+ int x;
+
+ for (x = 0; x < sh_syntabsiz; x++)
+ {
+ if (isblank (x))
+ sh_syntaxtab[x] |= CSHBRK|CBLANK;
+ else if (member (x, shell_break_chars))
+ {
+ sh_syntaxtab[x] |= CSHBRK;
+ sh_syntaxtab[x] &= ~CBLANK;
+ }
+ else
+ sh_syntaxtab[x] &= ~(CSHBRK|CBLANK);
+ }
+}
+
+static int
+locale_isutf8 (lspec)
+ char *lspec;
+{
+ char *cp;
+
+#if HAVE_LANGINFO_CODESET
+ cp = nl_langinfo (CODESET);
+ return (STREQ (cp, "UTF-8") || STREQ (cp, "utf8"));
+#else
+ /* Take a shot */
+ return (strstr (lspec, "UTF-8") || strstr (lspec, "utf8"));
+#endif
}