1 /* locale.c - Miscellaneous internationalization functions. */
3 /* Copyright (C) 1996-2009,2012,2016-2021 Free Software Foundation, Inc.
5 This file is part of GNU Bash, the Bourne Again SHell.
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
23 #include "bashtypes.h"
25 #if defined (HAVE_UNISTD_H)
29 #if HAVE_LANGINFO_CODESET
30 # include <langinfo.h>
36 #include "chartypes.h"
40 #include "input.h" /* For bash_input */
/* Cached result of locale_isutf8(); reassigned wherever this file changes
   the locale. */
46 int locale_utf8locale
;
47 int locale_mb_cur_max
; /* value of MB_CUR_MAX for current locale (LC_CTYPE) */
/* Result of mblen (NULL, 0) when HANDLE_MULTIBYTE is defined, otherwise 0. */
48 int locale_shiftstates
= 0;
50 int singlequote_translations
= 0; /* single-quote output of $"..." */
/* Defined in another file.  dump_translatable_strings is tested in
   locale_expand(); the use of dump_po_strings is not visible in this
   listing -- TODO confirm. */
52 extern int dump_translatable_strings
, dump_po_strings
;
54 /* The current locale when the program begins */
55 static char *default_locale
;
57 /* The current domain for textdomain(3). */
58 static char *default_domain
;
/* Catalog directory handed to bindtextdomain() together with
   default_domain; assigned from TEXTDOMAINDIR. */
59 static char *default_dir
;
61 /* tracks the value of LC_ALL; used to override values for other locale
   categories */
65 /* tracks the value of LANG; used to provide defaults for locale
   categories */
69 /* Called to reset all of the locale variables to their appropriate values
70 if (and only if) LC_ALL has not been assigned a value. */
71 static int reset_locale_vars
PARAMS((void));
/* Presumably the routine that marks <blank> characters in the shell syntax
   table; its definition header is not visible in this listing. */
73 static void locale_setblanks
PARAMS((void));
/* Takes a locale specification and reports whether its codeset is UTF-8. */
74 static int locale_isutf8
PARAMS((char *));
76 /* Set the value of default_locale and make the current locale the
77 system default locale. This should be called very early in main(). */
/* Besides recording default_locale, the visible statements bind the PACKAGE
   message catalog to LOCALEDIR and initialize the cached multibyte state
   (locale_mb_cur_max, locale_utf8locale, locale_shiftstates).
   NOTE(review): the function header, braces and the #else/#endif lines of
   the conditionals below are absent from this listing. */
81 #if defined (HAVE_SETLOCALE)
/* An empty locale name asks setlocale() to take the locale from the
   environment; it returns a null pointer on failure. */
82 default_locale
= setlocale (LC_ALL
, "");
/* Keep our own copy of the name (savestring presumably duplicates its
   argument).  Any null check guarding this line is not visible here. */
84 default_locale
= savestring (default_locale
);
/* Presumably the branch taken when setlocale() is unavailable. */
86 default_locale
= savestring ("C");
87 #endif /* HAVE_SETLOCALE */
88 bindtextdomain (PACKAGE
, LOCALEDIR
);
91 locale_mb_cur_max
= MB_CUR_MAX
;
/* NOTE(review): default_locale may be null if setlocale() failed; confirm
   locale_isutf8() is safe for that in builds using its fallback parser. */
92 locale_utf8locale
= locale_isutf8 (default_locale
);
93 #if defined (HANDLE_MULTIBYTE)
/* mblen() with a null string reports whether the encoding is
   state-dependent. */
94 locale_shiftstates
= mblen ((char *)NULL
, 0);
96 locale_shiftstates
= 0;
100 /* Set default values for LC_CTYPE, LC_COLLATE, LC_MESSAGES, LC_NUMERIC and
101 LC_TIME if they are not specified in the environment, but LC_ALL is. This
102 should be called from main() after parsing the environment. */
/* The tail of the function also reads TEXTDOMAIN and TEXTDOMAINDIR and calls
   bindtextdomain() once both a domain and a directory are non-empty.
   NOTE(review): the return type, local declarations, braces, #else/#endif
   lines and some statements of this function are absent from this listing. */
104 set_default_locale_vars ()
108 #if defined (HAVE_SETLOCALE)
110 # if defined (LC_CTYPE)
/* LC_CTYPE has no value while LC_ALL is non-empty: apply LC_ALL to the
   category, then refresh the cached state that depends on LC_CTYPE. */
111 val
= get_string_value ("LC_CTYPE");
112 if (val
== 0 && lc_all
&& *lc_all
)
114 setlocale (LC_CTYPE
, lc_all
);
116 locale_mb_cur_max
= MB_CUR_MAX
;
117 locale_utf8locale
= locale_isutf8 (lc_all
);
119 # if defined (HANDLE_MULTIBYTE)
120 locale_shiftstates
= mblen ((char *)NULL
, 0);
122 locale_shiftstates
= 0;
/* The remaining categories apply the same test; no cached state is
   refreshed for them in the visible lines. */
129 # if defined (LC_COLLATE)
130 val
= get_string_value ("LC_COLLATE");
131 if (val
== 0 && lc_all
&& *lc_all
)
132 setlocale (LC_COLLATE
, lc_all
);
133 # endif /* LC_COLLATE */
135 # if defined (LC_MESSAGES)
136 val
= get_string_value ("LC_MESSAGES");
137 if (val
== 0 && lc_all
&& *lc_all
)
138 setlocale (LC_MESSAGES
, lc_all
);
139 # endif /* LC_MESSAGES */
141 # if defined (LC_NUMERIC)
142 val
= get_string_value ("LC_NUMERIC");
143 if (val
== 0 && lc_all
&& *lc_all
)
144 setlocale (LC_NUMERIC
, lc_all
);
145 # endif /* LC_NUMERIC */
147 # if defined (LC_TIME)
148 val
= get_string_value ("LC_TIME");
149 if (val
== 0 && lc_all
&& *lc_all
)
150 setlocale (LC_TIME
, lc_all
);
151 # endif /* LC_TIME */
153 #endif /* HAVE_SETLOCALE */
/* Message domain: replace the saved copy and rebind if a directory is
   already known.  The test on val that presumably guards these statements
   is not visible here. */
155 val
= get_string_value ("TEXTDOMAIN");
158 FREE (default_domain
);
159 default_domain
= savestring (val
);
160 if (default_dir
&& *default_dir
)
161 bindtextdomain (default_domain
, default_dir
);
/* Catalog directory: the mirror image of the TEXTDOMAIN handling above. */
164 val
= get_string_value ("TEXTDOMAINDIR");
168 default_dir
= savestring (val
);
169 if (default_domain
&& *default_domain
)
170 bindtextdomain (default_domain
, default_dir
);
174 /* Set one of the locale categories (specified by VAR) to VALUE. Returns 1
175 if successful, 0 otherwise. */
/* VAR is recognized by testing individual characters rather than by a full
   string comparison, so callers are expected to pass only TEXTDOMAIN,
   TEXTDOMAINDIR or one of the LC_ names handled below.
   NOTE(review): this listing omits the return type, parameter and local
   declarations, braces, several else/#endif lines and the return
   statements. */
177 set_locale_var (var
, value
)
/* TEXTDOMAIN is exactly 10 characters long, so a terminator at index 10
   tells it apart from TEXTDOMAINDIR. */
185 if (var
[0] == 'T' && var
[10] == 0) /* TEXTDOMAIN */
187 FREE (default_domain
);
188 default_domain
= value
? savestring (value
) : (char *)NULL
;
/* Rebind only when a catalog directory is already known. */
189 if (default_dir
&& *default_dir
)
190 bindtextdomain (default_domain
, default_dir
);
193 else if (var
[0] == 'T') /* TEXTDOMAINDIR */
196 default_dir
= value
? savestring (value
) : (char *)NULL
;
/* Rebind only when a domain name is already known. */
197 if (default_domain
&& *default_domain
)
198 bindtextdomain (default_domain
, default_dir
);
202 /* var[0] == 'L' && var[1] == 'C' && var[2] == '_' */
204 else if (var
[3] == 'A') /* LC_ALL */
/* lc_all receives either a copy of VALUE or a one-byte buffer, presumably
   an empty string for a null VALUE; the lines that choose between the two
   and fill the buffer are not visible here. */
208 lc_all
= savestring (value
);
211 lc_all
= (char *)xmalloc (1);
214 #if defined (HAVE_SETLOCALE)
/* Non-empty LC_ALL: switch every category at once and record success.
   Empty LC_ALL: defer to reset_locale_vars() and use its result. */
215 r
= *lc_all
? ((x
= setlocale (LC_ALL
, lc_all
)) != 0) : reset_locale_vars ();
/* Two forms of the failure warning; the second appends strerror (errno).
   The conditions selecting between them are not visible in this listing. */
219 internal_warning(_("setlocale: LC_ALL: cannot change locale (%s)"), lc_all
);
221 internal_warning(_("setlocale: LC_ALL: cannot change locale (%s): %s"), lc_all
, strerror (errno
));
/* Refresh the cached multibyte state after the change. */
224 locale_mb_cur_max
= MB_CUR_MAX
;
225 /* if LC_ALL == "", reset_locale_vars has already called this */
227 locale_utf8locale
= locale_isutf8 (lc_all
);
228 # if defined (HANDLE_MULTIBYTE)
229 locale_shiftstates
= mblen ((char *)NULL
, 0);
231 locale_shiftstates
= 0;
240 #if defined (HAVE_SETLOCALE)
241 else if (var
[3] == 'C' && var
[4] == 'T') /* LC_CTYPE */
243 # if defined (LC_CTYPE)
/* Every per-category branch below changes the locale only when LC_ALL is
   unset or empty, and takes the new value from get_locale_var(). */
244 if (lc_all
== 0 || *lc_all
== '\0')
246 x
= setlocale (LC_CTYPE
, get_locale_var ("LC_CTYPE"));
248 locale_mb_cur_max
= MB_CUR_MAX
;
249 /* if setlocale() returns NULL, the locale is not changed */
/* NOTE(review): x can be null at this point; confirm that the full source
   guards the call below as the comment above suggests. */
251 locale_utf8locale
= locale_isutf8 (x
);
252 #if defined (HANDLE_MULTIBYTE)
253 locale_shiftstates
= mblen ((char *)NULL
, 0);
255 locale_shiftstates
= 0;
261 else if (var
[3] == 'C' && var
[4] == 'O') /* LC_COLLATE */
263 # if defined (LC_COLLATE)
264 if (lc_all
== 0 || *lc_all
== '\0')
265 x
= setlocale (LC_COLLATE
, get_locale_var ("LC_COLLATE"));
266 # endif /* LC_COLLATE */
268 else if (var
[3] == 'M' && var
[4] == 'E') /* LC_MESSAGES */
270 # if defined (LC_MESSAGES)
271 if (lc_all
== 0 || *lc_all
== '\0')
272 x
= setlocale (LC_MESSAGES
, get_locale_var ("LC_MESSAGES"));
273 # endif /* LC_MESSAGES */
275 else if (var
[3] == 'N' && var
[4] == 'U') /* LC_NUMERIC */
277 # if defined (LC_NUMERIC)
278 if (lc_all
== 0 || *lc_all
== '\0')
279 x
= setlocale (LC_NUMERIC
, get_locale_var ("LC_NUMERIC"));
280 # endif /* LC_NUMERIC */
282 else if (var
[3] == 'T' && var
[4] == 'I') /* LC_TIME */
284 # if defined (LC_TIME)
285 if (lc_all
== 0 || *lc_all
== '\0')
286 x
= setlocale (LC_TIME
, get_locale_var ("LC_TIME"));
287 # endif /* LC_TIME */
289 #endif /* HAVE_SETLOCALE */
/* Failure warnings for the per-category branches, again in two forms with
   and without strerror (errno); the reported value is re-read through
   get_locale_var(). */
294 internal_warning(_("setlocale: %s: cannot change locale (%s)"), var
, get_locale_var (var
));
296 internal_warning(_("setlocale: %s: cannot change locale (%s): %s"), var
, get_locale_var (var
), strerror (errno
));
302 /* Called when LANG is assigned a value. Tracks value in `lang'. Calls
303 reset_locale_vars() to reset any default values if LC_ALL is unset or
   null. */
/* Record a new LANG value in lang and re-derive the locale from it unless
   LC_ALL is in force.
   NOTE(review): the return type, parameter declarations, braces and the
   test that selects between the two lang assignments are absent from this
   listing. */
306 set_lang (var
, value
)
/* Keep a private copy of VALUE. */
311 lang
= savestring (value
);
/* Alternative assignment: a one-byte buffer, presumably set to the empty
   string when VALUE is null -- TODO confirm. */
314 lang
= (char *)xmalloc (1);
/* A non-empty LC_ALL overrides LANG, so nothing is reset and 0 is returned;
   otherwise the result of reset_locale_vars() is returned. */
318 return ((lc_all
== 0 || *lc_all
== 0) ? reset_locale_vars () : 0);
321 /* Set default values for LANG and LC_ALL. Default values for all other
322 locale-related variables depend on these. */
/* NOTE(review): the function header, the declaration of v and the braces
   are absent from this listing. */
/* LC_ALL is processed first, so lc_all is already set when set_lang()
   tests it. */
328 v
= get_string_value ("LC_ALL");
329 set_locale_var ("LC_ALL", v
);
331 v
= get_string_value ("LANG");
332 set_lang ("LANG", v
);
335 /* Get the value of one of the locale variables (LC_MESSAGES, LC_CTYPE).
336 The precedence is as POSIX.2 specifies: LC_ALL has precedence over
337 the specific locale variables, and LANG, if set, is used as the default. */
/* The visible code is a chain of fallbacks: each stage runs only while
   locale is still null or empty.
   NOTE(review): the function header, the initial assignment to locale
   (presumably from lc_all), one fallback assignment and the return
   statement are absent from this listing. */
/* Stage: the shell variable named by VAR. */
346 if (locale
== 0 || *locale
== 0)
347 locale
= get_string_value (var
); /* XXX - no mem leak */
/* Stage: presumably falls back to lang; the assignment itself is not
   visible here. */
348 if (locale
== 0 || *locale
== 0)
/* Last resort: the locale recorded at startup. */
350 if (locale
== 0 || *locale
== 0)
352 locale
= default_locale
; /* system-dependent; not really portable. should it be "C"? */
359 /* Called to reset all of the locale variables to their appropriate values
360 if (and only if) LC_ALL has not been assigned a value. DO NOT CALL THIS
361 IF LC_ALL HAS BEEN ASSIGNED A VALUE. */
/* Applies LANG to all categories, then sets each category from
   get_locale_var(), and finally refreshes the cached multibyte state.
   NOTE(review): the function header, local declarations, braces, #endif
   lines and return statements are absent from this listing. */
366 #if defined (HAVE_SETLOCALE)
367 if (lang
== 0 || *lang
== '\0')
368 maybe_make_export_env (); /* trust that this will change environment for setlocale */
/* A null lang is replaced by the empty string, which asks setlocale() to
   take the locale from the environment.  The action taken on failure is
   not visible here. */
369 if (setlocale (LC_ALL
, lang
? lang
: "") == 0)
/* Only the LC_CTYPE result is kept separately in x; it is passed to
   locale_isutf8() below.  The other results all go to t. */
373 # if defined (LC_CTYPE)
374 x
= setlocale (LC_CTYPE
, get_locale_var ("LC_CTYPE"));
376 # if defined (LC_COLLATE)
377 t
= setlocale (LC_COLLATE
, get_locale_var ("LC_COLLATE"));
379 # if defined (LC_MESSAGES)
380 t
= setlocale (LC_MESSAGES
, get_locale_var ("LC_MESSAGES"));
382 # if defined (LC_NUMERIC)
383 t
= setlocale (LC_NUMERIC
, get_locale_var ("LC_NUMERIC"));
385 # if defined (LC_TIME)
386 t
= setlocale (LC_TIME
, get_locale_var ("LC_TIME"));
390 locale_mb_cur_max
= MB_CUR_MAX
;
/* NOTE(review): x is null if the LC_CTYPE call failed; confirm the full
   source guards this call. */
392 locale_utf8locale
= locale_isutf8 (x
);
393 # if defined (HANDLE_MULTIBYTE)
394 locale_shiftstates
= mblen ((char *)NULL
, 0);
396 locale_shiftstates
= 0;
403 #if defined (TRANSLATABLE_STRINGS)
404 /* Translate the contents of STRING, a $"..." quoted string, according
405 to the current locale. In the `C' or `POSIX' locale, or if gettext()
406 is not available, the passed string is returned unchanged. The
407 length of the translated string is returned in LENP, if non-null. */
/* Each visible non-null result path allocates a buffer with xmalloc(), so
   the caller presumably owns the returned string -- TODO confirm.
   NOTE(review): the return type, parameter and local declarations, braces,
   the copy/return statements and the else branch that calls gettext
   without a domain are absent from this listing. */
409 localetrans (string
, len
, lenp
)
417 /* Don't try to translate null strings. */
418 if (string
== 0 || *string
== 0)
422 return ((char *)NULL
);
/* Message translation is governed by LC_MESSAGES, resolved with the usual
   LC_ALL / LANG precedence. */
425 locale
= get_locale_var ("LC_MESSAGES");
427 /* If we don't have setlocale() or the current locale is `C' or `POSIX',
428 just return the string. If we don't have gettext(), there's no use
429 doing anything else. */
430 if (locale
== 0 || locale
[0] == '\0' ||
431 (locale
[0] == 'C' && locale
[1] == '\0') || STREQ (locale
, "POSIX"))
/* Buffer sized for LEN bytes plus a terminator, presumably to hold an
   untranslated copy of STRING. */
433 t
= (char *)xmalloc (len
+ 1);
440 /* Now try to translate it. */
441 if (default_domain
&& *default_domain
)
442 translated
= dgettext (default_domain
, string
);
/* Pointer comparison, not a string comparison: detects that the very same
   buffer came back. */
446 if (translated
== string
) /* gettext returns its argument if untranslatable */
448 t
= (char *)xmalloc (len
+ 1);
/* A real translation was found: copy it into a buffer of our own. */
455 tlen
= strlen (translated
);
456 t
= (char *)xmalloc (tlen
+ 1);
457 strcpy (t
, translated
);
464 /* Change a bash string into a string suitable for inclusion in a `po' file.
465 This backslash-escapes `"' and `\' and changes newlines into \\\n"\n". */
/* STRING may be null: both loops test the pointer before dereferencing it.
   FOUNDNLP is passed by the caller as the address of an int and is
   presumably set when a newline is seen -- the store is not visible here.
   NOTE(review): the return type, parameter declarations, braces, the length
   accounting and the statements that emit characters are absent from this
   listing. */
467 mk_msgstr (string
, foundnlp
)
472 char *result
, *r
, *s
;
/* First loop: appears to measure the escaped length so that a single
   allocation suffices. */
474 for (len
= 0, s
= string
; s
&& *s
; s
++)
477 if (*s
== '"' || *s
== '\\')
/* Three bytes beyond the measured length -- presumably the two enclosing
   double quotes and the terminator; TODO confirm. */
483 r
= result
= (char *)xmalloc (len
+ 3);
/* Second loop: walks STRING again, treating newline, double quote and
   backslash specially. */
486 for (s
= string
; s
&& (c
= *s
); s
++)
488 if (c
== '\n') /* <NL> -> \n"<NL>" */
499 if (c
== '"' || c
== '\\')
510 /* $"..." -- Translate the portion of STRING between START and END
511 according to current locale using gettext (if available) and return
512 the result. The caller will take care of leaving the quotes intact.
513 The string will be left without the leading `$' by the caller.
514 If translation is performed, the translated string will be double-quoted
515 by the caller. The length of the translated string is returned in LENP,
   if non-null. */
/* Copies STRING[START..END) into a scratch buffer, then either prints it
   (when dump_translatable_strings is set) or translates it with
   localetrans().
   NOTE(review): the return type, remaining declarations, braces, the
   terminator store, the test that selects po-file output, the frees and the
   return statements are absent from this listing. */
518 locale_expand (string
, start
, end
, lineno
, lenp
)
520 int start
, end
, lineno
, *lenp
;
522 int len
, tlen
, foundnl
;
/* Room for END - START bytes plus a terminator. */
525 temp
= (char *)xmalloc (end
- start
+ 1);
/* len is used here as the read index into STRING and tlen as the write
   index into temp; after the loop tlen is the number of bytes copied. */
526 for (tlen
= 0, len
= start
; len
< end
; )
527 temp
[tlen
++] = string
[len
++];
530 /* If we're just dumping translatable strings, don't do anything with the
531 string itself, but if we're dumping in `po' file format, convert it into
532 a form more palatable to gettext(3) and friends by quoting `"' and `\'
533 with backslashes and converting <NL> into `\n"<NL>"'. If we find a
534 newline in TEMP, we first output a `msgid ""' line and then the
535 translated string; otherwise we output the `msgid' and translated
536 string all on one line. */
537 if (dump_translatable_strings
)
/* po-file form: escape the text and note whether it spans lines. */
542 t
= mk_msgstr (temp
, &foundnl
);
/* For multi-line text the msgid starts with an empty string on its own
   line. */
543 t2
= foundnl
? "\"\"\n" : "";
545 printf ("#: %s:%d\nmsgid %s%s\nmsgstr \"\"\n",
546 yy_input_name (), lineno
, t2
, t
);
/* Plain form: the raw text inside double quotes. */
550 printf ("\"%s\"\n", temp
);
/* Normal path: translate; len is reused to receive the translated length. */
558 t
= localetrans (temp
, tlen
, &len
);
573 /* Set every character in the <blank> character class to be a shell break
574 character for the lexical analyzer when the locale changes. */
/* Visits every index of sh_syntaxtab and gives each entry exactly one of
   three treatments, tested in the order below.
   NOTE(review): the function header, the declaration of x and the braces
   are absent from this listing. */
580 for (x
= 0; x
< sh_syntabsiz
; x
++)
/* A <blank> in the current locale: mark it as both a break character and a
   blank. */
582 if (isblank ((unsigned char)x
))
583 sh_syntaxtab
[x
] |= CSHBRK
|CBLANK
;
/* Listed in shell_break_chars: set CSHBRK and clear CBLANK. */
584 else if (member (x
, shell_break_chars
))
586 sh_syntaxtab
[x
] |= CSHBRK
;
587 sh_syntaxtab
[x
] &= ~CBLANK
;
/* Anything else: clear both flags. */
590 sh_syntaxtab
[x
] &= ~(CSHBRK
|CBLANK
);
594 /* Parse a locale specification
595 language[_territory][.codeset][@modifier][+special][,[sponsor][_revision]]
596 and return TRUE if the codeset is UTF-8 or utf8 */
/* LSPEC is read only in the last branch.  When HAVE_LANGINFO_CODESET or
   HAVE_LOCALE_CHARSET is set, the answer comes from the C library's view of
   the current locale and LSPEC is ignored.
   NOTE(review): the last branch dereferences LSPEC without a null check,
   while callers in this file pass setlocale() results and default_locale;
   confirm those cannot be null in builds that use that branch.
   NOTE(review): the return type, declarations, braces, #else/#endif lines,
   the test for the codeset separator and the return statements are absent
   from this listing. */
598 locale_isutf8 (lspec
)
603 #if HAVE_LANGINFO_CODESET
/* Ask the C library for the codeset of the current locale. */
604 cp
= nl_langinfo (CODESET
);
605 return (STREQ (cp
, "UTF-8") || STREQ (cp
, "utf8"));
606 #elif HAVE_LOCALE_CHARSET
607 cp
= locale_charset ();
608 return (STREQ (cp
, "UTF-8") || STREQ (cp
, "utf8"));
/* Manual parse: scan LSPEC up to the first modifier, special or sponsor
   delimiter. */
611 for (cp
= lspec
; *cp
&& *cp
!= '@' && *cp
!= '+' && *cp
!= ','; cp
++)
/* encoding marks the character after the current position; cp then runs to
   the end of that field. */
615 for (encoding
= ++cp
; *cp
&& *cp
!= '@' && *cp
!= '+' && *cp
!= ','; cp
++)
617 /* The encoding (codeset) is the substring between encoding and cp */
/* Exact-length match, so longer names that merely start with UTF-8 or utf8
   are not accepted. */
618 if ((cp
- encoding
== 5 && STREQN (encoding
, "UTF-8", 5)) ||
619 (cp
- encoding
== 4 && STREQN (encoding
, "utf8", 4)))
/* Decimal-point lookup for the current locale.
   NOTE(review): the function header, the declaration and assignment of lv
   (presumably from localeconv()), the braces and the #else/#endif lines are
   absent from this listing. */
629 #if defined (HAVE_LOCALECONV)
/* Yields the first character of decimal_point; falls back to a period when
   lv is null or decimal_point is null or empty. */
636 return (lv
&& lv
->decimal_point
&& lv
->decimal_point
[0]) ? lv
->decimal_point
[0] : '.';
/* Removes any macro definition of the same name; the reason is not visible
   in this listing. */
639 # undef locale_decpoint