]> git.ipfire.org Git - thirdparty/bash.git/blame - locale.c
bash-4.3-alpha release
[thirdparty/bash.git] / locale.c
CommitLineData
ccc6cda3
JA
1/* locale.c - Miscellaneous internationalization functions. */
2
863d31ae 3/* Copyright (C) 1996-2009,2012 Free Software Foundation, Inc.
ccc6cda3
JA
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
2e4498b3
CR
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
ccc6cda3 11
2e4498b3
CR
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
ccc6cda3 16
2e4498b3
CR
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
ccc6cda3
JA
20
21#include "config.h"
22
23#include "bashtypes.h"
24
25#if defined (HAVE_UNISTD_H)
26# include <unistd.h>
27#endif
28
22818c14
CR
29#if HAVE_LANGINFO_CODESET
30# include <langinfo.h>
31#endif
32
ccc6cda3
JA
33#include "bashintl.h"
34#include "bashansi.h"
35#include <stdio.h>
f73dda09 36#include "chartypes.h"
547ef914 37#include <errno.h>
ccc6cda3
JA
38
39#include "shell.h"
7117c2d2
JA
40#include "input.h" /* For bash_input */
41
547ef914
CR
42#ifndef errno
43extern int errno;
44#endif
45
22818c14 46int locale_utf8locale; /* unused for now */
51f7ea36 47int locale_mb_cur_max; /* value of MB_CUR_MAX for current locale (LC_CTYPE) */
22818c14 48
7117c2d2 49extern int dump_translatable_strings, dump_po_strings;
ccc6cda3
JA
50
51/* The current locale when the program begins */
52static char *default_locale;
53
54/* The current domain for textdomain(3). */
55static char *default_domain;
56static char *default_dir;
57
58/* tracks the value of LC_ALL; used to override values for other locale
59 categories */
60static char *lc_all;
61
d3a24ed2
CR
62/* tracks the value of LANG; used to provide defaults for locale
63 categories */
64static char *lang;
65
43cdcad8
CR
66/* Called to reset all of the locale variables to their appropriate values
67 if (and only if) LC_ALL has not been assigned a value. */
d3a24ed2
CR
68static int reset_locale_vars __P((void));
69
70static void locale_setblanks __P((void));
22818c14 71static int locale_isutf8 __P((char *));
d3a24ed2 72
ccc6cda3
JA
73/* Set the value of default_locale and make the current locale the
74 system default locale. This should be called very early in main(). */
75void
76set_default_locale ()
77{
78#if defined (HAVE_SETLOCALE)
79 default_locale = setlocale (LC_ALL, "");
80 if (default_locale)
81 default_locale = savestring (default_locale);
82#endif /* HAVE_SETLOCALE */
5e13499c
CR
83 bindtextdomain (PACKAGE, LOCALEDIR);
84 textdomain (PACKAGE);
51f7ea36
CR
85
86 locale_mb_cur_max = MB_CUR_MAX;
ccc6cda3
JA
87}
88
898cc92e
CR
89/* Set default values for LC_CTYPE, LC_COLLATE, LC_MESSAGES, LC_NUMERIC and
90 LC_TIME if they are not specified in the environment, but LC_ALL is. This
ccc6cda3
JA
91 should be called from main() after parsing the environment. */
92void
93set_default_locale_vars ()
94{
95 char *val;
96
97#if defined (HAVE_SETLOCALE)
d3a24ed2
CR
98
99# if defined (LC_CTYPE)
ccc6cda3
JA
100 val = get_string_value ("LC_CTYPE");
101 if (val == 0 && lc_all && *lc_all)
d3a24ed2
CR
102 {
103 setlocale (LC_CTYPE, lc_all);
104 locale_setblanks ();
51f7ea36 105 locale_mb_cur_max = MB_CUR_MAX;
1f6ec1a8 106 u32reset ();
d3a24ed2
CR
107 }
108# endif
ccc6cda3
JA
109
110# if defined (LC_COLLATE)
111 val = get_string_value ("LC_COLLATE");
112 if (val == 0 && lc_all && *lc_all)
113 setlocale (LC_COLLATE, lc_all);
114# endif /* LC_COLLATE */
115
116# if defined (LC_MESSAGES)
117 val = get_string_value ("LC_MESSAGES");
118 if (val == 0 && lc_all && *lc_all)
119 setlocale (LC_MESSAGES, lc_all);
120# endif /* LC_MESSAGES */
121
bb70624e
JA
122# if defined (LC_NUMERIC)
123 val = get_string_value ("LC_NUMERIC");
124 if (val == 0 && lc_all && *lc_all)
125 setlocale (LC_NUMERIC, lc_all);
126# endif /* LC_NUMERIC */
127
898cc92e
CR
128# if defined (LC_TIME)
129 val = get_string_value ("LC_TIME");
130 if (val == 0 && lc_all && *lc_all)
131 setlocale (LC_TIME, lc_all);
132# endif /* LC_TIME */
133
ccc6cda3
JA
134#endif /* HAVE_SETLOCALE */
135
136 val = get_string_value ("TEXTDOMAIN");
137 if (val && *val)
138 {
139 FREE (default_domain);
140 default_domain = savestring (val);
5e13499c
CR
141#if 0
142 /* Don't want to override the shell's textdomain as the default */
ccc6cda3 143 textdomain (default_domain);
5e13499c 144#endif
ccc6cda3
JA
145 }
146
147 val = get_string_value ("TEXTDOMAINDIR");
148 if (val && *val)
149 {
150 FREE (default_dir);
151 default_dir = savestring (val);
5e13499c
CR
152 if (default_domain && *default_domain)
153 bindtextdomain (default_domain, default_dir);
ccc6cda3
JA
154 }
155}
156
157/* Set one of the locale categories (specified by VAR) to VALUE. Returns 1
158 if successful, 0 otherwise. */
159int
160set_locale_var (var, value)
161 char *var, *value;
162{
d3a24ed2 163 int r;
547ef914 164 char *x;
d3a24ed2 165
547ef914
CR
166 x = "";
167 errno = 0;
ccc6cda3
JA
168 if (var[0] == 'T' && var[10] == 0) /* TEXTDOMAIN */
169 {
170 FREE (default_domain);
171 default_domain = value ? savestring (value) : (char *)NULL;
5e13499c
CR
172#if 0
173 /* Don't want to override the shell's textdomain as the default */
ccc6cda3 174 textdomain (default_domain);
5e13499c 175#endif
ccc6cda3
JA
176 return (1);
177 }
178 else if (var[0] == 'T') /* TEXTDOMAINDIR */
179 {
180 FREE (default_dir);
181 default_dir = value ? savestring (value) : (char *)NULL;
5e13499c
CR
182 if (default_domain && *default_domain)
183 bindtextdomain (default_domain, default_dir);
ccc6cda3
JA
184 return (1);
185 }
186
187 /* var[0] == 'L' && var[1] == 'C' && var[2] == '_' */
188
189 else if (var[3] == 'A') /* LC_ALL */
190 {
191 FREE (lc_all);
e8ce775d
JA
192 if (value)
193 lc_all = savestring (value);
e8ce775d
JA
194 else
195 {
f73dda09 196 lc_all = (char *)xmalloc (1);
e8ce775d
JA
197 lc_all[0] = '\0';
198 }
ccc6cda3 199#if defined (HAVE_SETLOCALE)
547ef914
CR
200 r = *lc_all ? ((x = setlocale (LC_ALL, lc_all)) != 0) : reset_locale_vars ();
201 if (x == 0)
db31fb26
CR
202 {
203 if (errno == 0)
204 internal_warning(_("setlocale: LC_ALL: cannot change locale (%s)"), lc_all);
205 else
206 internal_warning(_("setlocale: LC_ALL: cannot change locale (%s): %s"), lc_all, strerror (errno));
207 }
d3a24ed2 208 locale_setblanks ();
51f7ea36 209 locale_mb_cur_max = MB_CUR_MAX;
1f6ec1a8 210 u32reset ();
d3a24ed2 211 return r;
ccc6cda3
JA
212#else
213 return (1);
214#endif
215 }
216
217#if defined (HAVE_SETLOCALE)
218 else if (var[3] == 'C' && var[4] == 'T') /* LC_CTYPE */
219 {
d3a24ed2 220# if defined (LC_CTYPE)
ccc6cda3 221 if (lc_all == 0 || *lc_all == '\0')
d3a24ed2 222 {
547ef914 223 x = setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE"));
d3a24ed2 224 locale_setblanks ();
51f7ea36 225 locale_mb_cur_max = MB_CUR_MAX;
1f6ec1a8 226 u32reset ();
d3a24ed2
CR
227 }
228# endif
ccc6cda3
JA
229 }
230 else if (var[3] == 'C' && var[4] == 'O') /* LC_COLLATE */
231 {
232# if defined (LC_COLLATE)
233 if (lc_all == 0 || *lc_all == '\0')
547ef914 234 x = setlocale (LC_COLLATE, get_locale_var ("LC_COLLATE"));
ccc6cda3
JA
235# endif /* LC_COLLATE */
236 }
237 else if (var[3] == 'M' && var[4] == 'E') /* LC_MESSAGES */
238 {
239# if defined (LC_MESSAGES)
240 if (lc_all == 0 || *lc_all == '\0')
547ef914 241 x = setlocale (LC_MESSAGES, get_locale_var ("LC_MESSAGES"));
ccc6cda3
JA
242# endif /* LC_MESSAGES */
243 }
f73dda09 244 else if (var[3] == 'N' && var[4] == 'U') /* LC_NUMERIC */
bb70624e
JA
245 {
246# if defined (LC_NUMERIC)
247 if (lc_all == 0 || *lc_all == '\0')
547ef914 248 x = setlocale (LC_NUMERIC, get_locale_var ("LC_NUMERIC"));
bb70624e
JA
249# endif /* LC_NUMERIC */
250 }
898cc92e
CR
251 else if (var[3] == 'T' && var[4] == 'I') /* LC_TIME */
252 {
253# if defined (LC_TIME)
254 if (lc_all == 0 || *lc_all == '\0')
547ef914 255 x = setlocale (LC_TIME, get_locale_var ("LC_TIME"));
898cc92e
CR
256# endif /* LC_TIME */
257 }
ccc6cda3 258#endif /* HAVE_SETLOCALE */
898cc92e 259
547ef914 260 if (x == 0)
db31fb26
CR
261 {
262 if (errno == 0)
263 internal_warning(_("setlocale: %s: cannot change locale (%s)"), var, get_locale_var (var));
264 else
265 internal_warning(_("setlocale: %s: cannot change locale (%s): %s"), var, get_locale_var (var), strerror (errno));
266 }
ccc6cda3 267
547ef914 268 return (x != 0);
ccc6cda3
JA
269}
270
d3a24ed2
CR
271/* Called when LANG is assigned a value. Tracks value in `lang'. Calls
272 reset_locale_vars() to reset any default values if LC_ALL is unset or
273 null. */
ccc6cda3
JA
274int
275set_lang (var, value)
276 char *var, *value;
277{
d3a24ed2
CR
278 FREE (lang);
279 if (value)
280 lang = savestring (value);
281 else
282 {
283 lang = (char *)xmalloc (1);
284 lang[0] = '\0';
285 }
22818c14 286
d3a24ed2 287 return ((lc_all == 0 || *lc_all == 0) ? reset_locale_vars () : 0);
ccc6cda3
JA
288}
289
d3ad40de
CR
/* Seed the tracked values of LC_ALL and LANG from the shell variables of
   the same names.  The defaults for every other locale-related variable
   are derived from these two. */
void
set_default_lang ()
{
  /* LC_ALL goes first: set_lang() consults the tracked LC_ALL value. */
  set_locale_var ("LC_ALL", get_string_value ("LC_ALL"));
  set_lang ("LANG", get_string_value ("LANG"));
}
303
d3a24ed2
CR
304/* Get the value of one of the locale variables (LC_MESSAGES, LC_CTYPE).
305 The precedence is as POSIX.2 specifies: LC_ALL has precedence over
306 the specific locale variables, and LANG, if set, is used as the default. */
ccc6cda3
JA
307char *
308get_locale_var (var)
309 char *var;
310{
311 char *locale;
312
313 locale = lc_all;
314
d3a24ed2 315 if (locale == 0 || *locale == 0)
adc6cff5 316 locale = get_string_value (var); /* XXX - mem leak? */
d3a24ed2
CR
317 if (locale == 0 || *locale == 0)
318 locale = lang;
319 if (locale == 0 || *locale == 0)
d3ad40de
CR
320#if 0
321 locale = default_locale; /* system-dependent; not really portable. should it be "C"? */
322#else
323 locale = "";
324#endif
ccc6cda3
JA
325 return (locale);
326}
327
43cdcad8
CR
/* Recompute every locale category from LANG and the individual LC_*
   variables.  Only meaningful while LC_ALL has no value: DO NOT CALL THIS
   IF LC_ALL HAS BEEN ASSIGNED A VALUE.

   Returns 0 if the initial setlocale (LC_ALL, ...) call fails and 1
   otherwise (always 1 when setlocale() is not available).  The results of
   the per-category setlocale() calls are intentionally not checked. */
static int
reset_locale_vars ()
{
#if defined (HAVE_SETLOCALE)
  /* With no usable LANG, setlocale() will consult the process environment,
     so make sure the export environment is up to date first. */
  if (lang == 0 || *lang == '\0')
    maybe_make_export_env ();   /* trust that this will change environment for setlocale */
  if (setlocale (LC_ALL, lang ? lang : "") == 0)
    return 0;

  /* Now layer the individual categories on top of the LANG default. */
# if defined (LC_CTYPE)
  setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE"));
# endif
# if defined (LC_COLLATE)
  setlocale (LC_COLLATE, get_locale_var ("LC_COLLATE"));
# endif
# if defined (LC_MESSAGES)
  setlocale (LC_MESSAGES, get_locale_var ("LC_MESSAGES"));
# endif
# if defined (LC_NUMERIC)
  setlocale (LC_NUMERIC, get_locale_var ("LC_NUMERIC"));
# endif
# if defined (LC_TIME)
  setlocale (LC_TIME, get_locale_var ("LC_TIME"));
# endif

  /* Refresh everything that depends on the character type tables. */
  locale_setblanks ();
  locale_mb_cur_max = MB_CUR_MAX;
  u32reset ();

#endif
  return 1;
}
364
ccc6cda3
JA
365/* Translate the contents of STRING, a $"..." quoted string, according
366 to the current locale. In the `C' or `POSIX' locale, or if gettext()
367 is not available, the passed string is returned unchanged. The
368 length of the translated string is returned in LENP, if non-null. */
369char *
370localetrans (string, len, lenp)
371 char *string;
372 int len, *lenp;
373{
374 char *locale, *t;
ccc6cda3
JA
375 char *translated;
376 int tlen;
ccc6cda3
JA
377
378 /* Don't try to translate null strings. */
379 if (string == 0 || *string == 0)
380 {
381 if (lenp)
28ef6c31 382 *lenp = 0;
ccc6cda3
JA
383 return ((char *)NULL);
384 }
385
ccc6cda3
JA
386 locale = get_locale_var ("LC_MESSAGES");
387
388 /* If we don't have setlocale() or the current locale is `C' or `POSIX',
389 just return the string. If we don't have gettext(), there's no use
390 doing anything else. */
ccc6cda3
JA
391 if (locale == 0 || locale[0] == '\0' ||
392 (locale[0] == 'C' && locale[1] == '\0') || STREQ (locale, "POSIX"))
ccc6cda3 393 {
f73dda09 394 t = (char *)xmalloc (len + 1);
ccc6cda3
JA
395 strcpy (t, string);
396 if (lenp)
397 *lenp = len;
398 return (t);
399 }
400
ccc6cda3 401 /* Now try to translate it. */
5e13499c
CR
402 if (default_domain && *default_domain)
403 translated = dgettext (default_domain, string);
404 else
405 translated = string;
406
ccc6cda3
JA
407 if (translated == string) /* gettext returns its argument if untranslatable */
408 {
f73dda09 409 t = (char *)xmalloc (len + 1);
ccc6cda3
JA
410 strcpy (t, string);
411 if (lenp)
412 *lenp = len;
413 }
414 else
415 {
ccc6cda3 416 tlen = strlen (translated);
f73dda09 417 t = (char *)xmalloc (tlen + 1);
ccc6cda3
JA
418 strcpy (t, translated);
419 if (lenp)
420 *lenp = tlen;
421 }
422 return (t);
ccc6cda3 423}
7117c2d2
JA
424
/* Build a double-quoted version of STRING suitable for a `po' file: `"'
   and `\' are preceded by a backslash and every newline becomes the
   five-character sequence \n"<NL>" so that the text continues in a new
   quoted string on the next line.  *FOUNDNLP is set to 1 if FOUNDNLP is
   non-null and at least one newline was seen; it is never cleared here.
   A null STRING produces an empty pair of quotes.  The result is newly
   allocated. */
char *
mk_msgstr (string, foundnlp)
     char *string;
     int *foundnlp;
{
  char *src, *dst, *msgstr;
  int size;

  /* First pass: work out how much room the escaped text needs. */
  size = 0;
  if (string)
    for (src = string; *src; src++)
      switch (*src)
        {
        case '"':
        case '\\':
          size += 2;
          break;
        case '\n':
          size += 6;
          break;
        default:
          size++;
          break;
        }

  /* Room for the two enclosing quotes and the terminating null. */
  msgstr = (char *)xmalloc (size + 3);
  dst = msgstr;
  *dst++ = '"';

  /* Second pass: copy, escaping as we go. */
  if (string)
    for (src = string; *src; src++)
      switch (*src)
        {
        case '\n':              /* <NL> -> \n"<NL>" */
          *dst++ = '\\';
          *dst++ = 'n';
          *dst++ = '"';
          *dst++ = '\n';
          *dst++ = '"';
          if (foundnlp)
            *foundnlp = 1;
          break;
        case '"':
        case '\\':
          *dst++ = '\\';
          *dst++ = *src;
          break;
        default:
          *dst++ = *src;
          break;
        }

  *dst++ = '"';
  *dst = '\0';

  return msgstr;
}
470
471/* $"..." -- Translate the portion of STRING between START and END
472 according to current locale using gettext (if available) and return
473 the result. The caller will take care of leaving the quotes intact.
474 The string will be left without the leading `$' by the caller.
475 If translation is performed, the translated string will be double-quoted
476 by the caller. The length of the translated string is returned in LENP,
477 if non-null. */
478char *
479localeexpand (string, start, end, lineno, lenp)
480 char *string;
481 int start, end, lineno, *lenp;
482{
483 int len, tlen, foundnl;
484 char *temp, *t, *t2;
485
486 temp = (char *)xmalloc (end - start + 1);
487 for (tlen = 0, len = start; len < end; )
488 temp[tlen++] = string[len++];
489 temp[tlen] = '\0';
490
491 /* If we're just dumping translatable strings, don't do anything with the
5e13499c
CR
492 string itself, but if we're dumping in `po' file format, convert it into
493 a form more palatable to gettext(3) and friends by quoting `"' and `\'
494 with backslashes and converting <NL> into `\n"<NL>"'. If we find a
495 newline in TEMP, we first output a `msgid ""' line and then the
496 translated string; otherwise we output the `msgid' and translated
497 string all on one line. */
7117c2d2
JA
498 if (dump_translatable_strings)
499 {
500 if (dump_po_strings)
501 {
502 foundnl = 0;
503 t = mk_msgstr (temp, &foundnl);
504 t2 = foundnl ? "\"\"\n" : "";
505
506 printf ("#: %s:%d\nmsgid %s%s\nmsgstr \"\"\n",
507 yy_input_name (), lineno, t2, t);
508 free (t);
509 }
510 else
511 printf ("\"%s\"\n", temp);
512
513 if (lenp)
514 *lenp = tlen;
515 return (temp);
516 }
517 else if (*temp)
518 {
519 t = localetrans (temp, tlen, &len);
520 free (temp);
521 if (lenp)
522 *lenp = len;
523 return (t);
524 }
525 else
526 {
527 if (lenp)
528 *lenp = 0;
529 return (temp);
530 }
531}
d3a24ed2
CR
532
533/* Set every character in the <blank> character class to be a shell break
534 character for the lexical analyzer when the locale changes. */
535static void
536locale_setblanks ()
537{
538 int x;
539
540 for (x = 0; x < sh_syntabsiz; x++)
541 {
542 if (isblank (x))
d3ad40de 543 sh_syntaxtab[x] |= CSHBRK|CBLANK;
d3a24ed2 544 else if (member (x, shell_break_chars))
d3ad40de
CR
545 {
546 sh_syntaxtab[x] |= CSHBRK;
547 sh_syntaxtab[x] &= ~CBLANK;
548 }
d3a24ed2 549 else
d3ad40de 550 sh_syntaxtab[x] &= ~(CSHBRK|CBLANK);
d3a24ed2
CR
551 }
552}
22818c14
CR
553
/* Return non-zero if the current locale uses UTF-8.

   When nl_langinfo (CODESET) is available the answer comes from the
   codeset name of the current locale and LSPEC is not consulted.
   Otherwise the locale specification LSPEC is searched for the
   substrings "UTF-8" or "utf8"; a null LSPEC is treated as "not UTF-8"
   rather than being handed to strstr(). */
static int
locale_isutf8 (lspec)
     char *lspec;
{
#if HAVE_LANGINFO_CODESET
  char *codeset;

  codeset = nl_langinfo (CODESET);
  return (STREQ (codeset, "UTF-8") || STREQ (codeset, "utf8"));
#else
  /* Take a shot */
  if (lspec == 0)
    return 0;
  return (strstr (lspec, "UTF-8") != 0 || strstr (lspec, "utf8") != 0);
#endif
}