]> git.ipfire.org Git - thirdparty/bash.git/blob - locale.c
Bash-5.2 patch 10: slightly relax check for binary script files
[thirdparty/bash.git] / locale.c
1 /* locale.c - Miscellaneous internationalization functions. */
2
3 /* Copyright (C) 1996-2009,2012,2016-2021 Free Software Foundation, Inc.
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "config.h"
22
23 #include "bashtypes.h"
24
25 #if defined (HAVE_UNISTD_H)
26 # include <unistd.h>
27 #endif
28
29 #if HAVE_LANGINFO_CODESET
30 # include <langinfo.h>
31 #endif
32
33 #include "bashintl.h"
34 #include "bashansi.h"
35 #include <stdio.h>
36 #include "chartypes.h"
37 #include <errno.h>
38
39 #include "shell.h"
40 #include "input.h" /* For bash_input */
41
42 #ifndef errno
43 extern int errno;
44 #endif
45
46 int locale_utf8locale; /* result of locale_isutf8() for the current LC_CTYPE locale */
47 int locale_mb_cur_max; /* value of MB_CUR_MAX for current locale (LC_CTYPE) */
48 int locale_shiftstates = 0; /* result of mblen (NULL, 0); always 0 without HANDLE_MULTIBYTE */
49
50 int singlequote_translations = 0; /* single-quote output of $"..." */
51
52 extern int dump_translatable_strings, dump_po_strings;
53
54 /* The locale returned by setlocale (LC_ALL, "") when the program begins,
   or "C" when setlocale() is not available */
55 static char *default_locale;
56
57 /* tracks the value of TEXTDOMAIN; the domain passed to dgettext(3) and
   bindtextdomain(3) */
58 static char *default_domain;
59 static char *default_dir; /* tracks the value of TEXTDOMAINDIR */
60
61 /* tracks the value of LC_ALL; used to override values for other locale
62 categories */
63 static char *lc_all;
64
65 /* tracks the value of LANG; used to provide defaults for locale
66 categories */
67 static char *lang;
68
69 /* Called to reset all of the locale variables to their appropriate values
70 if (and only if) LC_ALL has not been assigned a value. */
71 static int reset_locale_vars PARAMS((void));
72
73 static void locale_setblanks PARAMS((void));
74 static int locale_isutf8 PARAMS((char *));
75
76 /* Set the value of default_locale and make the current locale the
77 system default locale. This should be called very early in main(). */
78 void
79 set_default_locale ()
80 {
81 #if defined (HAVE_SETLOCALE)
82 default_locale = setlocale (LC_ALL, "");
83 if (default_locale)
84 default_locale = savestring (default_locale);
85 #else
86 default_locale = savestring ("C");
87 #endif /* HAVE_SETLOCALE */
88 bindtextdomain (PACKAGE, LOCALEDIR);
89 textdomain (PACKAGE);
90
91 locale_mb_cur_max = MB_CUR_MAX;
92 locale_utf8locale = locale_isutf8 (default_locale);
93 #if defined (HANDLE_MULTIBYTE)
94 locale_shiftstates = mblen ((char *)NULL, 0);
95 #else
96 locale_shiftstates = 0;
97 #endif
98 }
99
100 /* Set default values for LC_CTYPE, LC_COLLATE, LC_MESSAGES, LC_NUMERIC and
101 LC_TIME if they are not specified in the environment, but LC_ALL is. This
102 should be called from main() after parsing the environment. */
103 void
104 set_default_locale_vars ()
105 {
106 char *val;
107
108 #if defined (HAVE_SETLOCALE)
109
110 # if defined (LC_CTYPE)
111 val = get_string_value ("LC_CTYPE");
112 if (val == 0 && lc_all && *lc_all)
113 {
114 setlocale (LC_CTYPE, lc_all);
115 locale_setblanks ();
116 locale_mb_cur_max = MB_CUR_MAX;
117 locale_utf8locale = locale_isutf8 (lc_all);
118
119 # if defined (HANDLE_MULTIBYTE)
120 locale_shiftstates = mblen ((char *)NULL, 0);
121 # else
122 locale_shiftstates = 0;
123 # endif
124
125 u32reset ();
126 }
127 # endif
128
129 # if defined (LC_COLLATE)
130 val = get_string_value ("LC_COLLATE");
131 if (val == 0 && lc_all && *lc_all)
132 setlocale (LC_COLLATE, lc_all);
133 # endif /* LC_COLLATE */
134
135 # if defined (LC_MESSAGES)
136 val = get_string_value ("LC_MESSAGES");
137 if (val == 0 && lc_all && *lc_all)
138 setlocale (LC_MESSAGES, lc_all);
139 # endif /* LC_MESSAGES */
140
141 # if defined (LC_NUMERIC)
142 val = get_string_value ("LC_NUMERIC");
143 if (val == 0 && lc_all && *lc_all)
144 setlocale (LC_NUMERIC, lc_all);
145 # endif /* LC_NUMERIC */
146
147 # if defined (LC_TIME)
148 val = get_string_value ("LC_TIME");
149 if (val == 0 && lc_all && *lc_all)
150 setlocale (LC_TIME, lc_all);
151 # endif /* LC_TIME */
152
153 #endif /* HAVE_SETLOCALE */
154
155 val = get_string_value ("TEXTDOMAIN");
156 if (val && *val)
157 {
158 FREE (default_domain);
159 default_domain = savestring (val);
160 if (default_dir && *default_dir)
161 bindtextdomain (default_domain, default_dir);
162 }
163
164 val = get_string_value ("TEXTDOMAINDIR");
165 if (val && *val)
166 {
167 FREE (default_dir);
168 default_dir = savestring (val);
169 if (default_domain && *default_domain)
170 bindtextdomain (default_domain, default_dir);
171 }
172 }
173
174 /* Set one of the locale categories (specified by VAR) to VALUE. Returns 1
175 if successful, 0 otherwise. */
176 int
177 set_locale_var (var, value)
178 char *var, *value;
179 {
180 int r;
181 char *x;
182
183 x = "";
184 errno = 0;
185 if (var[0] == 'T' && var[10] == 0) /* TEXTDOMAIN */
186 {
187 FREE (default_domain);
188 default_domain = value ? savestring (value) : (char *)NULL;
189 if (default_dir && *default_dir)
190 bindtextdomain (default_domain, default_dir);
191 return (1);
192 }
193 else if (var[0] == 'T') /* TEXTDOMAINDIR */
194 {
195 FREE (default_dir);
196 default_dir = value ? savestring (value) : (char *)NULL;
197 if (default_domain && *default_domain)
198 bindtextdomain (default_domain, default_dir);
199 return (1);
200 }
201
202 /* var[0] == 'L' && var[1] == 'C' && var[2] == '_' */
203
204 else if (var[3] == 'A') /* LC_ALL */
205 {
206 FREE (lc_all);
207 if (value)
208 lc_all = savestring (value);
209 else
210 {
211 lc_all = (char *)xmalloc (1);
212 lc_all[0] = '\0';
213 }
214 #if defined (HAVE_SETLOCALE)
215 r = *lc_all ? ((x = setlocale (LC_ALL, lc_all)) != 0) : reset_locale_vars ();
216 if (x == 0)
217 {
218 if (errno == 0)
219 internal_warning(_("setlocale: LC_ALL: cannot change locale (%s)"), lc_all);
220 else
221 internal_warning(_("setlocale: LC_ALL: cannot change locale (%s): %s"), lc_all, strerror (errno));
222 }
223 locale_setblanks ();
224 locale_mb_cur_max = MB_CUR_MAX;
225 /* if LC_ALL == "", reset_locale_vars has already called this */
226 if (*lc_all && x)
227 locale_utf8locale = locale_isutf8 (lc_all);
228 # if defined (HANDLE_MULTIBYTE)
229 locale_shiftstates = mblen ((char *)NULL, 0);
230 # else
231 locale_shiftstates = 0;
232 # endif
233 u32reset ();
234 return r;
235 #else
236 return (1);
237 #endif
238 }
239
240 #if defined (HAVE_SETLOCALE)
241 else if (var[3] == 'C' && var[4] == 'T') /* LC_CTYPE */
242 {
243 # if defined (LC_CTYPE)
244 if (lc_all == 0 || *lc_all == '\0')
245 {
246 x = setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE"));
247 locale_setblanks ();
248 locale_mb_cur_max = MB_CUR_MAX;
249 /* if setlocale() returns NULL, the locale is not changed */
250 if (x)
251 locale_utf8locale = locale_isutf8 (x);
252 #if defined (HANDLE_MULTIBYTE)
253 locale_shiftstates = mblen ((char *)NULL, 0);
254 #else
255 locale_shiftstates = 0;
256 #endif
257 u32reset ();
258 }
259 # endif
260 }
261 else if (var[3] == 'C' && var[4] == 'O') /* LC_COLLATE */
262 {
263 # if defined (LC_COLLATE)
264 if (lc_all == 0 || *lc_all == '\0')
265 x = setlocale (LC_COLLATE, get_locale_var ("LC_COLLATE"));
266 # endif /* LC_COLLATE */
267 }
268 else if (var[3] == 'M' && var[4] == 'E') /* LC_MESSAGES */
269 {
270 # if defined (LC_MESSAGES)
271 if (lc_all == 0 || *lc_all == '\0')
272 x = setlocale (LC_MESSAGES, get_locale_var ("LC_MESSAGES"));
273 # endif /* LC_MESSAGES */
274 }
275 else if (var[3] == 'N' && var[4] == 'U') /* LC_NUMERIC */
276 {
277 # if defined (LC_NUMERIC)
278 if (lc_all == 0 || *lc_all == '\0')
279 x = setlocale (LC_NUMERIC, get_locale_var ("LC_NUMERIC"));
280 # endif /* LC_NUMERIC */
281 }
282 else if (var[3] == 'T' && var[4] == 'I') /* LC_TIME */
283 {
284 # if defined (LC_TIME)
285 if (lc_all == 0 || *lc_all == '\0')
286 x = setlocale (LC_TIME, get_locale_var ("LC_TIME"));
287 # endif /* LC_TIME */
288 }
289 #endif /* HAVE_SETLOCALE */
290
291 if (x == 0)
292 {
293 if (errno == 0)
294 internal_warning(_("setlocale: %s: cannot change locale (%s)"), var, get_locale_var (var));
295 else
296 internal_warning(_("setlocale: %s: cannot change locale (%s): %s"), var, get_locale_var (var), strerror (errno));
297 }
298
299 return (x != 0);
300 }
301
302 /* Called when LANG is assigned a value. Tracks value in `lang'. Calls
303 reset_locale_vars() to reset any default values if LC_ALL is unset or
304 null. */
305 int
306 set_lang (var, value)
307 char *var, *value;
308 {
309 FREE (lang);
310 if (value)
311 lang = savestring (value);
312 else
313 {
314 lang = (char *)xmalloc (1);
315 lang[0] = '\0';
316 }
317
318 return ((lc_all == 0 || *lc_all == 0) ? reset_locale_vars () : 0);
319 }
320
/* Initialize the tracked LC_ALL and LANG values from the corresponding
   shell variables, LC_ALL first.  Default values for all other
   locale-related variables depend on these. */
void
set_default_lang ()
{
  set_locale_var ("LC_ALL", get_string_value ("LC_ALL"));
  set_lang ("LANG", get_string_value ("LANG"));
}
334
335 /* Get the value of one of the locale variables (LC_MESSAGES, LC_CTYPE).
336 The precedence is as POSIX.2 specifies: LC_ALL has precedence over
337 the specific locale variables, and LANG, if set, is used as the default. */
338 char *
339 get_locale_var (var)
340 char *var;
341 {
342 char *locale;
343
344 locale = lc_all;
345
346 if (locale == 0 || *locale == 0)
347 locale = get_string_value (var); /* XXX - no mem leak */
348 if (locale == 0 || *locale == 0)
349 locale = lang;
350 if (locale == 0 || *locale == 0)
351 #if 0
352 locale = default_locale; /* system-dependent; not really portable. should it be "C"? */
353 #else
354 locale = "";
355 #endif
356 return (locale);
357 }
358
/* Called to reset all of the locale variables to their appropriate values
   if (and only if) LC_ALL has not been assigned a value.  DO NOT CALL THIS
   IF LC_ALL HAS BEEN ASSIGNED A VALUE.

   Installs LANG (or "" when LANG is unset) for every category, then
   re-applies each individual category from get_locale_var() and refreshes
   the cached LC_CTYPE-derived state.  Returns 0 if the initial
   setlocale (LC_ALL, ...) fails, 1 otherwise; failures of the individual
   categories are not reported.  Without HAVE_SETLOCALE this does nothing
   and returns 1. */
static int
reset_locale_vars ()
{
#if defined (HAVE_SETLOCALE)
  char *ctype;

  if (lang == 0 || *lang == '\0')
    maybe_make_export_env ();   /* trust that this will change environment for setlocale */
  if (setlocale (LC_ALL, lang ? lang : "") == 0)
    return 0;

  /* Only the LC_CTYPE result is needed afterwards (for the UTF-8 check). */
  ctype = 0;
#  if defined (LC_CTYPE)
  ctype = setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE"));
#  endif
#  if defined (LC_COLLATE)
  setlocale (LC_COLLATE, get_locale_var ("LC_COLLATE"));
#  endif
#  if defined (LC_MESSAGES)
  setlocale (LC_MESSAGES, get_locale_var ("LC_MESSAGES"));
#  endif
#  if defined (LC_NUMERIC)
  setlocale (LC_NUMERIC, get_locale_var ("LC_NUMERIC"));
#  endif
#  if defined (LC_TIME)
  setlocale (LC_TIME, get_locale_var ("LC_TIME"));
#  endif

  locale_setblanks ();
  locale_mb_cur_max = MB_CUR_MAX;
  /* A null result means LC_CTYPE was not changed; keep the old flag. */
  if (ctype)
    locale_utf8locale = locale_isutf8 (ctype);
#  if defined (HANDLE_MULTIBYTE)
  locale_shiftstates = mblen ((char *)NULL, 0);
#  else
  locale_shiftstates = 0;
#  endif
  u32reset ();
#endif /* HAVE_SETLOCALE */
  return 1;
}
402
403 #if defined (TRANSLATABLE_STRINGS)
404 /* Translate the contents of STRING, a $"..." quoted string, according
405 to the current locale. In the `C' or `POSIX' locale, or if gettext()
406 is not available, the passed string is returned unchanged. The
407 length of the translated string is returned in LENP, if non-null. */
408 char *
409 localetrans (string, len, lenp)
410 char *string;
411 int len, *lenp;
412 {
413 char *locale, *t;
414 char *translated;
415 int tlen;
416
417 /* Don't try to translate null strings. */
418 if (string == 0 || *string == 0)
419 {
420 if (lenp)
421 *lenp = 0;
422 return ((char *)NULL);
423 }
424
425 locale = get_locale_var ("LC_MESSAGES");
426
427 /* If we don't have setlocale() or the current locale is `C' or `POSIX',
428 just return the string. If we don't have gettext(), there's no use
429 doing anything else. */
430 if (locale == 0 || locale[0] == '\0' ||
431 (locale[0] == 'C' && locale[1] == '\0') || STREQ (locale, "POSIX"))
432 {
433 t = (char *)xmalloc (len + 1);
434 strcpy (t, string);
435 if (lenp)
436 *lenp = len;
437 return (t);
438 }
439
440 /* Now try to translate it. */
441 if (default_domain && *default_domain)
442 translated = dgettext (default_domain, string);
443 else
444 translated = string;
445
446 if (translated == string) /* gettext returns its argument if untranslatable */
447 {
448 t = (char *)xmalloc (len + 1);
449 strcpy (t, string);
450 if (lenp)
451 *lenp = len;
452 }
453 else
454 {
455 tlen = strlen (translated);
456 t = (char *)xmalloc (tlen + 1);
457 strcpy (t, translated);
458 if (lenp)
459 *lenp = tlen;
460 }
461 return (t);
462 }
463
/* Change a bash string into a string suitable for inclusion in a `po' file.
   The result is newly allocated and wrapped in double quotes; `"' and `\'
   are backslash-escaped and each newline becomes the four characters
   `\n"' followed by a real newline and a reopening `"'.  *FOUNDNLP, if
   FOUNDNLP is non-null, is set to 1 when a newline is seen; it is never
   cleared here.  A null STRING produces `""'. */
char *
mk_msgstr (string, foundnlp)
     char *string;
     int *foundnlp;
{
  int size;
  char *result, *dst, *src;

  /* First pass: upper bound on the escaped length, excluding the
     surrounding quotes and the terminating null. */
  size = 0;
  if (string)
    for (src = string; *src; src++)
      switch (*src)
        {
        case '"':
        case '\\':
          size += 2;
          break;
        case '\n':
          size += 6;
          break;
        default:
          size++;
          break;
        }

  dst = result = (char *)xmalloc (size + 3);
  *dst++ = '"';

  /* Second pass: emit the escaped text. */
  if (string)
    for (src = string; *src; src++)
      switch (*src)
        {
        case '\n':              /* <NL> -> \n"<NL>" */
          *dst++ = '\\';
          *dst++ = 'n';
          *dst++ = '"';
          *dst++ = '\n';
          *dst++ = '"';
          if (foundnlp)
            *foundnlp = 1;
          break;
        case '"':
        case '\\':
          *dst++ = '\\';
          *dst++ = *src;
          break;
        default:
          *dst++ = *src;
          break;
        }

  *dst++ = '"';
  *dst++ = '\0';

  return result;
}
509
510 /* $"..." -- Translate the portion of STRING between START and END
511 according to current locale using gettext (if available) and return
512 the result. The caller will take care of leaving the quotes intact.
513 The string will be left without the leading `$' by the caller.
514 If translation is performed, the translated string will be double-quoted
515 by the caller. The length of the translated string is returned in LENP,
516 if non-null. */
517 char *
518 locale_expand (string, start, end, lineno, lenp)
519 char *string;
520 int start, end, lineno, *lenp;
521 {
522 int len, tlen, foundnl;
523 char *temp, *t, *t2;
524
525 temp = (char *)xmalloc (end - start + 1);
526 for (tlen = 0, len = start; len < end; )
527 temp[tlen++] = string[len++];
528 temp[tlen] = '\0';
529
530 /* If we're just dumping translatable strings, don't do anything with the
531 string itself, but if we're dumping in `po' file format, convert it into
532 a form more palatable to gettext(3) and friends by quoting `"' and `\'
533 with backslashes and converting <NL> into `\n"<NL>"'. If we find a
534 newline in TEMP, we first output a `msgid ""' line and then the
535 translated string; otherwise we output the `msgid' and translated
536 string all on one line. */
537 if (dump_translatable_strings)
538 {
539 if (dump_po_strings)
540 {
541 foundnl = 0;
542 t = mk_msgstr (temp, &foundnl);
543 t2 = foundnl ? "\"\"\n" : "";
544
545 printf ("#: %s:%d\nmsgid %s%s\nmsgstr \"\"\n",
546 yy_input_name (), lineno, t2, t);
547 free (t);
548 }
549 else
550 printf ("\"%s\"\n", temp);
551
552 if (lenp)
553 *lenp = tlen;
554 return (temp);
555 }
556 else if (*temp)
557 {
558 t = localetrans (temp, tlen, &len);
559 free (temp);
560 if (lenp)
561 *lenp = len;
562 return (t);
563 }
564 else
565 {
566 if (lenp)
567 *lenp = 0;
568 return (temp);
569 }
570 }
571 #endif
572
573 /* Set every character in the <blank> character class to be a shell break
574 character for the lexical analyzer when the locale changes. */
575 static void
576 locale_setblanks ()
577 {
578 int x;
579
580 for (x = 0; x < sh_syntabsiz; x++)
581 {
582 if (isblank ((unsigned char)x))
583 sh_syntaxtab[x] |= CSHBRK|CBLANK;
584 else if (member (x, shell_break_chars))
585 {
586 sh_syntaxtab[x] |= CSHBRK;
587 sh_syntaxtab[x] &= ~CBLANK;
588 }
589 else
590 sh_syntaxtab[x] &= ~(CSHBRK|CBLANK);
591 }
592 }
593
/* Return TRUE if the codeset of the locale is UTF-8 or utf8.

   When nl_langinfo (CODESET) or locale_charset () is available, the
   answer comes from the current locale and LSPEC is ignored.  Otherwise
   LSPEC is parsed as a locale specification
        language[_territory][.codeset][@modifier][+special][,[sponsor][_revision]]
   and the codeset component is compared exactly (case-sensitively)
   against "UTF-8" and "utf8".  A null LSPEC, or one without a codeset,
   is not UTF-8. */
static int
locale_isutf8 (lspec)
     char *lspec;
{
#if HAVE_LANGINFO_CODESET
  char *codeset;

  codeset = nl_langinfo (CODESET);
  return (STREQ (codeset, "UTF-8") || STREQ (codeset, "utf8"));
#elif HAVE_LOCALE_CHARSET
  char *codeset;

  codeset = locale_charset ();
  return (STREQ (codeset, "UTF-8") || STREQ (codeset, "utf8"));
#else
  char *cp, *encoding;

  /* The caller passes a null pointer when setlocale() failed. */
  if (lspec == 0)
    return 0;

  /* Take a shot */
  for (cp = lspec; *cp && *cp != '@' && *cp != '+' && *cp != ','; cp++)
    {
      if (*cp == '.')
        {
          for (encoding = ++cp; *cp && *cp != '@' && *cp != '+' && *cp != ','; cp++)
            ;
          /* The encoding (codeset) is the substring between encoding and cp */
          if ((cp - encoding == 5 && strncmp (encoding, "UTF-8", 5) == 0) ||
              (cp - encoding == 4 && strncmp (encoding, "utf8", 4) == 0))
            return 1;
          else
            return 0;
        }
    }
  return 0;
#endif
}
628
#if !defined (HAVE_LOCALECONV)
#  undef locale_decpoint
#endif

/* Return the first character of the current locale's decimal point as
   reported by localeconv().  Returns '.' when localeconv() is not
   available, returns a null pointer, or reports a null or empty
   decimal_point. */
int
locale_decpoint ()
{
#if defined (HAVE_LOCALECONV)
  struct lconv *lv;

  lv = localeconv ();
  if (lv == 0 || lv->decimal_point == 0 || lv->decimal_point[0] == '\0')
    return '.';
  return lv->decimal_point[0];
#else
  return '.';
#endif
}