]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/locale-util.c
Merge pull request #13807 from 1848/ip6gre_key_fix
[thirdparty/systemd.git] / src / basic / locale-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <dirent.h>
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <ftw.h>
7 #include <langinfo.h>
8 #include <libintl.h>
9 #include <locale.h>
10 #include <stddef.h>
11 #include <stdint.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/mman.h>
15 #include <sys/stat.h>
16
17 #include "def.h"
18 #include "dirent-util.h"
19 #include "env-util.h"
20 #include "fd-util.h"
21 #include "hashmap.h"
22 #include "locale-util.h"
23 #include "path-util.h"
24 #include "set.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "strv.h"
28 #include "utf8.h"
29
30 static char *normalize_locale(const char *name) {
31 const char *e;
32
33 /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
34 * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
35 * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
36 * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
37 * that for UTF-8 however, since it's kinda the only charset that matters. */
38
39 e = endswith(name, ".utf8");
40 if (e) {
41 _cleanup_free_ char *prefix = NULL;
42
43 prefix = strndup(name, e - name);
44 if (!prefix)
45 return NULL;
46
47 return strjoin(prefix, ".UTF-8");
48 }
49
50 e = strstr(name, ".utf8@");
51 if (e) {
52 _cleanup_free_ char *prefix = NULL;
53
54 prefix = strndup(name, e - name);
55 if (!prefix)
56 return NULL;
57
58 return strjoin(prefix, ".UTF-8@", e + 6);
59 }
60
61 return strdup(name);
62 }
63
64 static int add_locales_from_archive(Set *locales) {
65 /* Stolen from glibc... */
66
67 struct locarhead {
68 uint32_t magic;
69 /* Serial number. */
70 uint32_t serial;
71 /* Name hash table. */
72 uint32_t namehash_offset;
73 uint32_t namehash_used;
74 uint32_t namehash_size;
75 /* String table. */
76 uint32_t string_offset;
77 uint32_t string_used;
78 uint32_t string_size;
79 /* Table with locale records. */
80 uint32_t locrectab_offset;
81 uint32_t locrectab_used;
82 uint32_t locrectab_size;
83 /* MD5 sum hash table. */
84 uint32_t sumhash_offset;
85 uint32_t sumhash_used;
86 uint32_t sumhash_size;
87 };
88
89 struct namehashent {
90 /* Hash value of the name. */
91 uint32_t hashval;
92 /* Offset of the name in the string table. */
93 uint32_t name_offset;
94 /* Offset of the locale record. */
95 uint32_t locrec_offset;
96 };
97
98 const struct locarhead *h;
99 const struct namehashent *e;
100 const void *p = MAP_FAILED;
101 _cleanup_close_ int fd = -1;
102 size_t sz = 0;
103 struct stat st;
104 size_t i;
105 int r;
106
107 fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
108 if (fd < 0)
109 return errno == ENOENT ? 0 : -errno;
110
111 if (fstat(fd, &st) < 0)
112 return -errno;
113
114 if (!S_ISREG(st.st_mode))
115 return -EBADMSG;
116
117 if (st.st_size < (off_t) sizeof(struct locarhead))
118 return -EBADMSG;
119
120 p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
121 if (p == MAP_FAILED)
122 return -errno;
123
124 h = (const struct locarhead *) p;
125 if (h->magic != 0xde020109 ||
126 h->namehash_offset + h->namehash_size > st.st_size ||
127 h->string_offset + h->string_size > st.st_size ||
128 h->locrectab_offset + h->locrectab_size > st.st_size ||
129 h->sumhash_offset + h->sumhash_size > st.st_size) {
130 r = -EBADMSG;
131 goto finish;
132 }
133
134 e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
135 for (i = 0; i < h->namehash_size; i++) {
136 char *z;
137
138 if (e[i].locrec_offset == 0)
139 continue;
140
141 if (!utf8_is_valid((char*) p + e[i].name_offset))
142 continue;
143
144 z = normalize_locale((char*) p + e[i].name_offset);
145 if (!z) {
146 r = -ENOMEM;
147 goto finish;
148 }
149
150 r = set_consume(locales, z);
151 if (r < 0)
152 goto finish;
153 }
154
155 r = 0;
156
157 finish:
158 if (p != MAP_FAILED)
159 munmap((void*) p, sz);
160
161 return r;
162 }
163
164 static int add_locales_from_libdir (Set *locales) {
165 _cleanup_closedir_ DIR *dir = NULL;
166 struct dirent *entry;
167 int r;
168
169 dir = opendir("/usr/lib/locale");
170 if (!dir)
171 return errno == ENOENT ? 0 : -errno;
172
173 FOREACH_DIRENT(entry, dir, return -errno) {
174 char *z;
175
176 dirent_ensure_type(dir, entry);
177
178 if (entry->d_type != DT_DIR)
179 continue;
180
181 z = normalize_locale(entry->d_name);
182 if (!z)
183 return -ENOMEM;
184
185 r = set_consume(locales, z);
186 if (r < 0 && r != -EEXIST)
187 return r;
188 }
189
190 return 0;
191 }
192
193 int get_locales(char ***ret) {
194 _cleanup_set_free_ Set *locales = NULL;
195 _cleanup_strv_free_ char **l = NULL;
196 int r;
197
198 locales = set_new(&string_hash_ops);
199 if (!locales)
200 return -ENOMEM;
201
202 r = add_locales_from_archive(locales);
203 if (r < 0 && r != -ENOENT)
204 return r;
205
206 r = add_locales_from_libdir(locales);
207 if (r < 0)
208 return r;
209
210 l = set_get_strv(locales);
211 if (!l)
212 return -ENOMEM;
213
214 r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
215 if (r == -ENXIO || r == 0) {
216 char **a, **b;
217
218 /* Filter out non-UTF-8 locales, because it's 2019, by default */
219 for (a = b = l; *a; a++) {
220
221 if (endswith(*a, "UTF-8") ||
222 strstr(*a, ".UTF-8@"))
223 *(b++) = *a;
224 else
225 free(*a);
226 }
227
228 *b = NULL;
229
230 } else if (r < 0)
231 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
232
233 strv_sort(l);
234
235 *ret = TAKE_PTR(l);
236
237 return 0;
238 }
239
/* Checks whether 'name' is acceptable as a locale name: it must be non-empty, shorter than 128
 * bytes, valid UTF-8, and pass both filename_is_valid() and string_is_safe(). The checks run in that
 * order and stop at the first one that fails. */
bool locale_is_valid(const char *name) {
        return !isempty(name) &&
                strlen(name) < 128 &&
                utf8_is_valid(name) &&
                filename_is_valid(name) &&
                string_is_safe(name);
}
259
260 void init_gettext(void) {
261 setlocale(LC_ALL, "");
262 textdomain(GETTEXT_PACKAGE);
263 }
264
/* Does the actual work for is_locale_utf8(), without any caching. Every case where the locale
 * cannot be determined answers 'true', since today UTF-8 is pretty much supported everywhere. */
static bool locale_utf8_probe(void) {
        const char *s;

        if (!setlocale(LC_ALL, ""))
                return true;

        s = nl_langinfo(CODESET);
        if (!s || streq(s, "UTF-8"))
                return true;

        /* The codeset is not UTF-8. If LC_CTYPE is "C"/"POSIX" it may simply be unset, in which case
         * we still assume UTF-8 works. */
        s = setlocale(LC_CTYPE, NULL);
        if (!s)
                return true;

        /* Only trust that assumption if C was not requested explicitly via the environment. */
        return STR_IN_SET(s, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");
}

/* Reports whether UTF-8 output should be assumed. The answer is computed on the first call and kept
 * in a static variable for all later calls. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;

        if (cached_answer < 0)
                cached_answer = locale_utf8_probe();

        return (bool) cached_answer;
}
310
/* Tells whether emoji glyphs may be used. $SYSTEMD_EMOJI decides if it parses as a boolean;
 * otherwise emoji are used when the locale is UTF-8 and $TERM is neither "dumb" nor "linux". The
 * decision is made once and cached in a static variable. */
static bool emoji_enabled(void) {
        static int cached_emoji_enabled = -1;
        int v;

        if (cached_emoji_enabled >= 0)
                return cached_emoji_enabled;

        v = getenv_bool("SYSTEMD_EMOJI");
        if (v >= 0)
                cached_emoji_enabled = v;
        else
                cached_emoji_enabled =
                        is_locale_utf8() &&
                        !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux");

        return cached_emoji_enabled;
}
328
329 const char *special_glyph(SpecialGlyph code) {
330
331 /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
332 * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
333 * works reasonably well on the Linux console. For details see:
334 *
335 * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
336 */
337
338 static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
339 /* ASCII fallback */
340 [false] = {
341 [SPECIAL_GLYPH_TREE_VERTICAL] = "| ",
342 [SPECIAL_GLYPH_TREE_BRANCH] = "|-",
343 [SPECIAL_GLYPH_TREE_RIGHT] = "`-",
344 [SPECIAL_GLYPH_TREE_SPACE] = " ",
345 [SPECIAL_GLYPH_TRIANGULAR_BULLET] = ">",
346 [SPECIAL_GLYPH_BLACK_CIRCLE] = "*",
347 [SPECIAL_GLYPH_BULLET] = "*",
348 [SPECIAL_GLYPH_ARROW] = "->",
349 [SPECIAL_GLYPH_MDASH] = "-",
350 [SPECIAL_GLYPH_ELLIPSIS] = "...",
351 [SPECIAL_GLYPH_MU] = "u",
352 [SPECIAL_GLYPH_CHECK_MARK] = "+",
353 [SPECIAL_GLYPH_CROSS_MARK] = "-",
354 [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]",
355 [SPECIAL_GLYPH_HAPPY_SMILEY] = ":-}",
356 [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = ":-)",
357 [SPECIAL_GLYPH_NEUTRAL_SMILEY] = ":-|",
358 [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
359 [SPECIAL_GLYPH_UNHAPPY_SMILEY] = ":-{",
360 [SPECIAL_GLYPH_DEPRESSED_SMILEY] = ":-[",
361 },
362
363 /* UTF-8 */
364 [true] = {
365 [SPECIAL_GLYPH_TREE_VERTICAL] = "\342\224\202 ", /* │ */
366 [SPECIAL_GLYPH_TREE_BRANCH] = "\342\224\234\342\224\200", /* ├─ */
367 [SPECIAL_GLYPH_TREE_RIGHT] = "\342\224\224\342\224\200", /* └─ */
368 [SPECIAL_GLYPH_TREE_SPACE] = " ", /* */
369 [SPECIAL_GLYPH_TRIANGULAR_BULLET] = "\342\200\243", /* ‣ */
370 [SPECIAL_GLYPH_BLACK_CIRCLE] = "\342\227\217", /* ● */
371 [SPECIAL_GLYPH_BULLET] = "\342\200\242", /* • */
372 [SPECIAL_GLYPH_ARROW] = "\342\206\222", /* → */
373 [SPECIAL_GLYPH_MDASH] = "\342\200\223", /* – */
374 [SPECIAL_GLYPH_ELLIPSIS] = "\342\200\246", /* … */
375 [SPECIAL_GLYPH_MU] = "\316\274", /* μ */
376 [SPECIAL_GLYPH_CHECK_MARK] = "\342\234\223", /* ✓ */
377 [SPECIAL_GLYPH_CROSS_MARK] = "\342\234\227", /* ✗ */
378 [SPECIAL_GLYPH_ECSTATIC_SMILEY] = "\360\237\230\207", /* 😇 */
379 [SPECIAL_GLYPH_HAPPY_SMILEY] = "\360\237\230\200", /* 😀 */
380 [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = "\360\237\231\202", /* 🙂 */
381 [SPECIAL_GLYPH_NEUTRAL_SMILEY] = "\360\237\230\220", /* 😐 */
382 [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201", /* 🙁 */
383 [SPECIAL_GLYPH_UNHAPPY_SMILEY] = "\360\237\230\250", /* 😨️️ */
384 [SPECIAL_GLYPH_DEPRESSED_SMILEY] = "\360\237\244\242", /* 🤢 */
385 },
386 };
387
388 assert(code < _SPECIAL_GLYPH_MAX);
389
390 return draw_table[code >= _SPECIAL_GLYPH_FIRST_SMILEY ? emoji_enabled() : is_locale_utf8()][code];
391 }
392
393 void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
394 LocaleVariable i;
395
396 if (!l)
397 return;
398
399 for (i = 0; i < _VARIABLE_LC_MAX; i++)
400 l[i] = mfree(l[i]);
401 }
402
/* Name of the environment variable that corresponds to each LocaleVariable value, indexed by that
 * value. */
static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
        [VARIABLE_LANG] = "LANG",
        [VARIABLE_LANGUAGE] = "LANGUAGE",
        [VARIABLE_LC_CTYPE] = "LC_CTYPE",
        [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
        [VARIABLE_LC_TIME] = "LC_TIME",
        [VARIABLE_LC_COLLATE] = "LC_COLLATE",
        [VARIABLE_LC_MONETARY] = "LC_MONETARY",
        [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
        [VARIABLE_LC_PAPER] = "LC_PAPER",
        [VARIABLE_LC_NAME] = "LC_NAME",
        [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
        [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
        [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
        [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
};

/* NOTE(review): presumably expands to the string <-> LocaleVariable lookup functions backed by the
 * table above; confirm against string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);