]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/locale-util.c
Merge pull request #20346 from poettering/strlen-unsigned-fix
[thirdparty/systemd.git] / src / basic / locale-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <ftw.h>
6 #include <langinfo.h>
7 #include <libintl.h>
8 #include <stddef.h>
9 #include <stdint.h>
10 #include <stdlib.h>
11 #include <sys/mman.h>
12 #include <sys/stat.h>
13
14 #include "def.h"
15 #include "dirent-util.h"
16 #include "env-util.h"
17 #include "fd-util.h"
18 #include "hashmap.h"
19 #include "locale-util.h"
20 #include "path-util.h"
21 #include "set.h"
22 #include "string-table.h"
23 #include "string-util.h"
24 #include "strv.h"
25 #include "utf8.h"
26
27 static char *normalize_locale(const char *name) {
28 const char *e;
29
30 /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
31 * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
32 * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
33 * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
34 * that for UTF-8 however, since it's kinda the only charset that matters. */
35
36 e = endswith(name, ".utf8");
37 if (e) {
38 _cleanup_free_ char *prefix = NULL;
39
40 prefix = strndup(name, e - name);
41 if (!prefix)
42 return NULL;
43
44 return strjoin(prefix, ".UTF-8");
45 }
46
47 e = strstr(name, ".utf8@");
48 if (e) {
49 _cleanup_free_ char *prefix = NULL;
50
51 prefix = strndup(name, e - name);
52 if (!prefix)
53 return NULL;
54
55 return strjoin(prefix, ".UTF-8@", e + 6);
56 }
57
58 return strdup(name);
59 }
60
61 static int add_locales_from_archive(Set *locales) {
62 /* Stolen from glibc... */
63
64 struct locarhead {
65 uint32_t magic;
66 /* Serial number. */
67 uint32_t serial;
68 /* Name hash table. */
69 uint32_t namehash_offset;
70 uint32_t namehash_used;
71 uint32_t namehash_size;
72 /* String table. */
73 uint32_t string_offset;
74 uint32_t string_used;
75 uint32_t string_size;
76 /* Table with locale records. */
77 uint32_t locrectab_offset;
78 uint32_t locrectab_used;
79 uint32_t locrectab_size;
80 /* MD5 sum hash table. */
81 uint32_t sumhash_offset;
82 uint32_t sumhash_used;
83 uint32_t sumhash_size;
84 };
85
86 struct namehashent {
87 /* Hash value of the name. */
88 uint32_t hashval;
89 /* Offset of the name in the string table. */
90 uint32_t name_offset;
91 /* Offset of the locale record. */
92 uint32_t locrec_offset;
93 };
94
95 const struct locarhead *h;
96 const struct namehashent *e;
97 const void *p = MAP_FAILED;
98 _cleanup_close_ int fd = -1;
99 size_t sz = 0;
100 struct stat st;
101 int r;
102
103 fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
104 if (fd < 0)
105 return errno == ENOENT ? 0 : -errno;
106
107 if (fstat(fd, &st) < 0)
108 return -errno;
109
110 if (!S_ISREG(st.st_mode))
111 return -EBADMSG;
112
113 if (st.st_size < (off_t) sizeof(struct locarhead))
114 return -EBADMSG;
115
116 p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
117 if (p == MAP_FAILED)
118 return -errno;
119
120 h = (const struct locarhead *) p;
121 if (h->magic != 0xde020109 ||
122 h->namehash_offset + h->namehash_size > st.st_size ||
123 h->string_offset + h->string_size > st.st_size ||
124 h->locrectab_offset + h->locrectab_size > st.st_size ||
125 h->sumhash_offset + h->sumhash_size > st.st_size) {
126 r = -EBADMSG;
127 goto finish;
128 }
129
130 e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
131 for (size_t i = 0; i < h->namehash_size; i++) {
132 char *z;
133
134 if (e[i].locrec_offset == 0)
135 continue;
136
137 if (!utf8_is_valid((char*) p + e[i].name_offset))
138 continue;
139
140 z = normalize_locale((char*) p + e[i].name_offset);
141 if (!z) {
142 r = -ENOMEM;
143 goto finish;
144 }
145
146 r = set_consume(locales, z);
147 if (r < 0)
148 goto finish;
149 }
150
151 r = 0;
152
153 finish:
154 if (p != MAP_FAILED)
155 munmap((void*) p, sz);
156
157 return r;
158 }
159
160 static int add_locales_from_libdir (Set *locales) {
161 _cleanup_closedir_ DIR *dir = NULL;
162 struct dirent *entry;
163 int r;
164
165 dir = opendir("/usr/lib/locale");
166 if (!dir)
167 return errno == ENOENT ? 0 : -errno;
168
169 FOREACH_DIRENT(entry, dir, return -errno) {
170 char *z;
171
172 if (entry->d_type != DT_DIR)
173 continue;
174
175 z = normalize_locale(entry->d_name);
176 if (!z)
177 return -ENOMEM;
178
179 r = set_consume(locales, z);
180 if (r < 0 && r != -EEXIST)
181 return r;
182 }
183
184 return 0;
185 }
186
187 int get_locales(char ***ret) {
188 _cleanup_set_free_ Set *locales = NULL;
189 _cleanup_strv_free_ char **l = NULL;
190 int r;
191
192 locales = set_new(&string_hash_ops);
193 if (!locales)
194 return -ENOMEM;
195
196 r = add_locales_from_archive(locales);
197 if (r < 0 && r != -ENOENT)
198 return r;
199
200 r = add_locales_from_libdir(locales);
201 if (r < 0)
202 return r;
203
204 l = set_get_strv(locales);
205 if (!l)
206 return -ENOMEM;
207
208 r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
209 if (r == -ENXIO || r == 0) {
210 char **a, **b;
211
212 /* Filter out non-UTF-8 locales, because it's 2019, by default */
213 for (a = b = l; *a; a++) {
214
215 if (endswith(*a, "UTF-8") ||
216 strstr(*a, ".UTF-8@"))
217 *(b++) = *a;
218 else
219 free(*a);
220 }
221
222 *b = NULL;
223
224 } else if (r < 0)
225 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
226
227 strv_sort(l);
228
229 *ret = TAKE_PTR(l);
230
231 return 0;
232 }
233
/* Checks whether 'name' is syntactically acceptable as a locale name: it must be non-empty, shorter
 * than 128 bytes, valid UTF-8, a valid file name, and a "safe" string. Whether such a locale actually
 * exists is not checked here. */
bool locale_is_valid(const char *name) {
        return !isempty(name) &&
                strlen(name) < 128 &&
                utf8_is_valid(name) &&
                filename_is_valid(name) &&
                string_is_safe(name);
}
253
254 int locale_is_installed(const char *name) {
255 if (!locale_is_valid(name))
256 return false;
257
258 if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
259 return true;
260
261 _cleanup_(freelocalep) locale_t loc =
262 newlocale(LC_ALL_MASK, name, 0);
263 if (loc == (locale_t) 0)
264 return errno == ENOMEM ? -ENOMEM : false;
265
266 return true;
267 }
268
269 void init_gettext(void) {
270 setlocale(LC_ALL, "");
271 textdomain(GETTEXT_PACKAGE);
272 }
273
/* Does the actual work for is_locale_utf8(), without any caching. Note that this switches the
 * process locale to the one requested by the environment. */
static bool locale_charset_is_utf8(void) {
        const char *s;

        /* We default to 'true' whenever something cannot be determined, since today UTF-8 is pretty
         * much supported everywhere. */

        if (!setlocale(LC_ALL, ""))
                return true;

        s = nl_langinfo(CODESET);
        if (!s || streq(s, "UTF-8"))
                return true;

        /* The codeset is not UTF-8. If LC_CTYPE is "C" this may simply mean that nothing was
         * configured at all, in which case we assume UTF-8 works. */
        s = setlocale(LC_CTYPE, NULL);
        if (!s)
                return true;

        /* Only treat "C"/"POSIX" as "unset" if it was not requested explicitly through one of the
         * environment variables. */
        return STR_IN_SET(s, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");
}

/* Returns whether the current locale uses UTF-8. The answer is determined on the first call and
 * cached for all later ones. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;

        if (cached_answer < 0)
                cached_answer = locale_charset_is_utf8();

        return (bool) cached_answer;
}
319
/* Returns whether emoji glyphs shall be used in output. $SYSTEMD_EMOJI decides if it parses as a
 * boolean; otherwise emojis are enabled when the locale is UTF-8 and $TERM is neither "dumb" nor
 * "linux". The result is computed once and cached. */
bool emoji_enabled(void) {
        static int cached_emoji_enabled = -1;
        int from_env;

        if (cached_emoji_enabled >= 0)
                return cached_emoji_enabled;

        from_env = getenv_bool("SYSTEMD_EMOJI");
        if (from_env >= 0)
                cached_emoji_enabled = from_env;
        else
                cached_emoji_enabled =
                        is_locale_utf8() &&
                        !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux");

        return cached_emoji_enabled;
}
337
338 const char *special_glyph(SpecialGlyph code) {
339
340 /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
341 * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
342 * works reasonably well on the Linux console. For details see:
343 *
344 * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
345 */
346
347 static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
348 /* ASCII fallback */
349 [false] = {
350 [SPECIAL_GLYPH_TREE_VERTICAL] = "| ",
351 [SPECIAL_GLYPH_TREE_BRANCH] = "|-",
352 [SPECIAL_GLYPH_TREE_RIGHT] = "`-",
353 [SPECIAL_GLYPH_TREE_SPACE] = " ",
354 [SPECIAL_GLYPH_TRIANGULAR_BULLET] = ">",
355 [SPECIAL_GLYPH_BLACK_CIRCLE] = "*",
356 [SPECIAL_GLYPH_WHITE_CIRCLE] = "*",
357 [SPECIAL_GLYPH_MULTIPLICATION_SIGN] = "x",
358 [SPECIAL_GLYPH_CIRCLE_ARROW] = "*",
359 [SPECIAL_GLYPH_BULLET] = "*",
360 [SPECIAL_GLYPH_MU] = "u",
361 [SPECIAL_GLYPH_CHECK_MARK] = "+",
362 [SPECIAL_GLYPH_CROSS_MARK] = "-",
363 [SPECIAL_GLYPH_LIGHT_SHADE] = "-",
364 [SPECIAL_GLYPH_DARK_SHADE] = "X",
365 [SPECIAL_GLYPH_SIGMA] = "S",
366 [SPECIAL_GLYPH_ARROW] = "->",
367 [SPECIAL_GLYPH_ELLIPSIS] = "...",
368 [SPECIAL_GLYPH_EXTERNAL_LINK] = "[LNK]",
369 [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]",
370 [SPECIAL_GLYPH_HAPPY_SMILEY] = ":-}",
371 [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = ":-)",
372 [SPECIAL_GLYPH_NEUTRAL_SMILEY] = ":-|",
373 [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
374 [SPECIAL_GLYPH_UNHAPPY_SMILEY] = ":-{",
375 [SPECIAL_GLYPH_DEPRESSED_SMILEY] = ":-[",
376 [SPECIAL_GLYPH_LOCK_AND_KEY] = "o-,",
377 [SPECIAL_GLYPH_TOUCH] = "O=", /* Yeah, not very convincing, can you do it better? */
378 [SPECIAL_GLYPH_RECYCLING] = "~",
379 [SPECIAL_GLYPH_DOWNLOAD] = "\\",
380 [SPECIAL_GLYPH_SPARKLES] = "*",
381 },
382
383 /* UTF-8 */
384 [true] = {
385 /* The following are multiple glyphs in both ASCII and in UNICODE */
386 [SPECIAL_GLYPH_TREE_VERTICAL] = "\342\224\202 ", /* │ */
387 [SPECIAL_GLYPH_TREE_BRANCH] = "\342\224\234\342\224\200", /* ├─ */
388 [SPECIAL_GLYPH_TREE_RIGHT] = "\342\224\224\342\224\200", /* └─ */
389 [SPECIAL_GLYPH_TREE_SPACE] = " ", /* */
390
391 /* Single glyphs in both cases */
392 [SPECIAL_GLYPH_TRIANGULAR_BULLET] = "\342\200\243", /* ‣ */
393 [SPECIAL_GLYPH_BLACK_CIRCLE] = "\342\227\217", /* ● */
394 [SPECIAL_GLYPH_WHITE_CIRCLE] = "\u25CB", /* ○ */
395 [SPECIAL_GLYPH_MULTIPLICATION_SIGN] = "\u00D7", /* × */
396 [SPECIAL_GLYPH_CIRCLE_ARROW] = "\u21BB", /* ↻ */
397 [SPECIAL_GLYPH_BULLET] = "\342\200\242", /* • */
398 [SPECIAL_GLYPH_MU] = "\316\274", /* μ (actually called: GREEK SMALL LETTER MU) */
399 [SPECIAL_GLYPH_CHECK_MARK] = "\342\234\223", /* ✓ */
400 [SPECIAL_GLYPH_CROSS_MARK] = "\342\234\227", /* ✗ (actually called: BALLOT X) */
401 [SPECIAL_GLYPH_LIGHT_SHADE] = "\342\226\221", /* ░ */
402 [SPECIAL_GLYPH_DARK_SHADE] = "\342\226\223", /* ▒ */
403 [SPECIAL_GLYPH_SIGMA] = "\316\243", /* Σ */
404
405 /* Single glyph in Unicode, two in ASCII */
406 [SPECIAL_GLYPH_ARROW] = "\342\206\222", /* → (actually called: RIGHTWARDS ARROW) */
407
408 /* Single glyph in Unicode, three in ASCII */
409 [SPECIAL_GLYPH_ELLIPSIS] = "\342\200\246", /* … (actually called: HORIZONTAL ELLIPSIS) */
410
411 /* Three glyphs in Unicode, five in ASCII */
412 [SPECIAL_GLYPH_EXTERNAL_LINK] = "[\360\237\241\225]", /* 🡕 (actually called: NORTH EAST SANS-SERIF ARROW, enclosed in []) */
413
414 /* These smileys are a single glyph in Unicode, and three in ASCII */
415 [SPECIAL_GLYPH_ECSTATIC_SMILEY] = "\360\237\230\207", /* 😇 (actually called: SMILING FACE WITH HALO) */
416 [SPECIAL_GLYPH_HAPPY_SMILEY] = "\360\237\230\200", /* 😀 (actually called: GRINNING FACE) */
417 [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = "\360\237\231\202", /* 🙂 (actually called: SLIGHTLY SMILING FACE) */
418 [SPECIAL_GLYPH_NEUTRAL_SMILEY] = "\360\237\230\220", /* 😐 (actually called: NEUTRAL FACE) */
419 [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201", /* 🙁 (actually called: SLIGHTLY FROWNING FACE) */
420 [SPECIAL_GLYPH_UNHAPPY_SMILEY] = "\360\237\230\250", /* 😨 (actually called: FEARFUL FACE) */
421 [SPECIAL_GLYPH_DEPRESSED_SMILEY] = "\360\237\244\242", /* 🤢 (actually called: NAUSEATED FACE) */
422
423 /* This emoji is a single character cell glyph in Unicode, and three in ASCII */
424 [SPECIAL_GLYPH_LOCK_AND_KEY] = "\360\237\224\220", /* 🔐 (actually called: CLOSED LOCK WITH KEY) */
425
426 /* This emoji is a single character cell glyph in Unicode, and two in ASCII */
427 [SPECIAL_GLYPH_TOUCH] = "\360\237\221\206", /* 👆 (actually called: BACKHAND INDEX POINTING UP) */
428
429 /* These three emojis are single character cell glyphs in Unicode and also in ASCII. */
430 [SPECIAL_GLYPH_RECYCLING] = "\u267B\uFE0F ", /* ♻️ (actually called: UNIVERSAL RECYCLNG SYMBOL) */
431 [SPECIAL_GLYPH_DOWNLOAD] = "\u2935\uFE0F ", /* ⤵️ (actually called: RIGHT ARROW CURVING DOWN) */
432 [SPECIAL_GLYPH_SPARKLES] = "\u2728", /* ✨ */
433 },
434 };
435
436 if (code < 0)
437 return NULL;
438
439 assert(code < _SPECIAL_GLYPH_MAX);
440 return draw_table[code >= _SPECIAL_GLYPH_FIRST_EMOJI ? emoji_enabled() : is_locale_utf8()][code];
441 }
442
443 void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
444 if (!l)
445 return;
446
447 for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++)
448 l[i] = mfree(l[i]);
449 }
450
451 static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
452 [VARIABLE_LANG] = "LANG",
453 [VARIABLE_LANGUAGE] = "LANGUAGE",
454 [VARIABLE_LC_CTYPE] = "LC_CTYPE",
455 [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
456 [VARIABLE_LC_TIME] = "LC_TIME",
457 [VARIABLE_LC_COLLATE] = "LC_COLLATE",
458 [VARIABLE_LC_MONETARY] = "LC_MONETARY",
459 [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
460 [VARIABLE_LC_PAPER] = "LC_PAPER",
461 [VARIABLE_LC_NAME] = "LC_NAME",
462 [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
463 [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
464 [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
465 [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
466 };
467
468 DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);