]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/locale-util.c
Merge pull request #14420 from DaanDeMeyer/nspawn-fix-read-only-overlay-rootfs
[thirdparty/systemd.git] / src / basic / locale-util.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
75683450 2
11c3a366
TA
3#include <errno.h>
4#include <fcntl.h>
ed457f13 5#include <ftw.h>
8752c575 6#include <langinfo.h>
11c3a366 7#include <libintl.h>
11c3a366
TA
8#include <stddef.h>
9#include <stdint.h>
10#include <stdlib.h>
75683450 11#include <sys/mman.h>
11c3a366 12#include <sys/stat.h>
75683450 13
ed457f13 14#include "def.h"
a0956174 15#include "dirent-util.h"
5f1b0cc6 16#include "env-util.h"
3ffd4af2 17#include "fd-util.h"
93cc7779 18#include "hashmap.h"
3ffd4af2 19#include "locale-util.h"
bb15fafe 20#include "path-util.h"
75683450 21#include "set.h"
8b43440b 22#include "string-table.h"
07630cea 23#include "string-util.h"
75683450 24#include "strv.h"
07630cea 25#include "utf8.h"
75683450 26
13f45806
LP
27static char *normalize_locale(const char *name) {
28 const char *e;
29
30 /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
31 * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
32 * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
33 * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
34 * that for UTF-8 however, since it's kinda the only charset that matters. */
35
36 e = endswith(name, ".utf8");
37 if (e) {
38 _cleanup_free_ char *prefix = NULL;
39
40 prefix = strndup(name, e - name);
41 if (!prefix)
42 return NULL;
43
44 return strjoin(prefix, ".UTF-8");
45 }
46
47 e = strstr(name, ".utf8@");
48 if (e) {
49 _cleanup_free_ char *prefix = NULL;
50
51 prefix = strndup(name, e - name);
52 if (!prefix)
53 return NULL;
54
55 return strjoin(prefix, ".UTF-8@", e + 6);
56 }
57
58 return strdup(name);
59}
60
75683450
LP
61static int add_locales_from_archive(Set *locales) {
62 /* Stolen from glibc... */
63
64 struct locarhead {
65 uint32_t magic;
66 /* Serial number. */
67 uint32_t serial;
68 /* Name hash table. */
69 uint32_t namehash_offset;
70 uint32_t namehash_used;
71 uint32_t namehash_size;
72 /* String table. */
73 uint32_t string_offset;
74 uint32_t string_used;
75 uint32_t string_size;
76 /* Table with locale records. */
77 uint32_t locrectab_offset;
78 uint32_t locrectab_used;
79 uint32_t locrectab_size;
80 /* MD5 sum hash table. */
81 uint32_t sumhash_offset;
82 uint32_t sumhash_used;
83 uint32_t sumhash_size;
84 };
85
86 struct namehashent {
87 /* Hash value of the name. */
88 uint32_t hashval;
89 /* Offset of the name in the string table. */
90 uint32_t name_offset;
91 /* Offset of the locale record. */
92 uint32_t locrec_offset;
93 };
94
95 const struct locarhead *h;
96 const struct namehashent *e;
97 const void *p = MAP_FAILED;
98 _cleanup_close_ int fd = -1;
99 size_t sz = 0;
100 struct stat st;
da6053d0 101 size_t i;
75683450
LP
102 int r;
103
104 fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
105 if (fd < 0)
106 return errno == ENOENT ? 0 : -errno;
107
108 if (fstat(fd, &st) < 0)
109 return -errno;
110
111 if (!S_ISREG(st.st_mode))
112 return -EBADMSG;
113
114 if (st.st_size < (off_t) sizeof(struct locarhead))
115 return -EBADMSG;
116
117 p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
118 if (p == MAP_FAILED)
119 return -errno;
120
121 h = (const struct locarhead *) p;
122 if (h->magic != 0xde020109 ||
123 h->namehash_offset + h->namehash_size > st.st_size ||
124 h->string_offset + h->string_size > st.st_size ||
125 h->locrectab_offset + h->locrectab_size > st.st_size ||
126 h->sumhash_offset + h->sumhash_size > st.st_size) {
127 r = -EBADMSG;
128 goto finish;
129 }
130
131 e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
132 for (i = 0; i < h->namehash_size; i++) {
133 char *z;
134
135 if (e[i].locrec_offset == 0)
136 continue;
137
138 if (!utf8_is_valid((char*) p + e[i].name_offset))
139 continue;
140
13f45806 141 z = normalize_locale((char*) p + e[i].name_offset);
75683450
LP
142 if (!z) {
143 r = -ENOMEM;
144 goto finish;
145 }
146
147 r = set_consume(locales, z);
148 if (r < 0)
149 goto finish;
150 }
151
152 r = 0;
153
154 finish:
155 if (p != MAP_FAILED)
156 munmap((void*) p, sz);
157
158 return r;
159}
160
161static int add_locales_from_libdir (Set *locales) {
162 _cleanup_closedir_ DIR *dir = NULL;
163 struct dirent *entry;
164 int r;
165
166 dir = opendir("/usr/lib/locale");
167 if (!dir)
168 return errno == ENOENT ? 0 : -errno;
169
170 FOREACH_DIRENT(entry, dir, return -errno) {
171 char *z;
172
331fb4ca
EV
173 dirent_ensure_type(dir, entry);
174
75683450
LP
175 if (entry->d_type != DT_DIR)
176 continue;
177
13f45806 178 z = normalize_locale(entry->d_name);
75683450
LP
179 if (!z)
180 return -ENOMEM;
181
182 r = set_consume(locales, z);
183 if (r < 0 && r != -EEXIST)
184 return r;
185 }
186
187 return 0;
188}
189
190int get_locales(char ***ret) {
191 _cleanup_set_free_ Set *locales = NULL;
192 _cleanup_strv_free_ char **l = NULL;
193 int r;
194
d5099efc 195 locales = set_new(&string_hash_ops);
75683450
LP
196 if (!locales)
197 return -ENOMEM;
198
199 r = add_locales_from_archive(locales);
200 if (r < 0 && r != -ENOENT)
201 return r;
202
203 r = add_locales_from_libdir(locales);
204 if (r < 0)
205 return r;
206
207 l = set_get_strv(locales);
208 if (!l)
209 return -ENOMEM;
210
a7d9fccd
LP
211 r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
212 if (r == -ENXIO || r == 0) {
213 char **a, **b;
214
215 /* Filter out non-UTF-8 locales, because it's 2019, by default */
216 for (a = b = l; *a; a++) {
217
218 if (endswith(*a, "UTF-8") ||
219 strstr(*a, ".UTF-8@"))
220 *(b++) = *a;
221 else
222 free(*a);
223 }
224
225 *b = NULL;
226
227 } else if (r < 0)
228 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
229
75683450
LP
230 strv_sort(l);
231
1cc6c93a 232 *ret = TAKE_PTR(l);
75683450
LP
233
234 return 0;
235}
236
/* Checks whether 'name' is acceptable as a locale name.
 *
 * A name is accepted only if it is non-empty, shorter than 128 bytes, valid UTF-8, passes
 * filename_is_valid() and passes string_is_safe(). The checks are evaluated in this order and stop at the
 * first one that fails. Note that this does not check whether such a locale is actually installed. */
bool locale_is_valid(const char *name) {
        return !isempty(name) &&
                strlen(name) < 128 &&
                utf8_is_valid(name) &&
                filename_is_valid(name) &&
                string_is_safe(name);
}
a3428668 256
8752c575
LP
257void init_gettext(void) {
258 setlocale(LC_ALL, "");
259 textdomain(GETTEXT_PACKAGE);
260}
261
/* Reports whether the process locale should be treated as UTF-8.
 *
 * The answer is computed on the first call and cached in a static variable afterwards. As a side effect
 * the first call invokes setlocale(LC_ALL, "").
 *
 * We default to 'true', since today UTF-8 is pretty much supported everywhere: the result is true if the
 * locale cannot be set or queried, or if the codeset is "UTF-8". For any other codeset the result is true
 * only if LC_CTYPE is "C" or "POSIX" and none of $LC_ALL, $LC_CTYPE and $LANG is set, i.e. if the C locale
 * was not selected explicitly. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;
        const char *codeset, *ctype;
        bool utf8 = true;

        if (cached_answer >= 0)
                return (bool) cached_answer;

        if (setlocale(LC_ALL, "")) {
                codeset = nl_langinfo(CODESET);

                if (codeset && !streq(codeset, "UTF-8")) {
                        /* For LC_CTYPE=="C" return true, because CTYPE is effectively unset and
                         * everything can do UTF-8 nowadays. */
                        ctype = setlocale(LC_CTYPE, NULL);

                        if (ctype)
                                /* Check result, but ignore the result if C was set explicitly. */
                                utf8 = STR_IN_SET(ctype, "C", "POSIX") &&
                                        !getenv("LC_ALL") &&
                                        !getenv("LC_CTYPE") &&
                                        !getenv("LANG");
                }
        }

        cached_answer = utf8;

        return utf8;
}
307
5f1b0cc6
LP
/* Reports whether emoji glyphs may be used in output. The answer is computed once and cached.
 *
 * If $SYSTEMD_EMOJI parses as a boolean, its value decides. Otherwise emoji are enabled when the locale is
 * UTF-8 and $TERM is neither "dumb" nor "linux". */
static bool emoji_enabled(void) {
        static int cached_emoji_enabled = -1;
        int setting;

        if (cached_emoji_enabled >= 0)
                return cached_emoji_enabled;

        setting = getenv_bool("SYSTEMD_EMOJI");

        /* A negative result means the variable is unset or unparsable: fall back to auto-detection. */
        cached_emoji_enabled = setting >= 0 ? setting :
                (is_locale_utf8() &&
                 !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux"));

        return cached_emoji_enabled;
}
325
323b7dc9
ZJS
326const char *special_glyph(SpecialGlyph code) {
327
5d01f5dc
LP
328 /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
329 * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
330 * works reasonably well on the Linux console. For details see:
331 *
332 * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
333 */
334
dff4bf93 335 static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
323b7dc9
ZJS
336 /* ASCII fallback */
337 [false] = {
9a6f746f
LP
338 [SPECIAL_GLYPH_TREE_VERTICAL] = "| ",
339 [SPECIAL_GLYPH_TREE_BRANCH] = "|-",
340 [SPECIAL_GLYPH_TREE_RIGHT] = "`-",
341 [SPECIAL_GLYPH_TREE_SPACE] = " ",
342 [SPECIAL_GLYPH_TRIANGULAR_BULLET] = ">",
343 [SPECIAL_GLYPH_BLACK_CIRCLE] = "*",
344 [SPECIAL_GLYPH_BULLET] = "*",
345 [SPECIAL_GLYPH_ARROW] = "->",
346 [SPECIAL_GLYPH_MDASH] = "-",
347 [SPECIAL_GLYPH_ELLIPSIS] = "...",
348 [SPECIAL_GLYPH_MU] = "u",
349 [SPECIAL_GLYPH_CHECK_MARK] = "+",
350 [SPECIAL_GLYPH_CROSS_MARK] = "-",
351 [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]",
352 [SPECIAL_GLYPH_HAPPY_SMILEY] = ":-}",
353 [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = ":-)",
354 [SPECIAL_GLYPH_NEUTRAL_SMILEY] = ":-|",
355 [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
de520006 356 [SPECIAL_GLYPH_UNHAPPY_SMILEY] = ":-{",
9a6f746f 357 [SPECIAL_GLYPH_DEPRESSED_SMILEY] = ":-[",
8752c575
LP
358 },
359
323b7dc9 360 /* UTF-8 */
b77f5e27 361 [true] = {
9a6f746f
LP
362 [SPECIAL_GLYPH_TREE_VERTICAL] = "\342\224\202 ", /* │ */
363 [SPECIAL_GLYPH_TREE_BRANCH] = "\342\224\234\342\224\200", /* ├─ */
364 [SPECIAL_GLYPH_TREE_RIGHT] = "\342\224\224\342\224\200", /* └─ */
365 [SPECIAL_GLYPH_TREE_SPACE] = " ", /* */
366 [SPECIAL_GLYPH_TRIANGULAR_BULLET] = "\342\200\243", /* ‣ */
367 [SPECIAL_GLYPH_BLACK_CIRCLE] = "\342\227\217", /* ● */
368 [SPECIAL_GLYPH_BULLET] = "\342\200\242", /* • */
369 [SPECIAL_GLYPH_ARROW] = "\342\206\222", /* → */
370 [SPECIAL_GLYPH_MDASH] = "\342\200\223", /* – */
371 [SPECIAL_GLYPH_ELLIPSIS] = "\342\200\246", /* … */
372 [SPECIAL_GLYPH_MU] = "\316\274", /* μ */
373 [SPECIAL_GLYPH_CHECK_MARK] = "\342\234\223", /* ✓ */
374 [SPECIAL_GLYPH_CROSS_MARK] = "\342\234\227", /* ✗ */
375 [SPECIAL_GLYPH_ECSTATIC_SMILEY] = "\360\237\230\207", /* 😇 */
376 [SPECIAL_GLYPH_HAPPY_SMILEY] = "\360\237\230\200", /* 😀 */
377 [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = "\360\237\231\202", /* 🙂 */
378 [SPECIAL_GLYPH_NEUTRAL_SMILEY] = "\360\237\230\220", /* 😐 */
379 [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201", /* 🙁 */
5e13bcdd 380 [SPECIAL_GLYPH_UNHAPPY_SMILEY] = "\360\237\230\250", /* 😨 */
9a6f746f 381 [SPECIAL_GLYPH_DEPRESSED_SMILEY] = "\360\237\244\242", /* 🤢 */
323b7dc9 382 },
8752c575
LP
383 };
384
5f1b0cc6
LP
385 assert(code < _SPECIAL_GLYPH_MAX);
386
387 return draw_table[code >= _SPECIAL_GLYPH_FIRST_SMILEY ? emoji_enabled() : is_locale_utf8()][code];
8752c575
LP
388}
389
f2a3de01 390void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
e6755a33
LP
391 LocaleVariable i;
392
393 if (!l)
394 return;
395
396 for (i = 0; i < _VARIABLE_LC_MAX; i++)
397 l[i] = mfree(l[i]);
398}
399
a3428668
MS
400static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
401 [VARIABLE_LANG] = "LANG",
402 [VARIABLE_LANGUAGE] = "LANGUAGE",
403 [VARIABLE_LC_CTYPE] = "LC_CTYPE",
404 [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
405 [VARIABLE_LC_TIME] = "LC_TIME",
406 [VARIABLE_LC_COLLATE] = "LC_COLLATE",
407 [VARIABLE_LC_MONETARY] = "LC_MONETARY",
408 [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
409 [VARIABLE_LC_PAPER] = "LC_PAPER",
410 [VARIABLE_LC_NAME] = "LC_NAME",
411 [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
412 [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
413 [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
414 [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
415};
416
417DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);