]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/locale-util.c
Merge pull request #31524 from poettering/secure-getenv-naming-fix
[thirdparty/systemd.git] / src / basic / locale-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <langinfo.h>
6 #include <libintl.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <sys/mman.h>
11 #include <sys/stat.h>
12
13 #include "constants.h"
14 #include "dirent-util.h"
15 #include "env-util.h"
16 #include "fd-util.h"
17 #include "fileio.h"
18 #include "hashmap.h"
19 #include "locale-util.h"
20 #include "missing_syscall.h"
21 #include "path-util.h"
22 #include "set.h"
23 #include "string-table.h"
24 #include "string-util.h"
25 #include "strv.h"
26 #include "utf8.h"
27
28 static char *normalize_locale(const char *name) {
29 const char *e;
30
31 /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
32 * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
33 * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
34 * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
35 * that for UTF-8 however, since it's kinda the only charset that matters. */
36
37 e = endswith(name, ".utf8");
38 if (e) {
39 _cleanup_free_ char *prefix = NULL;
40
41 prefix = strndup(name, e - name);
42 if (!prefix)
43 return NULL;
44
45 return strjoin(prefix, ".UTF-8");
46 }
47
48 e = strstr(name, ".utf8@");
49 if (e) {
50 _cleanup_free_ char *prefix = NULL;
51
52 prefix = strndup(name, e - name);
53 if (!prefix)
54 return NULL;
55
56 return strjoin(prefix, ".UTF-8@", e + 6);
57 }
58
59 return strdup(name);
60 }
61
62 static int add_locales_from_archive(Set *locales) {
63 /* Stolen from glibc... */
64
65 struct locarhead {
66 uint32_t magic;
67 /* Serial number. */
68 uint32_t serial;
69 /* Name hash table. */
70 uint32_t namehash_offset;
71 uint32_t namehash_used;
72 uint32_t namehash_size;
73 /* String table. */
74 uint32_t string_offset;
75 uint32_t string_used;
76 uint32_t string_size;
77 /* Table with locale records. */
78 uint32_t locrectab_offset;
79 uint32_t locrectab_used;
80 uint32_t locrectab_size;
81 /* MD5 sum hash table. */
82 uint32_t sumhash_offset;
83 uint32_t sumhash_used;
84 uint32_t sumhash_size;
85 };
86
87 struct namehashent {
88 /* Hash value of the name. */
89 uint32_t hashval;
90 /* Offset of the name in the string table. */
91 uint32_t name_offset;
92 /* Offset of the locale record. */
93 uint32_t locrec_offset;
94 };
95
96 const struct locarhead *h;
97 const struct namehashent *e;
98 const void *p = MAP_FAILED;
99 _cleanup_close_ int fd = -EBADF;
100 size_t sz = 0;
101 struct stat st;
102 int r;
103
104 fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
105 if (fd < 0)
106 return errno == ENOENT ? 0 : -errno;
107
108 if (fstat(fd, &st) < 0)
109 return -errno;
110
111 if (!S_ISREG(st.st_mode))
112 return -EBADMSG;
113
114 if (st.st_size < (off_t) sizeof(struct locarhead))
115 return -EBADMSG;
116
117 if (file_offset_beyond_memory_size(st.st_size))
118 return -EFBIG;
119
120 p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
121 if (p == MAP_FAILED)
122 return -errno;
123
124 h = (const struct locarhead *) p;
125 if (h->magic != 0xde020109 ||
126 h->namehash_offset + h->namehash_size > st.st_size ||
127 h->string_offset + h->string_size > st.st_size ||
128 h->locrectab_offset + h->locrectab_size > st.st_size ||
129 h->sumhash_offset + h->sumhash_size > st.st_size) {
130 r = -EBADMSG;
131 goto finish;
132 }
133
134 e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
135 for (size_t i = 0; i < h->namehash_size; i++) {
136 char *z;
137
138 if (e[i].locrec_offset == 0)
139 continue;
140
141 if (!utf8_is_valid((char*) p + e[i].name_offset))
142 continue;
143
144 z = normalize_locale((char*) p + e[i].name_offset);
145 if (!z) {
146 r = -ENOMEM;
147 goto finish;
148 }
149
150 r = set_consume(locales, z);
151 if (r < 0)
152 goto finish;
153 }
154
155 r = 0;
156
157 finish:
158 if (p != MAP_FAILED)
159 munmap((void*) p, sz);
160
161 return r;
162 }
163
164 static int add_locales_from_libdir(Set *locales) {
165 _cleanup_closedir_ DIR *dir = NULL;
166 int r;
167
168 dir = opendir("/usr/lib/locale");
169 if (!dir)
170 return errno == ENOENT ? 0 : -errno;
171
172 FOREACH_DIRENT(de, dir, return -errno) {
173 char *z;
174
175 if (de->d_type != DT_DIR)
176 continue;
177
178 z = normalize_locale(de->d_name);
179 if (!z)
180 return -ENOMEM;
181
182 r = set_consume(locales, z);
183 if (r < 0 && r != -EEXIST)
184 return r;
185 }
186
187 return 0;
188 }
189
190 int get_locales(char ***ret) {
191 _cleanup_set_free_free_ Set *locales = NULL;
192 _cleanup_strv_free_ char **l = NULL;
193 int r;
194
195 locales = set_new(&string_hash_ops);
196 if (!locales)
197 return -ENOMEM;
198
199 r = add_locales_from_archive(locales);
200 if (r < 0 && r != -ENOENT)
201 return r;
202
203 r = add_locales_from_libdir(locales);
204 if (r < 0)
205 return r;
206
207 char *locale;
208 SET_FOREACH(locale, locales) {
209 r = locale_is_installed(locale);
210 if (r < 0)
211 return r;
212 if (r == 0)
213 free(set_remove(locales, locale));
214 }
215
216 l = set_get_strv(locales);
217 if (!l)
218 return -ENOMEM;
219
220 /* Now, all elements are owned by strv 'l'. Hence, do not call set_free_free(). */
221 locales = set_free(locales);
222
223 r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
224 if (IN_SET(r, -ENXIO, 0)) {
225 char **a, **b;
226
227 /* Filter out non-UTF-8 locales, because it's 2019, by default */
228 for (a = b = l; *a; a++) {
229
230 if (endswith(*a, "UTF-8") ||
231 strstr(*a, ".UTF-8@"))
232 *(b++) = *a;
233 else
234 free(*a);
235 }
236
237 *b = NULL;
238
239 } else if (r < 0)
240 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
241
242 strv_sort(l);
243
244 *ret = TAKE_PTR(l);
245
246 return 0;
247 }
248
249 bool locale_is_valid(const char *name) {
250
251 if (isempty(name))
252 return false;
253
254 if (strlen(name) >= 128)
255 return false;
256
257 if (!utf8_is_valid(name))
258 return false;
259
260 if (!filename_is_valid(name))
261 return false;
262
263 /* Locales look like: ll_CC.ENC@variant, where ll and CC are alphabetic, ENC is alphanumeric with
264 * dashes, and variant seems to be alphabetic.
265 * See: https://www.gnu.org/software/gettext/manual/html_node/Locale-Names.html */
266 if (!in_charset(name, ALPHANUMERICAL "_.-@"))
267 return false;
268
269 return true;
270 }
271
272 int locale_is_installed(const char *name) {
273 if (!locale_is_valid(name))
274 return false;
275
276 if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
277 return true;
278
279 _cleanup_(freelocalep) locale_t loc =
280 newlocale(LC_ALL_MASK, name, 0);
281 if (loc == (locale_t) 0)
282 return errno == ENOMEM ? -ENOMEM : false;
283
284 return true;
285 }
286
/* Determines, without any caching, whether we should assume a UTF-8 locale. Whenever the answer cannot
 * be determined this says 'true', since UTF-8 is supported pretty much everywhere today. */
static bool locale_utf8_detect(void) {
        const char *charset, *ctype;
        int r;

        /* An explicit setting via the environment takes precedence over everything else */
        r = secure_getenv_bool("SYSTEMD_UTF8");
        if (r >= 0)
                return r;
        if (r != -ENXIO)
                log_debug_errno(r, "Failed to parse $SYSTEMD_UTF8, ignoring: %m");

        /* We may be called from libsystemd, and setlocale() is not thread safe. Hence, if the calling
         * thread's ID differs from the process ID, do not touch the locale and simply assume UTF-8. */
        if (gettid() != raw_getpid())
                return true;

        if (!setlocale(LC_ALL, ""))
                return true;

        charset = nl_langinfo(CODESET);
        if (!charset || streq(charset, "UTF-8"))
                return true;

        /* If LC_CTYPE is "C" it is effectively unset, and we say yes in that case as well, because
         * everything can deal with UTF-8 nowadays. */
        ctype = setlocale(LC_CTYPE, NULL);
        if (!ctype)
                return true;

        /* ... but not if "C" was requested explicitly through one of the environment variables. */
        return STR_IN_SET(ctype, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");
}

/* Returns whether we should assume a UTF-8 locale. The answer is computed on the first call and served
 * from a static cache afterwards. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;

        if (cached_answer < 0)
                cached_answer = locale_utf8_detect();

        return (bool) cached_answer;
}
346
347 void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
348 free_many_charp(l, _VARIABLE_LC_MAX);
349 }
350
351 void locale_variables_simplify(char *l[_VARIABLE_LC_MAX]) {
352 assert(l);
353
354 for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++) {
355 if (p == VARIABLE_LANG)
356 continue;
357 if (isempty(l[p]) || streq_ptr(l[VARIABLE_LANG], l[p]))
358 l[p] = mfree(l[p]);
359 }
360 }
361
362 static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
363 [VARIABLE_LANG] = "LANG",
364 [VARIABLE_LANGUAGE] = "LANGUAGE",
365 [VARIABLE_LC_CTYPE] = "LC_CTYPE",
366 [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
367 [VARIABLE_LC_TIME] = "LC_TIME",
368 [VARIABLE_LC_COLLATE] = "LC_COLLATE",
369 [VARIABLE_LC_MONETARY] = "LC_MONETARY",
370 [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
371 [VARIABLE_LC_PAPER] = "LC_PAPER",
372 [VARIABLE_LC_NAME] = "LC_NAME",
373 [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
374 [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
375 [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
376 [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
377 };
378
379 DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);