]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/locale-util.c
tree-wide: use -EBADF for fd initialization
[thirdparty/systemd.git] / src / basic / locale-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
75683450 2
11c3a366
TA
3#include <errno.h>
4#include <fcntl.h>
8752c575 5#include <langinfo.h>
11c3a366 6#include <libintl.h>
11c3a366
TA
7#include <stddef.h>
8#include <stdint.h>
9#include <stdlib.h>
75683450 10#include <sys/mman.h>
11c3a366 11#include <sys/stat.h>
75683450 12
28db6fbf 13#include "constants.h"
a0956174 14#include "dirent-util.h"
5f1b0cc6 15#include "env-util.h"
3ffd4af2 16#include "fd-util.h"
1a823cde 17#include "fileio.h"
93cc7779 18#include "hashmap.h"
3ffd4af2 19#include "locale-util.h"
bb15fafe 20#include "path-util.h"
75683450 21#include "set.h"
8b43440b 22#include "string-table.h"
07630cea 23#include "string-util.h"
75683450 24#include "strv.h"
07630cea 25#include "utf8.h"
75683450 26
13f45806
LP
27static char *normalize_locale(const char *name) {
28 const char *e;
29
30 /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
31 * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
32 * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
33 * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
34 * that for UTF-8 however, since it's kinda the only charset that matters. */
35
36 e = endswith(name, ".utf8");
37 if (e) {
38 _cleanup_free_ char *prefix = NULL;
39
40 prefix = strndup(name, e - name);
41 if (!prefix)
42 return NULL;
43
44 return strjoin(prefix, ".UTF-8");
45 }
46
47 e = strstr(name, ".utf8@");
48 if (e) {
49 _cleanup_free_ char *prefix = NULL;
50
51 prefix = strndup(name, e - name);
52 if (!prefix)
53 return NULL;
54
55 return strjoin(prefix, ".UTF-8@", e + 6);
56 }
57
58 return strdup(name);
59}
60
75683450
LP
61static int add_locales_from_archive(Set *locales) {
62 /* Stolen from glibc... */
63
64 struct locarhead {
65 uint32_t magic;
66 /* Serial number. */
67 uint32_t serial;
68 /* Name hash table. */
69 uint32_t namehash_offset;
70 uint32_t namehash_used;
71 uint32_t namehash_size;
72 /* String table. */
73 uint32_t string_offset;
74 uint32_t string_used;
75 uint32_t string_size;
76 /* Table with locale records. */
77 uint32_t locrectab_offset;
78 uint32_t locrectab_used;
79 uint32_t locrectab_size;
80 /* MD5 sum hash table. */
81 uint32_t sumhash_offset;
82 uint32_t sumhash_used;
83 uint32_t sumhash_size;
84 };
85
86 struct namehashent {
87 /* Hash value of the name. */
88 uint32_t hashval;
89 /* Offset of the name in the string table. */
90 uint32_t name_offset;
91 /* Offset of the locale record. */
92 uint32_t locrec_offset;
93 };
94
95 const struct locarhead *h;
96 const struct namehashent *e;
97 const void *p = MAP_FAILED;
254d1313 98 _cleanup_close_ int fd = -EBADF;
75683450
LP
99 size_t sz = 0;
100 struct stat st;
75683450
LP
101 int r;
102
103 fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
104 if (fd < 0)
105 return errno == ENOENT ? 0 : -errno;
106
107 if (fstat(fd, &st) < 0)
108 return -errno;
109
110 if (!S_ISREG(st.st_mode))
111 return -EBADMSG;
112
113 if (st.st_size < (off_t) sizeof(struct locarhead))
114 return -EBADMSG;
115
1a823cde
TS
116 if (file_offset_beyond_memory_size(st.st_size))
117 return -EFBIG;
118
75683450
LP
119 p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
120 if (p == MAP_FAILED)
121 return -errno;
122
123 h = (const struct locarhead *) p;
124 if (h->magic != 0xde020109 ||
125 h->namehash_offset + h->namehash_size > st.st_size ||
126 h->string_offset + h->string_size > st.st_size ||
127 h->locrectab_offset + h->locrectab_size > st.st_size ||
128 h->sumhash_offset + h->sumhash_size > st.st_size) {
129 r = -EBADMSG;
130 goto finish;
131 }
132
133 e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
0eacd185 134 for (size_t i = 0; i < h->namehash_size; i++) {
75683450
LP
135 char *z;
136
137 if (e[i].locrec_offset == 0)
138 continue;
139
140 if (!utf8_is_valid((char*) p + e[i].name_offset))
141 continue;
142
13f45806 143 z = normalize_locale((char*) p + e[i].name_offset);
75683450
LP
144 if (!z) {
145 r = -ENOMEM;
146 goto finish;
147 }
148
149 r = set_consume(locales, z);
150 if (r < 0)
151 goto finish;
152 }
153
154 r = 0;
155
156 finish:
157 if (p != MAP_FAILED)
158 munmap((void*) p, sz);
159
160 return r;
161}
162
065058e6 163static int add_locales_from_libdir(Set *locales) {
75683450 164 _cleanup_closedir_ DIR *dir = NULL;
75683450
LP
165 int r;
166
167 dir = opendir("/usr/lib/locale");
168 if (!dir)
169 return errno == ENOENT ? 0 : -errno;
170
af3b864d 171 FOREACH_DIRENT(de, dir, return -errno) {
75683450
LP
172 char *z;
173
af3b864d 174 if (de->d_type != DT_DIR)
75683450
LP
175 continue;
176
af3b864d 177 z = normalize_locale(de->d_name);
75683450
LP
178 if (!z)
179 return -ENOMEM;
180
181 r = set_consume(locales, z);
182 if (r < 0 && r != -EEXIST)
183 return r;
184 }
185
186 return 0;
187}
188
189int get_locales(char ***ret) {
065058e6 190 _cleanup_set_free_free_ Set *locales = NULL;
75683450
LP
191 _cleanup_strv_free_ char **l = NULL;
192 int r;
193
d5099efc 194 locales = set_new(&string_hash_ops);
75683450
LP
195 if (!locales)
196 return -ENOMEM;
197
198 r = add_locales_from_archive(locales);
199 if (r < 0 && r != -ENOENT)
200 return r;
201
202 r = add_locales_from_libdir(locales);
203 if (r < 0)
204 return r;
205
a2f79377
YW
206 char *locale;
207 SET_FOREACH(locale, locales) {
208 r = locale_is_installed(locale);
209 if (r < 0)
210 return r;
211 if (r == 0)
212 free(set_remove(locales, locale));
213 }
214
75683450
LP
215 l = set_get_strv(locales);
216 if (!l)
217 return -ENOMEM;
218
065058e6
YW
219 /* Now, all elements are owned by strv 'l'. Hence, do not call set_free_free(). */
220 locales = set_free(locales);
221
a7d9fccd
LP
222 r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
223 if (r == -ENXIO || r == 0) {
224 char **a, **b;
225
226 /* Filter out non-UTF-8 locales, because it's 2019, by default */
227 for (a = b = l; *a; a++) {
228
229 if (endswith(*a, "UTF-8") ||
230 strstr(*a, ".UTF-8@"))
231 *(b++) = *a;
232 else
233 free(*a);
234 }
235
236 *b = NULL;
237
238 } else if (r < 0)
239 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
240
75683450
LP
241 strv_sort(l);
242
1cc6c93a 243 *ret = TAKE_PTR(l);
75683450
LP
244
245 return 0;
246}
247
/* Checks whether 'name' is syntactically acceptable as a locale name: non-empty, shorter than 128
 * bytes, valid UTF-8, a valid file name, and a "safe" string. Nothing is checked against the locales
 * actually available on the system. */
bool locale_is_valid(const char *name) {
        /* The checks short-circuit in this order, so strlen() is never reached for an empty name. */
        return !isempty(name) &&
                strlen(name) < 128 &&
                utf8_is_valid(name) &&
                filename_is_valid(name) &&
                string_is_safe(name);
}
a3428668 267
23fa786c
LP
268int locale_is_installed(const char *name) {
269 if (!locale_is_valid(name))
270 return false;
271
272 if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
273 return true;
274
275 _cleanup_(freelocalep) locale_t loc =
276 newlocale(LC_ALL_MASK, name, 0);
277 if (loc == (locale_t) 0)
278 return errno == ENOMEM ? -ENOMEM : false;
279
280 return true;
281}
282
8752c575
LP
283void init_gettext(void) {
284 setlocale(LC_ALL, "");
285 textdomain(GETTEXT_PACKAGE);
286}
287
/* Determines, without any caching, whether we should assume a UTF-8 capable environment. Whenever
 * something cannot be determined the answer is 'true', since UTF-8 is supported pretty much everywhere
 * these days. */
static bool detect_locale_utf8(void) {
        const char *s;
        int r;

        /* An explicit setting in the environment takes precedence over everything else */
        r = getenv_bool_secure("SYSTEMD_UTF8");
        if (r >= 0)
                return r;
        if (r != -ENXIO)
                log_debug_errno(r, "Failed to parse $SYSTEMD_UTF8, ignoring: %m");

        if (!setlocale(LC_ALL, ""))
                return true;

        s = nl_langinfo(CODESET);
        if (!s || streq(s, "UTF-8"))
                return true;

        /* If LC_CTYPE is "C"/"POSIX" merely because nothing was configured, CTYPE is effectively unset,
         * and everything can deal with UTF-8 nowadays. */
        s = setlocale(LC_CTYPE, NULL);
        if (!s)
                return true;

        /* Honour the non-UTF-8 result if one of these variables is set in the environment. */
        return STR_IN_SET(s, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");
}

/* Returns whether the environment is considered UTF-8 capable. The answer is computed on the first
 * call only and returned from a static cache afterwards. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;

        if (cached_answer < 0)
                cached_answer = detect_locale_utf8();

        return (bool) cached_answer;
}
341
f2a3de01 342void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
e6755a33
LP
343 if (!l)
344 return;
345
0eacd185 346 for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++)
e6755a33
LP
347 l[i] = mfree(l[i]);
348}
349
3d36b5d7
YW
350void locale_variables_simplify(char *l[_VARIABLE_LC_MAX]) {
351 assert(l);
352
353 for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++) {
354 if (p == VARIABLE_LANG)
355 continue;
356 if (isempty(l[p]) || streq_ptr(l[VARIABLE_LANG], l[p]))
357 l[p] = mfree(l[p]);
358 }
359}
360
a3428668 361static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
d2e96a4f
YW
362 [VARIABLE_LANG] = "LANG",
363 [VARIABLE_LANGUAGE] = "LANGUAGE",
364 [VARIABLE_LC_CTYPE] = "LC_CTYPE",
365 [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
366 [VARIABLE_LC_TIME] = "LC_TIME",
367 [VARIABLE_LC_COLLATE] = "LC_COLLATE",
368 [VARIABLE_LC_MONETARY] = "LC_MONETARY",
369 [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
370 [VARIABLE_LC_PAPER] = "LC_PAPER",
371 [VARIABLE_LC_NAME] = "LC_NAME",
372 [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
373 [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
374 [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
a3428668
MS
375 [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
376};
377
378DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);