]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/locale-util.c
Merge pull request #21800 from keszybz/net-id-debugging
[thirdparty/systemd.git] / src / basic / locale-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <langinfo.h>
6 #include <libintl.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <sys/mman.h>
11 #include <sys/stat.h>
12
13 #include "def.h"
14 #include "dirent-util.h"
15 #include "env-util.h"
16 #include "fd-util.h"
17 #include "hashmap.h"
18 #include "locale-util.h"
19 #include "path-util.h"
20 #include "set.h"
21 #include "string-table.h"
22 #include "string-util.h"
23 #include "strv.h"
24 #include "utf8.h"
25
26 static char *normalize_locale(const char *name) {
27 const char *e;
28
29 /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
30 * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
31 * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
32 * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
33 * that for UTF-8 however, since it's kinda the only charset that matters. */
34
35 e = endswith(name, ".utf8");
36 if (e) {
37 _cleanup_free_ char *prefix = NULL;
38
39 prefix = strndup(name, e - name);
40 if (!prefix)
41 return NULL;
42
43 return strjoin(prefix, ".UTF-8");
44 }
45
46 e = strstr(name, ".utf8@");
47 if (e) {
48 _cleanup_free_ char *prefix = NULL;
49
50 prefix = strndup(name, e - name);
51 if (!prefix)
52 return NULL;
53
54 return strjoin(prefix, ".UTF-8@", e + 6);
55 }
56
57 return strdup(name);
58 }
59
60 static int add_locales_from_archive(Set *locales) {
61 /* Stolen from glibc... */
62
63 struct locarhead {
64 uint32_t magic;
65 /* Serial number. */
66 uint32_t serial;
67 /* Name hash table. */
68 uint32_t namehash_offset;
69 uint32_t namehash_used;
70 uint32_t namehash_size;
71 /* String table. */
72 uint32_t string_offset;
73 uint32_t string_used;
74 uint32_t string_size;
75 /* Table with locale records. */
76 uint32_t locrectab_offset;
77 uint32_t locrectab_used;
78 uint32_t locrectab_size;
79 /* MD5 sum hash table. */
80 uint32_t sumhash_offset;
81 uint32_t sumhash_used;
82 uint32_t sumhash_size;
83 };
84
85 struct namehashent {
86 /* Hash value of the name. */
87 uint32_t hashval;
88 /* Offset of the name in the string table. */
89 uint32_t name_offset;
90 /* Offset of the locale record. */
91 uint32_t locrec_offset;
92 };
93
94 const struct locarhead *h;
95 const struct namehashent *e;
96 const void *p = MAP_FAILED;
97 _cleanup_close_ int fd = -1;
98 size_t sz = 0;
99 struct stat st;
100 int r;
101
102 fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
103 if (fd < 0)
104 return errno == ENOENT ? 0 : -errno;
105
106 if (fstat(fd, &st) < 0)
107 return -errno;
108
109 if (!S_ISREG(st.st_mode))
110 return -EBADMSG;
111
112 if (st.st_size < (off_t) sizeof(struct locarhead))
113 return -EBADMSG;
114
115 p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
116 if (p == MAP_FAILED)
117 return -errno;
118
119 h = (const struct locarhead *) p;
120 if (h->magic != 0xde020109 ||
121 h->namehash_offset + h->namehash_size > st.st_size ||
122 h->string_offset + h->string_size > st.st_size ||
123 h->locrectab_offset + h->locrectab_size > st.st_size ||
124 h->sumhash_offset + h->sumhash_size > st.st_size) {
125 r = -EBADMSG;
126 goto finish;
127 }
128
129 e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
130 for (size_t i = 0; i < h->namehash_size; i++) {
131 char *z;
132
133 if (e[i].locrec_offset == 0)
134 continue;
135
136 if (!utf8_is_valid((char*) p + e[i].name_offset))
137 continue;
138
139 z = normalize_locale((char*) p + e[i].name_offset);
140 if (!z) {
141 r = -ENOMEM;
142 goto finish;
143 }
144
145 r = set_consume(locales, z);
146 if (r < 0)
147 goto finish;
148 }
149
150 r = 0;
151
152 finish:
153 if (p != MAP_FAILED)
154 munmap((void*) p, sz);
155
156 return r;
157 }
158
159 static int add_locales_from_libdir (Set *locales) {
160 _cleanup_closedir_ DIR *dir = NULL;
161 int r;
162
163 dir = opendir("/usr/lib/locale");
164 if (!dir)
165 return errno == ENOENT ? 0 : -errno;
166
167 FOREACH_DIRENT(de, dir, return -errno) {
168 char *z;
169
170 if (de->d_type != DT_DIR)
171 continue;
172
173 z = normalize_locale(de->d_name);
174 if (!z)
175 return -ENOMEM;
176
177 r = set_consume(locales, z);
178 if (r < 0 && r != -EEXIST)
179 return r;
180 }
181
182 return 0;
183 }
184
185 int get_locales(char ***ret) {
186 _cleanup_set_free_ Set *locales = NULL;
187 _cleanup_strv_free_ char **l = NULL;
188 int r;
189
190 locales = set_new(&string_hash_ops);
191 if (!locales)
192 return -ENOMEM;
193
194 r = add_locales_from_archive(locales);
195 if (r < 0 && r != -ENOENT)
196 return r;
197
198 r = add_locales_from_libdir(locales);
199 if (r < 0)
200 return r;
201
202 l = set_get_strv(locales);
203 if (!l)
204 return -ENOMEM;
205
206 r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
207 if (r == -ENXIO || r == 0) {
208 char **a, **b;
209
210 /* Filter out non-UTF-8 locales, because it's 2019, by default */
211 for (a = b = l; *a; a++) {
212
213 if (endswith(*a, "UTF-8") ||
214 strstr(*a, ".UTF-8@"))
215 *(b++) = *a;
216 else
217 free(*a);
218 }
219
220 *b = NULL;
221
222 } else if (r < 0)
223 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
224
225 strv_sort(l);
226
227 *ret = TAKE_PTR(l);
228
229 return 0;
230 }
231
/* Checks whether 'name' is acceptable as a locale name, purely syntactically: it must be non-empty,
 * shorter than 128 bytes, valid UTF-8, and pass both filename_is_valid() and string_is_safe(). Whether
 * such a locale actually exists is not checked here. */
bool locale_is_valid(const char *name) {
        /* The checks short-circuit from left to right, hence strlen() is only reached for a non-empty
         * name. */
        return !isempty(name) &&
                strlen(name) < 128 &&
                utf8_is_valid(name) &&
                filename_is_valid(name) &&
                string_is_safe(name);
}
251
252 int locale_is_installed(const char *name) {
253 if (!locale_is_valid(name))
254 return false;
255
256 if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
257 return true;
258
259 _cleanup_(freelocalep) locale_t loc =
260 newlocale(LC_ALL_MASK, name, 0);
261 if (loc == (locale_t) 0)
262 return errno == ENOMEM ? -ENOMEM : false;
263
264 return true;
265 }
266
267 void init_gettext(void) {
268 setlocale(LC_ALL, "");
269 textdomain(GETTEXT_PACKAGE);
270 }
271
/* Does the actual work for is_locale_utf8(), without any caching. The answer defaults to 'true' whenever
 * the locale cannot be determined, since UTF-8 is pretty much supported everywhere these days. */
static bool locale_is_utf8_uncached(void) {
        const char *codeset, *ctype;

        /* Could not apply the locale from the environment */
        if (!setlocale(LC_ALL, ""))
                return true;

        /* Codeset unknown, or explicitly UTF-8 */
        codeset = nl_langinfo(CODESET);
        if (!codeset || streq(codeset, "UTF-8"))
                return true;

        /* Not a UTF-8 codeset. Look at the LC_CTYPE locale now: if it is "C"/"POSIX" merely because
         * nothing was configured, CTYPE is effectively unset and we still assume UTF-8. */
        ctype = setlocale(LC_CTYPE, NULL);
        if (!ctype)
                return true;

        /* If one of the variables below is set, then C/POSIX was requested explicitly, and we honour
         * that. */
        return STR_IN_SET(ctype, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");
}

/* Returns whether the locale of the process is considered a UTF-8 locale. The result is determined on
 * the first call (which invokes setlocale(LC_ALL, "")) and cached in a static variable for all later
 * calls. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;

        if (cached_answer < 0)
                cached_answer = locale_is_utf8_uncached();

        return (bool) cached_answer;
}
317
318 void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
319 if (!l)
320 return;
321
322 for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++)
323 l[i] = mfree(l[i]);
324 }
325
326 static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
327 [VARIABLE_LANG] = "LANG",
328 [VARIABLE_LANGUAGE] = "LANGUAGE",
329 [VARIABLE_LC_CTYPE] = "LC_CTYPE",
330 [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
331 [VARIABLE_LC_TIME] = "LC_TIME",
332 [VARIABLE_LC_COLLATE] = "LC_COLLATE",
333 [VARIABLE_LC_MONETARY] = "LC_MONETARY",
334 [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
335 [VARIABLE_LC_PAPER] = "LC_PAPER",
336 [VARIABLE_LC_NAME] = "LC_NAME",
337 [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
338 [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
339 [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
340 [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
341 };
342
343 DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);