]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
75683450 | 2 | |
11c3a366 TA |
3 | #include <errno.h> |
4 | #include <fcntl.h> | |
8752c575 | 5 | #include <langinfo.h> |
11c3a366 | 6 | #include <libintl.h> |
11c3a366 TA |
7 | #include <stddef.h> |
8 | #include <stdint.h> | |
9 | #include <stdlib.h> | |
75683450 | 10 | #include <sys/mman.h> |
11c3a366 | 11 | #include <sys/stat.h> |
75683450 | 12 | |
28db6fbf | 13 | #include "constants.h" |
a0956174 | 14 | #include "dirent-util.h" |
5f1b0cc6 | 15 | #include "env-util.h" |
3ffd4af2 | 16 | #include "fd-util.h" |
1a823cde | 17 | #include "fileio.h" |
93cc7779 | 18 | #include "hashmap.h" |
3ffd4af2 | 19 | #include "locale-util.h" |
ca13432d | 20 | #include "missing_syscall.h" |
bb15fafe | 21 | #include "path-util.h" |
75683450 | 22 | #include "set.h" |
8b43440b | 23 | #include "string-table.h" |
07630cea | 24 | #include "string-util.h" |
75683450 | 25 | #include "strv.h" |
07630cea | 26 | #include "utf8.h" |
75683450 | 27 | |
13f45806 LP |
28 | static char *normalize_locale(const char *name) { |
29 | const char *e; | |
30 | ||
31 | /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it | |
32 | * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix | |
33 | * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse | |
34 | * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do | |
35 | * that for UTF-8 however, since it's kinda the only charset that matters. */ | |
36 | ||
37 | e = endswith(name, ".utf8"); | |
38 | if (e) { | |
39 | _cleanup_free_ char *prefix = NULL; | |
40 | ||
41 | prefix = strndup(name, e - name); | |
42 | if (!prefix) | |
43 | return NULL; | |
44 | ||
45 | return strjoin(prefix, ".UTF-8"); | |
46 | } | |
47 | ||
48 | e = strstr(name, ".utf8@"); | |
49 | if (e) { | |
50 | _cleanup_free_ char *prefix = NULL; | |
51 | ||
52 | prefix = strndup(name, e - name); | |
53 | if (!prefix) | |
54 | return NULL; | |
55 | ||
56 | return strjoin(prefix, ".UTF-8@", e + 6); | |
57 | } | |
58 | ||
59 | return strdup(name); | |
60 | } | |
61 | ||
75683450 LP |
62 | static int add_locales_from_archive(Set *locales) { |
63 | /* Stolen from glibc... */ | |
64 | ||
65 | struct locarhead { | |
66 | uint32_t magic; | |
67 | /* Serial number. */ | |
68 | uint32_t serial; | |
69 | /* Name hash table. */ | |
70 | uint32_t namehash_offset; | |
71 | uint32_t namehash_used; | |
72 | uint32_t namehash_size; | |
73 | /* String table. */ | |
74 | uint32_t string_offset; | |
75 | uint32_t string_used; | |
76 | uint32_t string_size; | |
77 | /* Table with locale records. */ | |
78 | uint32_t locrectab_offset; | |
79 | uint32_t locrectab_used; | |
80 | uint32_t locrectab_size; | |
81 | /* MD5 sum hash table. */ | |
82 | uint32_t sumhash_offset; | |
83 | uint32_t sumhash_used; | |
84 | uint32_t sumhash_size; | |
85 | }; | |
86 | ||
87 | struct namehashent { | |
88 | /* Hash value of the name. */ | |
89 | uint32_t hashval; | |
90 | /* Offset of the name in the string table. */ | |
91 | uint32_t name_offset; | |
92 | /* Offset of the locale record. */ | |
93 | uint32_t locrec_offset; | |
94 | }; | |
95 | ||
96 | const struct locarhead *h; | |
97 | const struct namehashent *e; | |
98 | const void *p = MAP_FAILED; | |
254d1313 | 99 | _cleanup_close_ int fd = -EBADF; |
75683450 LP |
100 | size_t sz = 0; |
101 | struct stat st; | |
75683450 LP |
102 | int r; |
103 | ||
104 | fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
105 | if (fd < 0) | |
106 | return errno == ENOENT ? 0 : -errno; | |
107 | ||
108 | if (fstat(fd, &st) < 0) | |
109 | return -errno; | |
110 | ||
111 | if (!S_ISREG(st.st_mode)) | |
112 | return -EBADMSG; | |
113 | ||
114 | if (st.st_size < (off_t) sizeof(struct locarhead)) | |
115 | return -EBADMSG; | |
116 | ||
1a823cde TS |
117 | if (file_offset_beyond_memory_size(st.st_size)) |
118 | return -EFBIG; | |
119 | ||
75683450 LP |
120 | p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); |
121 | if (p == MAP_FAILED) | |
122 | return -errno; | |
123 | ||
124 | h = (const struct locarhead *) p; | |
125 | if (h->magic != 0xde020109 || | |
126 | h->namehash_offset + h->namehash_size > st.st_size || | |
127 | h->string_offset + h->string_size > st.st_size || | |
128 | h->locrectab_offset + h->locrectab_size > st.st_size || | |
129 | h->sumhash_offset + h->sumhash_size > st.st_size) { | |
130 | r = -EBADMSG; | |
131 | goto finish; | |
132 | } | |
133 | ||
134 | e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset); | |
0eacd185 | 135 | for (size_t i = 0; i < h->namehash_size; i++) { |
75683450 LP |
136 | char *z; |
137 | ||
138 | if (e[i].locrec_offset == 0) | |
139 | continue; | |
140 | ||
141 | if (!utf8_is_valid((char*) p + e[i].name_offset)) | |
142 | continue; | |
143 | ||
13f45806 | 144 | z = normalize_locale((char*) p + e[i].name_offset); |
75683450 LP |
145 | if (!z) { |
146 | r = -ENOMEM; | |
147 | goto finish; | |
148 | } | |
149 | ||
150 | r = set_consume(locales, z); | |
151 | if (r < 0) | |
152 | goto finish; | |
153 | } | |
154 | ||
155 | r = 0; | |
156 | ||
157 | finish: | |
158 | if (p != MAP_FAILED) | |
159 | munmap((void*) p, sz); | |
160 | ||
161 | return r; | |
162 | } | |
163 | ||
065058e6 | 164 | static int add_locales_from_libdir(Set *locales) { |
75683450 | 165 | _cleanup_closedir_ DIR *dir = NULL; |
75683450 LP |
166 | int r; |
167 | ||
168 | dir = opendir("/usr/lib/locale"); | |
169 | if (!dir) | |
170 | return errno == ENOENT ? 0 : -errno; | |
171 | ||
af3b864d | 172 | FOREACH_DIRENT(de, dir, return -errno) { |
75683450 LP |
173 | char *z; |
174 | ||
af3b864d | 175 | if (de->d_type != DT_DIR) |
75683450 LP |
176 | continue; |
177 | ||
af3b864d | 178 | z = normalize_locale(de->d_name); |
75683450 LP |
179 | if (!z) |
180 | return -ENOMEM; | |
181 | ||
182 | r = set_consume(locales, z); | |
183 | if (r < 0 && r != -EEXIST) | |
184 | return r; | |
185 | } | |
186 | ||
187 | return 0; | |
188 | } | |
189 | ||
190 | int get_locales(char ***ret) { | |
065058e6 | 191 | _cleanup_set_free_free_ Set *locales = NULL; |
75683450 LP |
192 | _cleanup_strv_free_ char **l = NULL; |
193 | int r; | |
194 | ||
d5099efc | 195 | locales = set_new(&string_hash_ops); |
75683450 LP |
196 | if (!locales) |
197 | return -ENOMEM; | |
198 | ||
199 | r = add_locales_from_archive(locales); | |
200 | if (r < 0 && r != -ENOENT) | |
201 | return r; | |
202 | ||
203 | r = add_locales_from_libdir(locales); | |
204 | if (r < 0) | |
205 | return r; | |
206 | ||
a2f79377 YW |
207 | char *locale; |
208 | SET_FOREACH(locale, locales) { | |
209 | r = locale_is_installed(locale); | |
210 | if (r < 0) | |
211 | return r; | |
212 | if (r == 0) | |
213 | free(set_remove(locales, locale)); | |
214 | } | |
215 | ||
75683450 LP |
216 | l = set_get_strv(locales); |
217 | if (!l) | |
218 | return -ENOMEM; | |
219 | ||
065058e6 YW |
220 | /* Now, all elements are owned by strv 'l'. Hence, do not call set_free_free(). */ |
221 | locales = set_free(locales); | |
222 | ||
a7d9fccd | 223 | r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES"); |
c83f4220 | 224 | if (IN_SET(r, -ENXIO, 0)) { |
a7d9fccd LP |
225 | char **a, **b; |
226 | ||
227 | /* Filter out non-UTF-8 locales, because it's 2019, by default */ | |
228 | for (a = b = l; *a; a++) { | |
229 | ||
230 | if (endswith(*a, "UTF-8") || | |
231 | strstr(*a, ".UTF-8@")) | |
232 | *(b++) = *a; | |
233 | else | |
234 | free(*a); | |
235 | } | |
236 | ||
237 | *b = NULL; | |
238 | ||
239 | } else if (r < 0) | |
240 | log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean"); | |
241 | ||
75683450 LP |
242 | strv_sort(l); |
243 | ||
1cc6c93a | 244 | *ret = TAKE_PTR(l); |
75683450 LP |
245 | |
246 | return 0; | |
247 | } | |
248 | ||
249 | bool locale_is_valid(const char *name) { | |
250 | ||
251 | if (isempty(name)) | |
252 | return false; | |
253 | ||
254 | if (strlen(name) >= 128) | |
255 | return false; | |
256 | ||
257 | if (!utf8_is_valid(name)) | |
258 | return false; | |
259 | ||
ae6c3cc0 | 260 | if (!filename_is_valid(name)) |
75683450 LP |
261 | return false; |
262 | ||
fa485e8f AV |
263 | /* Locales look like: ll_CC.ENC@variant, where ll and CC are alphabetic, ENC is alphanumeric with |
264 | * dashes, and variant seems to be alphabetic. | |
265 | * See: https://www.gnu.org/software/gettext/manual/html_node/Locale-Names.html */ | |
266 | if (!in_charset(name, ALPHANUMERICAL "_.-@")) | |
75683450 LP |
267 | return false; |
268 | ||
269 | return true; | |
270 | } | |
a3428668 | 271 | |
23fa786c LP |
272 | int locale_is_installed(const char *name) { |
273 | if (!locale_is_valid(name)) | |
274 | return false; | |
275 | ||
276 | if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */ | |
277 | return true; | |
278 | ||
279 | _cleanup_(freelocalep) locale_t loc = | |
280 | newlocale(LC_ALL_MASK, name, 0); | |
281 | if (loc == (locale_t) 0) | |
282 | return errno == ENOMEM ? -ENOMEM : false; | |
283 | ||
284 | return true; | |
285 | } | |
286 | ||
static int is_locale_utf8_uncached(void) {
        const char *codeset, *ctype;
        int r;

        /* Does the actual work for is_locale_utf8(). Note that we default to 'true' here, since today
         * UTF8 is pretty much supported everywhere. */

        /* An explicit setting in the environment always wins. */
        r = secure_getenv_bool("SYSTEMD_UTF8");
        if (r >= 0)
                return r;
        if (r != -ENXIO)
                log_debug_errno(r, "Failed to parse $SYSTEMD_UTF8, ignoring: %m");

        /* This function may be called from libsystemd, and setlocale() is not thread safe. Assuming yes. */
        if (gettid() != raw_getpid())
                return true;

        if (!setlocale(LC_ALL, ""))
                return true;

        codeset = nl_langinfo(CODESET);
        if (!codeset || streq(codeset, "UTF-8"))
                return true;

        /* For LC_CTYPE=="C" return true, because CTYPE is effectively unset and everything can do
         * UTF-8 nowadays. */
        ctype = setlocale(LC_CTYPE, NULL);
        if (!ctype)
                return true;

        /* Check result, but ignore the result if C was set explicitly. */
        return STR_IN_SET(ctype, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");
}

bool is_locale_utf8(void) {
        /* Tells whether UTF-8 output may be used. The answer is determined once and then served from
         * a static cache on all later calls. */

        static int cached_answer = -1;

        if (cached_answer < 0)
                cached_answer = is_locale_utf8_uncached();

        return (bool) cached_answer;
}
346 | ||
/* Releases the strings stored in all _VARIABLE_LC_MAX slots of 'l' by handing the whole array to
 * free_many_charp(). NOTE(review): presumably the slots are reset to NULL as well, so that the array
 * may be reused — confirm against free_many_charp(). */
void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
        free_many_charp(l, _VARIABLE_LC_MAX);
}
350 | ||
3d36b5d7 YW |
351 | void locale_variables_simplify(char *l[_VARIABLE_LC_MAX]) { |
352 | assert(l); | |
353 | ||
354 | for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++) { | |
355 | if (p == VARIABLE_LANG) | |
356 | continue; | |
357 | if (isempty(l[p]) || streq_ptr(l[VARIABLE_LANG], l[p])) | |
358 | l[p] = mfree(l[p]); | |
359 | } | |
360 | } | |
361 | ||
/* Maps each LocaleVariable value to the name of the corresponding locale variable ("LANG",
 * "LANGUAGE" and the various "LC_*" categories). The table is indexed by the enum value. */
static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
        [VARIABLE_LANG]              = "LANG",
        [VARIABLE_LANGUAGE]          = "LANGUAGE",
        [VARIABLE_LC_CTYPE]          = "LC_CTYPE",
        [VARIABLE_LC_NUMERIC]        = "LC_NUMERIC",
        [VARIABLE_LC_TIME]           = "LC_TIME",
        [VARIABLE_LC_COLLATE]        = "LC_COLLATE",
        [VARIABLE_LC_MONETARY]       = "LC_MONETARY",
        [VARIABLE_LC_MESSAGES]       = "LC_MESSAGES",
        [VARIABLE_LC_PAPER]          = "LC_PAPER",
        [VARIABLE_LC_NAME]           = "LC_NAME",
        [VARIABLE_LC_ADDRESS]        = "LC_ADDRESS",
        [VARIABLE_LC_TELEPHONE]      = "LC_TELEPHONE",
        [VARIABLE_LC_MEASUREMENT]    = "LC_MEASUREMENT",
        [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
};

/* NOTE(review): presumably expands to the string <-> LocaleVariable lookup functions built on top of
 * the table above — confirm against string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);