]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
75683450 | 2 | |
11c3a366 TA |
3 | #include <errno.h> |
4 | #include <fcntl.h> | |
8752c575 | 5 | #include <langinfo.h> |
11c3a366 | 6 | #include <libintl.h> |
11c3a366 TA |
7 | #include <stddef.h> |
8 | #include <stdint.h> | |
9 | #include <stdlib.h> | |
75683450 | 10 | #include <sys/mman.h> |
11c3a366 | 11 | #include <sys/stat.h> |
75683450 | 12 | |
ed457f13 | 13 | #include "def.h" |
a0956174 | 14 | #include "dirent-util.h" |
5f1b0cc6 | 15 | #include "env-util.h" |
3ffd4af2 | 16 | #include "fd-util.h" |
1a823cde | 17 | #include "fileio.h" |
93cc7779 | 18 | #include "hashmap.h" |
3ffd4af2 | 19 | #include "locale-util.h" |
bb15fafe | 20 | #include "path-util.h" |
75683450 | 21 | #include "set.h" |
8b43440b | 22 | #include "string-table.h" |
07630cea | 23 | #include "string-util.h" |
75683450 | 24 | #include "strv.h" |
07630cea | 25 | #include "utf8.h" |
75683450 | 26 | |
13f45806 LP |
27 | static char *normalize_locale(const char *name) { |
28 | const char *e; | |
29 | ||
30 | /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it | |
31 | * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix | |
32 | * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse | |
33 | * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do | |
34 | * that for UTF-8 however, since it's kinda the only charset that matters. */ | |
35 | ||
36 | e = endswith(name, ".utf8"); | |
37 | if (e) { | |
38 | _cleanup_free_ char *prefix = NULL; | |
39 | ||
40 | prefix = strndup(name, e - name); | |
41 | if (!prefix) | |
42 | return NULL; | |
43 | ||
44 | return strjoin(prefix, ".UTF-8"); | |
45 | } | |
46 | ||
47 | e = strstr(name, ".utf8@"); | |
48 | if (e) { | |
49 | _cleanup_free_ char *prefix = NULL; | |
50 | ||
51 | prefix = strndup(name, e - name); | |
52 | if (!prefix) | |
53 | return NULL; | |
54 | ||
55 | return strjoin(prefix, ".UTF-8@", e + 6); | |
56 | } | |
57 | ||
58 | return strdup(name); | |
59 | } | |
60 | ||
75683450 LP |
61 | static int add_locales_from_archive(Set *locales) { |
62 | /* Stolen from glibc... */ | |
63 | ||
64 | struct locarhead { | |
65 | uint32_t magic; | |
66 | /* Serial number. */ | |
67 | uint32_t serial; | |
68 | /* Name hash table. */ | |
69 | uint32_t namehash_offset; | |
70 | uint32_t namehash_used; | |
71 | uint32_t namehash_size; | |
72 | /* String table. */ | |
73 | uint32_t string_offset; | |
74 | uint32_t string_used; | |
75 | uint32_t string_size; | |
76 | /* Table with locale records. */ | |
77 | uint32_t locrectab_offset; | |
78 | uint32_t locrectab_used; | |
79 | uint32_t locrectab_size; | |
80 | /* MD5 sum hash table. */ | |
81 | uint32_t sumhash_offset; | |
82 | uint32_t sumhash_used; | |
83 | uint32_t sumhash_size; | |
84 | }; | |
85 | ||
86 | struct namehashent { | |
87 | /* Hash value of the name. */ | |
88 | uint32_t hashval; | |
89 | /* Offset of the name in the string table. */ | |
90 | uint32_t name_offset; | |
91 | /* Offset of the locale record. */ | |
92 | uint32_t locrec_offset; | |
93 | }; | |
94 | ||
95 | const struct locarhead *h; | |
96 | const struct namehashent *e; | |
97 | const void *p = MAP_FAILED; | |
98 | _cleanup_close_ int fd = -1; | |
99 | size_t sz = 0; | |
100 | struct stat st; | |
75683450 LP |
101 | int r; |
102 | ||
103 | fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC); | |
104 | if (fd < 0) | |
105 | return errno == ENOENT ? 0 : -errno; | |
106 | ||
107 | if (fstat(fd, &st) < 0) | |
108 | return -errno; | |
109 | ||
110 | if (!S_ISREG(st.st_mode)) | |
111 | return -EBADMSG; | |
112 | ||
113 | if (st.st_size < (off_t) sizeof(struct locarhead)) | |
114 | return -EBADMSG; | |
115 | ||
1a823cde TS |
116 | if (file_offset_beyond_memory_size(st.st_size)) |
117 | return -EFBIG; | |
118 | ||
75683450 LP |
119 | p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); |
120 | if (p == MAP_FAILED) | |
121 | return -errno; | |
122 | ||
123 | h = (const struct locarhead *) p; | |
124 | if (h->magic != 0xde020109 || | |
125 | h->namehash_offset + h->namehash_size > st.st_size || | |
126 | h->string_offset + h->string_size > st.st_size || | |
127 | h->locrectab_offset + h->locrectab_size > st.st_size || | |
128 | h->sumhash_offset + h->sumhash_size > st.st_size) { | |
129 | r = -EBADMSG; | |
130 | goto finish; | |
131 | } | |
132 | ||
133 | e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset); | |
0eacd185 | 134 | for (size_t i = 0; i < h->namehash_size; i++) { |
75683450 LP |
135 | char *z; |
136 | ||
137 | if (e[i].locrec_offset == 0) | |
138 | continue; | |
139 | ||
140 | if (!utf8_is_valid((char*) p + e[i].name_offset)) | |
141 | continue; | |
142 | ||
13f45806 | 143 | z = normalize_locale((char*) p + e[i].name_offset); |
75683450 LP |
144 | if (!z) { |
145 | r = -ENOMEM; | |
146 | goto finish; | |
147 | } | |
148 | ||
149 | r = set_consume(locales, z); | |
150 | if (r < 0) | |
151 | goto finish; | |
152 | } | |
153 | ||
154 | r = 0; | |
155 | ||
156 | finish: | |
157 | if (p != MAP_FAILED) | |
158 | munmap((void*) p, sz); | |
159 | ||
160 | return r; | |
161 | } | |
162 | ||
065058e6 | 163 | static int add_locales_from_libdir(Set *locales) { |
75683450 | 164 | _cleanup_closedir_ DIR *dir = NULL; |
75683450 LP |
165 | int r; |
166 | ||
167 | dir = opendir("/usr/lib/locale"); | |
168 | if (!dir) | |
169 | return errno == ENOENT ? 0 : -errno; | |
170 | ||
af3b864d | 171 | FOREACH_DIRENT(de, dir, return -errno) { |
75683450 LP |
172 | char *z; |
173 | ||
af3b864d | 174 | if (de->d_type != DT_DIR) |
75683450 LP |
175 | continue; |
176 | ||
af3b864d | 177 | z = normalize_locale(de->d_name); |
75683450 LP |
178 | if (!z) |
179 | return -ENOMEM; | |
180 | ||
181 | r = set_consume(locales, z); | |
182 | if (r < 0 && r != -EEXIST) | |
183 | return r; | |
184 | } | |
185 | ||
186 | return 0; | |
187 | } | |
188 | ||
189 | int get_locales(char ***ret) { | |
065058e6 | 190 | _cleanup_set_free_free_ Set *locales = NULL; |
75683450 LP |
191 | _cleanup_strv_free_ char **l = NULL; |
192 | int r; | |
193 | ||
d5099efc | 194 | locales = set_new(&string_hash_ops); |
75683450 LP |
195 | if (!locales) |
196 | return -ENOMEM; | |
197 | ||
198 | r = add_locales_from_archive(locales); | |
199 | if (r < 0 && r != -ENOENT) | |
200 | return r; | |
201 | ||
202 | r = add_locales_from_libdir(locales); | |
203 | if (r < 0) | |
204 | return r; | |
205 | ||
a2f79377 YW |
206 | char *locale; |
207 | SET_FOREACH(locale, locales) { | |
208 | r = locale_is_installed(locale); | |
209 | if (r < 0) | |
210 | return r; | |
211 | if (r == 0) | |
212 | free(set_remove(locales, locale)); | |
213 | } | |
214 | ||
75683450 LP |
215 | l = set_get_strv(locales); |
216 | if (!l) | |
217 | return -ENOMEM; | |
218 | ||
065058e6 YW |
219 | /* Now, all elements are owned by strv 'l'. Hence, do not call set_free_free(). */ |
220 | locales = set_free(locales); | |
221 | ||
a7d9fccd LP |
222 | r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES"); |
223 | if (r == -ENXIO || r == 0) { | |
224 | char **a, **b; | |
225 | ||
226 | /* Filter out non-UTF-8 locales, because it's 2019, by default */ | |
227 | for (a = b = l; *a; a++) { | |
228 | ||
229 | if (endswith(*a, "UTF-8") || | |
230 | strstr(*a, ".UTF-8@")) | |
231 | *(b++) = *a; | |
232 | else | |
233 | free(*a); | |
234 | } | |
235 | ||
236 | *b = NULL; | |
237 | ||
238 | } else if (r < 0) | |
239 | log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean"); | |
240 | ||
75683450 LP |
241 | strv_sort(l); |
242 | ||
1cc6c93a | 243 | *ret = TAKE_PTR(l); |
75683450 LP |
244 | |
245 | return 0; | |
246 | } | |
247 | ||
/* Syntactic check of a locale name. Returns true only if 'name' is non-empty, shorter than 128 bytes,
 * and passes utf8_is_valid(), filename_is_valid() and string_is_safe(). The checks run in that order and
 * stop at the first one that fails. Whether the locale is installed is not checked here. */
bool locale_is_valid(const char *name) {
        return !isempty(name) &&
                strlen(name) < 128 &&
                utf8_is_valid(name) &&
                filename_is_valid(name) &&
                string_is_safe(name);
}
a3428668 | 267 | |
23fa786c LP |
268 | int locale_is_installed(const char *name) { |
269 | if (!locale_is_valid(name)) | |
270 | return false; | |
271 | ||
272 | if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */ | |
273 | return true; | |
274 | ||
275 | _cleanup_(freelocalep) locale_t loc = | |
276 | newlocale(LC_ALL_MASK, name, 0); | |
277 | if (loc == (locale_t) 0) | |
278 | return errno == ENOMEM ? -ENOMEM : false; | |
279 | ||
280 | return true; | |
281 | } | |
282 | ||
8752c575 LP |
283 | void init_gettext(void) { |
284 | setlocale(LC_ALL, ""); | |
285 | textdomain(GETTEXT_PACKAGE); | |
286 | } | |
287 | ||
/* Does the actual detection for is_locale_utf8(). The answer is 'true' whenever something cannot be
 * determined, since UTF-8 is supported pretty much everywhere today. */
static bool locale_utf8_uncached(void) {
        const char *s;

        if (!setlocale(LC_ALL, ""))
                return true;

        s = nl_langinfo(CODESET);
        if (!s || streq(s, "UTF-8"))
                return true;

        s = setlocale(LC_CTYPE, NULL);
        if (!s)
                return true;

        /* A "C"/"POSIX" character type still counts as UTF-8 capable as long as it is merely the
         * default, i.e. none of the relevant environment variables is set. If one of them is set the
         * non-UTF-8 locale was requested explicitly, and we honour that. */
        return STR_IN_SET(s, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");
}

/* Returns whether the current locale should be treated as UTF-8. The first call initializes the locale
 * from the environment via setlocale(LC_ALL, "") and stores the result in a function-local static;
 * later calls return the stored value without looking at the locale again. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;

        if (cached_answer < 0)
                cached_answer = locale_utf8_uncached();

        return (bool) cached_answer;
}
333 | ||
f2a3de01 | 334 | void locale_variables_free(char *l[_VARIABLE_LC_MAX]) { |
e6755a33 LP |
335 | if (!l) |
336 | return; | |
337 | ||
0eacd185 | 338 | for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++) |
e6755a33 LP |
339 | l[i] = mfree(l[i]); |
340 | } | |
341 | ||
a3428668 MS |
342 | static const char * const locale_variable_table[_VARIABLE_LC_MAX] = { |
343 | [VARIABLE_LANG] = "LANG", | |
344 | [VARIABLE_LANGUAGE] = "LANGUAGE", | |
345 | [VARIABLE_LC_CTYPE] = "LC_CTYPE", | |
346 | [VARIABLE_LC_NUMERIC] = "LC_NUMERIC", | |
347 | [VARIABLE_LC_TIME] = "LC_TIME", | |
348 | [VARIABLE_LC_COLLATE] = "LC_COLLATE", | |
349 | [VARIABLE_LC_MONETARY] = "LC_MONETARY", | |
350 | [VARIABLE_LC_MESSAGES] = "LC_MESSAGES", | |
351 | [VARIABLE_LC_PAPER] = "LC_PAPER", | |
352 | [VARIABLE_LC_NAME] = "LC_NAME", | |
353 | [VARIABLE_LC_ADDRESS] = "LC_ADDRESS", | |
354 | [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE", | |
355 | [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT", | |
356 | [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION" | |
357 | }; | |
358 | ||
359 | DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable); |