]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/locale-util.c
journal: Serialize __MONOTONIC_TIMESTAMP metadata field as well
[thirdparty/systemd.git] / src / basic / locale-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
75683450 2
11c3a366
TA
3#include <errno.h>
4#include <fcntl.h>
8752c575 5#include <langinfo.h>
11c3a366 6#include <libintl.h>
11c3a366
TA
7#include <stddef.h>
8#include <stdint.h>
9#include <stdlib.h>
75683450 10#include <sys/mman.h>
11c3a366 11#include <sys/stat.h>
75683450 12
28db6fbf 13#include "constants.h"
a0956174 14#include "dirent-util.h"
5f1b0cc6 15#include "env-util.h"
3ffd4af2 16#include "fd-util.h"
1a823cde 17#include "fileio.h"
93cc7779 18#include "hashmap.h"
3ffd4af2 19#include "locale-util.h"
ca13432d 20#include "missing_syscall.h"
bb15fafe 21#include "path-util.h"
75683450 22#include "set.h"
8b43440b 23#include "string-table.h"
07630cea 24#include "string-util.h"
75683450 25#include "strv.h"
07630cea 26#include "utf8.h"
75683450 27
13f45806
LP
28static char *normalize_locale(const char *name) {
29 const char *e;
30
31 /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
32 * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
33 * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
34 * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
35 * that for UTF-8 however, since it's kinda the only charset that matters. */
36
37 e = endswith(name, ".utf8");
38 if (e) {
39 _cleanup_free_ char *prefix = NULL;
40
41 prefix = strndup(name, e - name);
42 if (!prefix)
43 return NULL;
44
45 return strjoin(prefix, ".UTF-8");
46 }
47
48 e = strstr(name, ".utf8@");
49 if (e) {
50 _cleanup_free_ char *prefix = NULL;
51
52 prefix = strndup(name, e - name);
53 if (!prefix)
54 return NULL;
55
56 return strjoin(prefix, ".UTF-8@", e + 6);
57 }
58
59 return strdup(name);
60}
61
75683450
LP
62static int add_locales_from_archive(Set *locales) {
63 /* Stolen from glibc... */
64
65 struct locarhead {
66 uint32_t magic;
67 /* Serial number. */
68 uint32_t serial;
69 /* Name hash table. */
70 uint32_t namehash_offset;
71 uint32_t namehash_used;
72 uint32_t namehash_size;
73 /* String table. */
74 uint32_t string_offset;
75 uint32_t string_used;
76 uint32_t string_size;
77 /* Table with locale records. */
78 uint32_t locrectab_offset;
79 uint32_t locrectab_used;
80 uint32_t locrectab_size;
81 /* MD5 sum hash table. */
82 uint32_t sumhash_offset;
83 uint32_t sumhash_used;
84 uint32_t sumhash_size;
85 };
86
87 struct namehashent {
88 /* Hash value of the name. */
89 uint32_t hashval;
90 /* Offset of the name in the string table. */
91 uint32_t name_offset;
92 /* Offset of the locale record. */
93 uint32_t locrec_offset;
94 };
95
96 const struct locarhead *h;
97 const struct namehashent *e;
98 const void *p = MAP_FAILED;
254d1313 99 _cleanup_close_ int fd = -EBADF;
75683450
LP
100 size_t sz = 0;
101 struct stat st;
75683450
LP
102 int r;
103
104 fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
105 if (fd < 0)
106 return errno == ENOENT ? 0 : -errno;
107
108 if (fstat(fd, &st) < 0)
109 return -errno;
110
111 if (!S_ISREG(st.st_mode))
112 return -EBADMSG;
113
114 if (st.st_size < (off_t) sizeof(struct locarhead))
115 return -EBADMSG;
116
1a823cde
TS
117 if (file_offset_beyond_memory_size(st.st_size))
118 return -EFBIG;
119
75683450
LP
120 p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
121 if (p == MAP_FAILED)
122 return -errno;
123
124 h = (const struct locarhead *) p;
125 if (h->magic != 0xde020109 ||
126 h->namehash_offset + h->namehash_size > st.st_size ||
127 h->string_offset + h->string_size > st.st_size ||
128 h->locrectab_offset + h->locrectab_size > st.st_size ||
129 h->sumhash_offset + h->sumhash_size > st.st_size) {
130 r = -EBADMSG;
131 goto finish;
132 }
133
134 e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
0eacd185 135 for (size_t i = 0; i < h->namehash_size; i++) {
75683450
LP
136 char *z;
137
138 if (e[i].locrec_offset == 0)
139 continue;
140
141 if (!utf8_is_valid((char*) p + e[i].name_offset))
142 continue;
143
13f45806 144 z = normalize_locale((char*) p + e[i].name_offset);
75683450
LP
145 if (!z) {
146 r = -ENOMEM;
147 goto finish;
148 }
149
150 r = set_consume(locales, z);
151 if (r < 0)
152 goto finish;
153 }
154
155 r = 0;
156
157 finish:
158 if (p != MAP_FAILED)
159 munmap((void*) p, sz);
160
161 return r;
162}
163
065058e6 164static int add_locales_from_libdir(Set *locales) {
75683450 165 _cleanup_closedir_ DIR *dir = NULL;
75683450
LP
166 int r;
167
168 dir = opendir("/usr/lib/locale");
169 if (!dir)
170 return errno == ENOENT ? 0 : -errno;
171
af3b864d 172 FOREACH_DIRENT(de, dir, return -errno) {
75683450
LP
173 char *z;
174
af3b864d 175 if (de->d_type != DT_DIR)
75683450
LP
176 continue;
177
af3b864d 178 z = normalize_locale(de->d_name);
75683450
LP
179 if (!z)
180 return -ENOMEM;
181
182 r = set_consume(locales, z);
183 if (r < 0 && r != -EEXIST)
184 return r;
185 }
186
187 return 0;
188}
189
190int get_locales(char ***ret) {
065058e6 191 _cleanup_set_free_free_ Set *locales = NULL;
75683450
LP
192 _cleanup_strv_free_ char **l = NULL;
193 int r;
194
d5099efc 195 locales = set_new(&string_hash_ops);
75683450
LP
196 if (!locales)
197 return -ENOMEM;
198
199 r = add_locales_from_archive(locales);
200 if (r < 0 && r != -ENOENT)
201 return r;
202
203 r = add_locales_from_libdir(locales);
204 if (r < 0)
205 return r;
206
a2f79377
YW
207 char *locale;
208 SET_FOREACH(locale, locales) {
209 r = locale_is_installed(locale);
210 if (r < 0)
211 return r;
212 if (r == 0)
213 free(set_remove(locales, locale));
214 }
215
75683450
LP
216 l = set_get_strv(locales);
217 if (!l)
218 return -ENOMEM;
219
065058e6
YW
220 /* Now, all elements are owned by strv 'l'. Hence, do not call set_free_free(). */
221 locales = set_free(locales);
222
a7d9fccd 223 r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
c83f4220 224 if (IN_SET(r, -ENXIO, 0)) {
a7d9fccd
LP
225 char **a, **b;
226
227 /* Filter out non-UTF-8 locales, because it's 2019, by default */
228 for (a = b = l; *a; a++) {
229
230 if (endswith(*a, "UTF-8") ||
231 strstr(*a, ".UTF-8@"))
232 *(b++) = *a;
233 else
234 free(*a);
235 }
236
237 *b = NULL;
238
239 } else if (r < 0)
240 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
241
75683450
LP
242 strv_sort(l);
243
1cc6c93a 244 *ret = TAKE_PTR(l);
75683450
LP
245
246 return 0;
247}
248
249bool locale_is_valid(const char *name) {
250
251 if (isempty(name))
252 return false;
253
254 if (strlen(name) >= 128)
255 return false;
256
257 if (!utf8_is_valid(name))
258 return false;
259
ae6c3cc0 260 if (!filename_is_valid(name))
75683450
LP
261 return false;
262
fa485e8f
AV
263 /* Locales look like: ll_CC.ENC@variant, where ll and CC are alphabetic, ENC is alphanumeric with
264 * dashes, and variant seems to be alphabetic.
265 * See: https://www.gnu.org/software/gettext/manual/html_node/Locale-Names.html */
266 if (!in_charset(name, ALPHANUMERICAL "_.-@"))
75683450
LP
267 return false;
268
269 return true;
270}
a3428668 271
23fa786c
LP
272int locale_is_installed(const char *name) {
273 if (!locale_is_valid(name))
274 return false;
275
276 if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
277 return true;
278
279 _cleanup_(freelocalep) locale_t loc =
280 newlocale(LC_ALL_MASK, name, 0);
281 if (loc == (locale_t) 0)
282 return errno == ENOMEM ? -ENOMEM : false;
283
284 return true;
285}
286
/* Reports whether the process locale uses UTF-8. The result is computed on the first call and cached
 * in a function-local static for all later calls.
 *
 * The answer defaults to 'true' whenever it cannot be determined, since today UTF-8 is pretty much
 * supported everywhere. $SYSTEMD_UTF8, if it parses as a boolean, overrides everything else. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;
        const char *codeset, *ctype;
        int r;

        if (cached_answer >= 0)
                return (bool) cached_answer;

        r = secure_getenv_bool("SYSTEMD_UTF8");
        if (r >= 0) {
                cached_answer = r;
                return (bool) cached_answer;
        }
        if (r != -ENXIO)
                log_debug_errno(r, "Failed to parse $SYSTEMD_UTF8, ignoring: %m");

        /* This function may be called from libsystemd, and setlocale() is not thread safe. Hence, when
         * not running in the main thread, skip setlocale() and assume yes. Likewise assume yes if the
         * locale cannot be set up. */
        if (gettid() != raw_getpid() || !setlocale(LC_ALL, "")) {
                cached_answer = true;
                return (bool) cached_answer;
        }

        codeset = nl_langinfo(CODESET);
        if (!codeset || streq(codeset, "UTF-8")) {
                cached_answer = true;
                return (bool) cached_answer;
        }

        /* For LC_CTYPE=="C" return true, because CTYPE is effectively unset and everything can deal
         * with UTF-8 nowadays. */
        ctype = setlocale(LC_CTYPE, NULL);
        if (!ctype) {
                cached_answer = true;
                return (bool) cached_answer;
        }

        /* Check the result, but ignore it if C was set explicitly, i.e. if any of the relevant
         * environment variables is set. */
        cached_answer =
                STR_IN_SET(ctype, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");

        return (bool) cached_answer;
}
346
f2a3de01 347void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
24ae45cb 348 free_many_charp(l, _VARIABLE_LC_MAX);
e6755a33
LP
349}
350
3d36b5d7
YW
351void locale_variables_simplify(char *l[_VARIABLE_LC_MAX]) {
352 assert(l);
353
354 for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++) {
355 if (p == VARIABLE_LANG)
356 continue;
357 if (isempty(l[p]) || streq_ptr(l[VARIABLE_LANG], l[p]))
358 l[p] = mfree(l[p]);
359 }
360}
361
a3428668 362static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
d2e96a4f
YW
363 [VARIABLE_LANG] = "LANG",
364 [VARIABLE_LANGUAGE] = "LANGUAGE",
365 [VARIABLE_LC_CTYPE] = "LC_CTYPE",
366 [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
367 [VARIABLE_LC_TIME] = "LC_TIME",
368 [VARIABLE_LC_COLLATE] = "LC_COLLATE",
369 [VARIABLE_LC_MONETARY] = "LC_MONETARY",
370 [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
371 [VARIABLE_LC_PAPER] = "LC_PAPER",
372 [VARIABLE_LC_NAME] = "LC_NAME",
373 [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
374 [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
375 [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
a3428668
MS
376 [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
377};
378
379DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);