]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/loadarchive.c
NEWS: Add advisories.
[thirdparty/glibc.git] / locale / loadarchive.c
CommitLineData
/* Code to load locale data from the locale archive file.
   Copyright (C) 2002-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
cb09a2cd
RM
18
19#include <locale.h>
20#include <stddef.h>
c0ad824e 21#include <stdlib.h>
cb09a2cd
RM
22#include <stdbool.h>
23#include <errno.h>
24#include <assert.h>
25#include <string.h>
26#include <fcntl.h>
27#include <unistd.h>
e054f494 28#include <stdint.h>
cb09a2cd
RM
29#include <sys/mman.h>
30#include <sys/stat.h>
31#include <sys/param.h>
32
33#include "localeinfo.h"
34#include "locarchive.h"
8dab36a1 35#include <not-cancel.h>
cb09a2cd
RM
36
37/* Define the hash function. We define the function as static inline. */
38#define compute_hashval static inline compute_hashval
a3f9038c 39#define hashval_t uint32_t
cb09a2cd
RM
40#include "hashval.h"
41#undef compute_hashval
42
cb09a2cd
RM
43
44/* Name of the locale archive file. */
90fe682d 45static const char archfname[] = COMPLOCALEDIR "/locale-archive";
cb09a2cd 46
5bb99914
RM
/* Size in bytes (2 MiB) of the initial mapping window, optimal if large
   enough to cover the header plus the initial locale.  */
#define ARCHIVE_MAPPING_WINDOW (2 * 1024 * 1024)
50
4e20f1e2
RM
#ifndef MAP_COPY
/* This is not quite as good as MAP_COPY since unexamined pages
   can change out from under us and give us inconsistent data.
   But we rely on the user not to diddle the system's live archive.
   Even though we only ever use PROT_READ, using MAP_SHARED would
   not give the system sufficient freedom to e.g. let the on disk
   file go away because it doesn't know we won't call mprotect later.  */
# define MAP_COPY MAP_PRIVATE
#endif
#ifndef MAP_FILE
/* Some systems do not have this flag; it is superfluous.  */
# define MAP_FILE 0
#endif
cb09a2cd
RM
64
/* Record of contiguous pages already mapped from the locale archive.  */
struct archmapped
{
  void *ptr;                    /* Address returned by mmap for this window.  */
  uint32_t from;                /* File offset at which the window starts.  */
  uint32_t len;                 /* Length of the window in bytes.  */
  struct archmapped *next;      /* Next window in the list, or NULL.  */
};

/* Head of the list of mapped windows.  It stays NULL until the first
   lookup attempts to open the archive.  */
static struct archmapped *archmapped;
74
75/* This describes the mapping at the beginning of the file that contains
76 the header data. There could be data in the following partial page,
77 so this is searched like any other. Once the archive has been used,
78 ARCHMAPPED points to this; if mapping the archive header failed,
79 then headmap.ptr is null. */
80static struct archmapped headmap;
52a5fe70 81static struct __stat64_t64 archive_stat; /* stat of archive when header mapped. */
cb09a2cd
RM
82
83/* Record of locales that we have already loaded from the archive. */
84struct locale_in_archive
85{
86 struct locale_in_archive *next;
dac68e4c 87 char *name;
f095bb72 88 struct __locale_data *data[__LC_LAST];
cb09a2cd
RM
89};
90static struct locale_in_archive *archloaded;
91
92
/* Local structure and subroutine of _nl_load_locale_from_archive, see
   below.  Each range describes where one category's data lives in the
   archive file.  */
struct range
{
  uint32_t from;        /* File offset of the category data.  */
  uint32_t len;         /* Length of the category data in bytes.  */
  int category;         /* Locale category the data belongs to.  */
  void *result;         /* Initialized to NULL by the caller.  */
};

/* qsort comparison function ordering ranges by ascending file offset.
   Returns a negative, zero or positive value.  The offsets are compared
   rather than subtracted: they are unsigned 32-bit values, and converting
   their difference to int yields the wrong sign once two offsets are
   2 GiB or more apart.  */
static int
rangecmp (const void *p1, const void *p2)
{
  const struct range *r1 = p1;
  const struct range *r2 = p2;

  return (r1->from > r2->from) - (r1->from < r2->from);
}
107
108
109/* Calculate the amount of space needed for all the tables described
110 by the given header. Note we do not include the empty table space
111 that has been preallocated in the file, so our mapping may not be
112 large enough if localedef adds data to the file in place. However,
113 doing that would permute the header fields while we are accessing
114 them and thus not be safe anyway, so we don't allow for that. */
115static inline off_t
116calculate_head_size (const struct locarhead *h)
117{
118 off_t namehash_end = (h->namehash_offset
119 + h->namehash_size * sizeof (struct namehashent));
120 off_t string_end = h->string_offset + h->string_used;
121 off_t locrectab_end = (h->locrectab_offset
122 + h->locrectab_used * sizeof (struct locrecent));
123 return MAX (namehash_end, MAX (string_end, locrectab_end));
124}
125
126
127/* Find the locale *NAMEP in the locale archive, and return the
128 internalized data structure for its CATEGORY data. If this locale has
129 already been loaded from the archive, just returns the existing data
130 structure. If successful, sets *NAMEP to point directly into the mapped
131 archive string table; that way, the next call can short-circuit strcmp. */
f095bb72 132struct __locale_data *
cb09a2cd
RM
133_nl_load_locale_from_archive (int category, const char **namep)
134{
135 const char *name = *namep;
136 struct
137 {
138 void *addr;
139 size_t len;
140 } results[__LC_LAST];
141 struct locale_in_archive *lia;
142 struct locarhead *head;
143 struct namehashent *namehashtab;
144 struct locrecent *locrec;
145 struct archmapped *mapped;
146 struct archmapped *last;
147 unsigned long int hval;
148 size_t idx;
149 size_t incr;
150 struct range ranges[__LC_LAST - 1];
151 int nranges;
152 int cnt;
153 size_t ps = __sysconf (_SC_PAGE_SIZE);
154 int fd = -1;
155
156 /* Check if we have already loaded this locale from the archive.
157 If we previously loaded the locale but found bogons in the data,
158 then we will have stored a null pointer to return here. */
159 for (lia = archloaded; lia != NULL; lia = lia->next)
160 if (name == lia->name || !strcmp (name, lia->name))
161 {
162 *namep = lia->name;
163 return lia->data[category];
164 }
165
166 {
167 /* If the name contains a codeset, then we normalize the name before
168 doing the lookup. */
169 const char *p = strchr (name, '.');
170 if (p != NULL && p[1] != '@' && p[1] != '\0')
171 {
172 const char *rest = __strchrnul (++p, '@');
173 const char *normalized_codeset = _nl_normalize_codeset (p, rest - p);
174 if (normalized_codeset == NULL) /* malloc failure */
175 return NULL;
176 if (strncmp (normalized_codeset, p, rest - p) != 0
177 || normalized_codeset[rest - p] != '\0')
178 {
179 /* There is a normalized codeset name that is different from
180 what was specified; reconstruct a new locale name using it. */
181 size_t normlen = strlen (normalized_codeset);
182 size_t restlen = strlen (rest) + 1;
183 char *newname = alloca (p - name + normlen + restlen);
184 memcpy (__mempcpy (__mempcpy (newname, name, p - name),
185 normalized_codeset, normlen),
186 rest, restlen);
cb09a2cd
RM
187 name = newname;
188 }
641fc4a0 189 free ((char *) normalized_codeset);
cb09a2cd
RM
190 }
191 }
192
193 /* Make sure the archive is loaded. */
194 if (archmapped == NULL)
195 {
5bb99914
RM
196 void *result;
197 size_t headsize, mapsize;
198
cb09a2cd
RM
199 /* We do this early as a sign that we have tried to open the archive.
200 If headmap.ptr remains null, that's an indication that we tried
201 and failed, so we won't try again. */
202 archmapped = &headmap;
203
204 /* The archive has never been opened. */
c2284574 205 fd = __open_nocancel (archfname, O_RDONLY|O_LARGEFILE|O_CLOEXEC);
cb09a2cd
RM
206 if (fd < 0)
207 /* Cannot open the archive, for whatever reason. */
208 return NULL;
209
52a5fe70 210 if (__fstat64_time64 (fd, &archive_stat) == -1)
cb09a2cd
RM
211 {
212 /* stat failed, very strange. */
213 close_and_out:
dd4f2115 214 if (fd >= 0)
c181840c 215 __close_nocancel_nostatus (fd);
cb09a2cd
RM
216 return NULL;
217 }
218
cb09a2cd 219
5bb99914
RM
220 /* Map an initial window probably large enough to cover the header
221 and the first locale's data. With a large address space, we can
222 just map the whole file and be sure everything is covered. */
cb09a2cd 223
5bb99914 224 mapsize = (sizeof (void *) > 4 ? archive_stat.st_size
c88b4759 225 : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW));
5bb99914 226
4e20f1e2 227 result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0);
5bb99914
RM
228 if (result == MAP_FAILED)
229 goto close_and_out;
230
231 /* Check whether the file is large enough for the sizes given in
232 the header. Theoretically an archive could be so large that
233 just the header fails to fit in our initial mapping window. */
234 headsize = calculate_head_size ((const struct locarhead *) result);
235 if (headsize > mapsize)
236 {
237 (void) __munmap (result, mapsize);
238 if (sizeof (void *) > 4 || headsize > archive_stat.st_size)
239 /* The file is not big enough for the header. Bogus. */
cb09a2cd 240 goto close_and_out;
5bb99914
RM
241
242 /* Freakishly long header. */
243 /* XXX could use mremap when available */
244 mapsize = (headsize + ps - 1) & ~(ps - 1);
4e20f1e2
RM
245 result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY,
246 fd, 0);
cb09a2cd
RM
247 if (result == MAP_FAILED)
248 goto close_and_out;
5bb99914 249 }
cb09a2cd 250
5bb99914
RM
251 if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size)
252 {
253 /* We've mapped the whole file already, so we can be
254 sure we won't need this file descriptor later. */
c181840c 255 __close_nocancel_nostatus (fd);
5bb99914 256 fd = -1;
cb09a2cd 257 }
5bb99914
RM
258
259 headmap.ptr = result;
260 /* headmap.from already initialized to zero. */
261 headmap.len = mapsize;
cb09a2cd
RM
262 }
263
264 /* If there is no archive or it cannot be loaded for some reason fail. */
a1ffb40e 265 if (__glibc_unlikely (headmap.ptr == NULL))
dd4f2115 266 goto close_and_out;
cb09a2cd
RM
267
268 /* We have the archive available. To find the name we first have to
269 determine its hash value. */
270 hval = compute_hashval (name, strlen (name));
271
272 head = headmap.ptr;
273 namehashtab = (struct namehashent *) ((char *) head
274 + head->namehash_offset);
275
18d14251 276 /* Avoid division by 0 if the file is corrupted. */
ef21bd2d 277 if (__glibc_unlikely (head->namehash_size <= 2))
18d14251
AJ
278 goto close_and_out;
279
cb09a2cd
RM
280 idx = hval % head->namehash_size;
281 incr = 1 + hval % (head->namehash_size - 2);
282
283 /* If the name_offset field is zero this means this is a
284 deleted entry and therefore no entry can be found. */
285 while (1)
286 {
287 if (namehashtab[idx].name_offset == 0)
288 /* Not found. */
dd4f2115 289 goto close_and_out;
cb09a2cd
RM
290
291 if (namehashtab[idx].hashval == hval
292 && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0)
293 /* Found the entry. */
294 break;
295
296 idx += incr;
297 if (idx >= head->namehash_size)
298 idx -= head->namehash_size;
299 }
300
301 /* We found an entry. It might be a placeholder for a removed one. */
302 if (namehashtab[idx].locrec_offset == 0)
dd4f2115 303 goto close_and_out;
cb09a2cd
RM
304
305 locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset);
306
307 if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */)
308 {
309 /* We already have the whole locale archive mapped in. */
310 assert (headmap.len == archive_stat.st_size);
311 for (cnt = 0; cnt < __LC_LAST; ++cnt)
312 if (cnt != LC_ALL)
313 {
314 if (locrec->record[cnt].offset + locrec->record[cnt].len
315 > headmap.len)
316 /* The archive locrectab contains bogus offsets. */
dd4f2115 317 goto close_and_out;
cb09a2cd
RM
318 results[cnt].addr = headmap.ptr + locrec->record[cnt].offset;
319 results[cnt].len = locrec->record[cnt].len;
320 }
321 }
322 else
323 {
324 /* Get the offsets of the data files and sort them. */
325 for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt)
326 if (cnt != LC_ALL)
327 {
328 ranges[nranges].from = locrec->record[cnt].offset;
329 ranges[nranges].len = locrec->record[cnt].len;
330 ranges[nranges].category = cnt;
331 ranges[nranges].result = NULL;
332
333 ++nranges;
334 }
335
336 qsort (ranges, nranges, sizeof (ranges[0]), rangecmp);
337
338 /* The information about mmap'd blocks is kept in a list.
339 Skip over the blocks which are before the data we need. */
340 last = mapped = archmapped;
341 for (cnt = 0; cnt < nranges; ++cnt)
342 {
343 int upper;
344 size_t from;
345 size_t to;
346 void *addr;
347 struct archmapped *newp;
348
349 /* Determine whether the appropriate page is already mapped. */
350 while (mapped != NULL
b5560a44
RM
351 && (mapped->from + mapped->len
352 <= ranges[cnt].from + ranges[cnt].len))
cb09a2cd
RM
353 {
354 last = mapped;
355 mapped = mapped->next;
356 }
357
358 /* Do we have a match? */
359 if (mapped != NULL
360 && mapped->from <= ranges[cnt].from
b5560a44
RM
361 && (ranges[cnt].from + ranges[cnt].len
362 <= mapped->from + mapped->len))
cb09a2cd
RM
363 {
364 /* Yep, already loaded. */
365 results[ranges[cnt].category].addr = ((char *) mapped->ptr
366 + ranges[cnt].from
367 - mapped->from);
368 results[ranges[cnt].category].len = ranges[cnt].len;
369 continue;
370 }
371
372 /* Map the range with the locale data from the file. We will
373 try to cover as much of the locale as possible. I.e., if the
374 next category (next as in "next offset") is on the current or
375 immediately following page we use it as well. */
376 assert (powerof2 (ps));
377 from = ranges[cnt].from & ~(ps - 1);
378 upper = cnt;
379 do
380 {
4e20f1e2 381 to = ranges[upper].from + ranges[upper].len;
6dd67bd5 382 if (to > (size_t) archive_stat.st_size)
4e20f1e2 383 /* The archive locrectab contains bogus offsets. */
dd4f2115 384 goto close_and_out;
4e20f1e2
RM
385 to = (to + ps - 1) & ~(ps - 1);
386
b5560a44
RM
387 /* If a range is already mmaped in, stop. */
388 if (mapped != NULL && ranges[upper].from >= mapped->from)
389 break;
4e20f1e2 390
cb09a2cd
RM
391 ++upper;
392 }
393 /* Loop while still in contiguous pages. */
394 while (upper < nranges && ranges[upper].from < to + ps);
395
cb09a2cd
RM
396 /* Open the file if it hasn't happened yet. */
397 if (fd == -1)
398 {
52a5fe70 399 struct __stat64_t64 st;
c2284574
AZ
400 fd = __open_nocancel (archfname,
401 O_RDONLY|O_LARGEFILE|O_CLOEXEC);
cb09a2cd
RM
402 if (fd == -1)
403 /* Cannot open the archive, for whatever reason. */
404 return NULL;
405 /* Now verify we think this is really the same archive file
406 we opened before. If it has been changed we cannot trust
407 the header we read previously. */
52a5fe70 408 if (__fstat64_time64 (fd, &st) < 0
cb09a2cd
RM
409 || st.st_size != archive_stat.st_size
410 || st.st_mtime != archive_stat.st_mtime
411 || st.st_dev != archive_stat.st_dev
412 || st.st_ino != archive_stat.st_ino)
dd4f2115 413 goto close_and_out;
cb09a2cd
RM
414 }
415
416 /* Map the range from the archive. */
4e20f1e2
RM
417 addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY,
418 fd, from);
cb09a2cd 419 if (addr == MAP_FAILED)
dd4f2115 420 goto close_and_out;
cb09a2cd
RM
421
422 /* Allocate a record for this mapping. */
423 newp = (struct archmapped *) malloc (sizeof (struct archmapped));
424 if (newp == NULL)
425 {
7a8bdff0 426 (void) __munmap (addr, to - from);
dd4f2115 427 goto close_and_out;
cb09a2cd
RM
428 }
429
430 /* And queue it. */
431 newp->ptr = addr;
432 newp->from = from;
433 newp->len = to - from;
434 assert (last->next == mapped);
435 newp->next = mapped;
436 last->next = newp;
437 last = newp;
438
439 /* Determine the load addresses for the category data. */
440 do
441 {
442 assert (ranges[cnt].from >= from);
443 results[ranges[cnt].category].addr = ((char *) addr
444 + ranges[cnt].from - from);
445 results[ranges[cnt].category].len = ranges[cnt].len;
446 }
447 while (++cnt < upper);
448 --cnt; /* The 'for' will increase 'cnt' again. */
449 }
450 }
451
dd4f2115
UD
452 /* We don't need the file descriptor any longer. */
453 if (fd >= 0)
c181840c 454 __close_nocancel_nostatus (fd);
dd4f2115
UD
455 fd = -1;
456
cb09a2cd
RM
457 /* We succeeded in mapping all the necessary regions of the archive.
458 Now we need the expected data structures to point into the data. */
459
460 lia = malloc (sizeof *lia);
a1ffb40e 461 if (__glibc_unlikely (lia == NULL))
cb09a2cd
RM
462 return NULL;
463
ae65d4f3 464 lia->name = __strdup (*namep);
a1ffb40e 465 if (__glibc_unlikely (lia->name == NULL))
dac68e4c
RM
466 {
467 free (lia);
468 return NULL;
469 }
470
cb09a2cd
RM
471 lia->next = archloaded;
472 archloaded = lia;
473
474 for (cnt = 0; cnt < __LC_LAST; ++cnt)
475 if (cnt != LC_ALL)
476 {
477 lia->data[cnt] = _nl_intern_locale_data (cnt,
478 results[cnt].addr,
479 results[cnt].len);
a1ffb40e 480 if (__glibc_likely (lia->data[cnt] != NULL))
cb09a2cd
RM
481 {
482 /* _nl_intern_locale_data leaves us these fields to initialize. */
483 lia->data[cnt]->alloc = ld_archive;
484 lia->data[cnt]->name = lia->name;
0f283ffc
RM
485
486 /* We do this instead of bumping the count each time we return
487 this data because the mappings stay around forever anyway
488 and we might as well hold on to a little more memory and not
489 have to rebuild it on the next lookup of the same thing.
490 If we were to maintain the usage_count normally and let the
491 structures be freed, we would have to remove the elements
492 from archloaded too. */
493 lia->data[cnt]->usage_count = UNDELETABLE;
cb09a2cd
RM
494 }
495 }
496
497 *namep = lia->name;
498 return lia->data[category];
499}
a89a3dab 500
88677348 501void
a89a3dab
RM
502_nl_archive_subfreeres (void)
503{
504 struct locale_in_archive *lia;
505 struct archmapped *am;
506
507 /* Toss out our cached locales. */
508 lia = archloaded;
509 while (lia != NULL)
510 {
511 int category;
512 struct locale_in_archive *dead = lia;
513 lia = lia->next;
514
dac68e4c 515 free (dead->name);
a89a3dab 516 for (category = 0; category < __LC_LAST; ++category)
782723d6 517 if (category != LC_ALL && dead->data[category] != NULL)
bbebe83a 518 _nl_unload_locale (category, dead->data[category]);
a89a3dab
RM
519 free (dead);
520 }
521 archloaded = NULL;
522
523 if (archmapped != NULL)
524 {
525 /* Now toss all the mapping windows, which we know nothing is using any
526 more because we just tossed all the locales that point into them. */
527
528 assert (archmapped == &headmap);
529 archmapped = NULL;
7a8bdff0 530 (void) __munmap (headmap.ptr, headmap.len);
a89a3dab
RM
531 am = headmap.next;
532 while (am != NULL)
533 {
534 struct archmapped *dead = am;
535 am = am->next;
7a8bdff0 536 (void) __munmap (dead->ptr, dead->len);
a89a3dab
RM
537 free (dead);
538 }
539 }
540}