]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/loadarchive.c
* sysdeps/generic/libc-tls.c (__pthread_initialize_minimal): Pass
[thirdparty/glibc.git] / locale / loadarchive.c
CommitLineData
cb09a2cd
RM
1/* Code to load locale data from the locale archive file.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
19
20#include <locale.h>
21#include <stddef.h>
c0ad824e 22#include <stdlib.h>
cb09a2cd
RM
23#include <stdbool.h>
24#include <errno.h>
25#include <assert.h>
26#include <string.h>
27#include <fcntl.h>
28#include <unistd.h>
29#include <sys/mman.h>
30#include <sys/stat.h>
31#include <sys/param.h>
32
33#include "localeinfo.h"
34#include "locarchive.h"
35
36/* Define the hash function. We define the function as static inline. */
37#define compute_hashval static inline compute_hashval
a3f9038c 38#define hashval_t uint32_t
cb09a2cd
RM
39#include "hashval.h"
40#undef compute_hashval
41
cb09a2cd
RM
42
43/* Name of the locale archive file. */
7c6af012 44static const char archfname[] = LOCALEDIR "/locale-archive";
cb09a2cd 45
5bb99914
RM
/* How many bytes of the archive are mapped on the first attempt when
   the whole file is not mapped at once.  The value works best when it
   covers the archive header together with the first locale loaded.  */
#define ARCHIVE_MAPPING_WINDOW (2 * 1024 * 1024)

#if !defined MAP_COPY
/* MAP_PRIVATE is a weaker stand-in for MAP_COPY: pages we have not
   looked at yet may still change underneath us and leave us with
   inconsistent data, so we depend on nobody modifying the system's
   live archive.  MAP_SHARED is not used even though the mappings are
   only ever PROT_READ, because the system could not know that we never
   call mprotect later and so would have less freedom, e.g. to let the
   on-disk file go away.  */
# define MAP_COPY MAP_PRIVATE
#endif
#if !defined MAP_FILE
/* The flag is superfluous, and some systems do not provide it.  */
# define MAP_FILE 0
#endif
cb09a2cd
RM
63
/* Describes one contiguous run of pages of the locale archive that has
   already been mapped into memory.  The records form a singly linked
   list.  */
struct archmapped
{
  void *ptr;                    /* Address at which the pages are mapped.  */
  uint32_t from;                /* File offset at which the mapping starts.  */
  uint32_t len;                 /* Length of the mapping in bytes.  */
  struct archmapped *next;      /* Following record, or NULL at the end.  */
};

/* Head of the list of mappings.  It stays NULL until the first attempt
   to use the archive has been made.  */
static struct archmapped *archmapped;
73
74/* This describes the mapping at the beginning of the file that contains
75 the header data. There could be data in the following partial page,
76 so this is searched like any other. Once the archive has been used,
77 ARCHMAPPED points to this; if mapping the archive header failed,
78 then headmap.ptr is null. */
79static struct archmapped headmap;
80static struct stat64 archive_stat; /* stat of archive when header mapped. */
81
82/* Record of locales that we have already loaded from the archive. */
83struct locale_in_archive
84{
85 struct locale_in_archive *next;
dac68e4c 86 char *name;
cb09a2cd
RM
87 struct locale_data *data[__LC_LAST];
88};
89static struct locale_in_archive *archloaded;
90
91
/* Local structure and subroutine of _nl_load_locale_from_archive, see
   below.  A range records where the data of one locale category lives
   inside the archive file.  */
struct range
{
  uint32_t from;        /* Offset of the data within the archive.  */
  uint32_t len;         /* Length of the data in bytes.  */
  int category;         /* Locale category the data belongs to.  */
  void *result;         /* Set to NULL when the range is recorded.  */
};

/* Comparison function for qsort: order two `struct range' objects by
   ascending FROM offset.  Returns a negative value, zero, or a positive
   value if *P1 sorts before, equal to, or after *P2.

   The offsets are unsigned 32-bit quantities.  Returning their
   difference converted to int yields the wrong sign as soon as the two
   offsets are more than INT_MAX apart, so they are compared
   explicitly.  */
static int
rangecmp (const void *p1, const void *p2)
{
  const struct range *r1 = p1;
  const struct range *r2 = p2;

  return (r1->from > r2->from) - (r1->from < r2->from);
}
106
107
108/* Calculate the amount of space needed for all the tables described
109 by the given header. Note we do not include the empty table space
110 that has been preallocated in the file, so our mapping may not be
111 large enough if localedef adds data to the file in place. However,
112 doing that would permute the header fields while we are accessing
113 them and thus not be safe anyway, so we don't allow for that. */
114static inline off_t
115calculate_head_size (const struct locarhead *h)
116{
117 off_t namehash_end = (h->namehash_offset
118 + h->namehash_size * sizeof (struct namehashent));
119 off_t string_end = h->string_offset + h->string_used;
120 off_t locrectab_end = (h->locrectab_offset
121 + h->locrectab_used * sizeof (struct locrecent));
122 return MAX (namehash_end, MAX (string_end, locrectab_end));
123}
124
125
126/* Find the locale *NAMEP in the locale archive, and return the
127 internalized data structure for its CATEGORY data. If this locale has
128 already been loaded from the archive, just returns the existing data
129 structure. If successful, sets *NAMEP to point directly into the mapped
130 archive string table; that way, the next call can short-circuit strcmp. */
131struct locale_data *
132internal_function
133_nl_load_locale_from_archive (int category, const char **namep)
134{
135 const char *name = *namep;
136 struct
137 {
138 void *addr;
139 size_t len;
140 } results[__LC_LAST];
141 struct locale_in_archive *lia;
142 struct locarhead *head;
143 struct namehashent *namehashtab;
144 struct locrecent *locrec;
145 struct archmapped *mapped;
146 struct archmapped *last;
147 unsigned long int hval;
148 size_t idx;
149 size_t incr;
150 struct range ranges[__LC_LAST - 1];
151 int nranges;
152 int cnt;
153 size_t ps = __sysconf (_SC_PAGE_SIZE);
154 int fd = -1;
155
156 /* Check if we have already loaded this locale from the archive.
157 If we previously loaded the locale but found bogons in the data,
158 then we will have stored a null pointer to return here. */
159 for (lia = archloaded; lia != NULL; lia = lia->next)
160 if (name == lia->name || !strcmp (name, lia->name))
161 {
162 *namep = lia->name;
163 return lia->data[category];
164 }
165
166 {
167 /* If the name contains a codeset, then we normalize the name before
168 doing the lookup. */
169 const char *p = strchr (name, '.');
170 if (p != NULL && p[1] != '@' && p[1] != '\0')
171 {
172 const char *rest = __strchrnul (++p, '@');
173 const char *normalized_codeset = _nl_normalize_codeset (p, rest - p);
174 if (normalized_codeset == NULL) /* malloc failure */
175 return NULL;
176 if (strncmp (normalized_codeset, p, rest - p) != 0
177 || normalized_codeset[rest - p] != '\0')
178 {
179 /* There is a normalized codeset name that is different from
180 what was specified; reconstruct a new locale name using it. */
181 size_t normlen = strlen (normalized_codeset);
182 size_t restlen = strlen (rest) + 1;
183 char *newname = alloca (p - name + normlen + restlen);
184 memcpy (__mempcpy (__mempcpy (newname, name, p - name),
185 normalized_codeset, normlen),
186 rest, restlen);
187 free ((char *) normalized_codeset);
188 name = newname;
189 }
190 }
191 }
192
193 /* Make sure the archive is loaded. */
194 if (archmapped == NULL)
195 {
5bb99914
RM
196 void *result;
197 size_t headsize, mapsize;
198
cb09a2cd
RM
199 /* We do this early as a sign that we have tried to open the archive.
200 If headmap.ptr remains null, that's an indication that we tried
201 and failed, so we won't try again. */
202 archmapped = &headmap;
203
204 /* The archive has never been opened. */
205 fd = __open64 (archfname, O_RDONLY);
206 if (fd < 0)
207 /* Cannot open the archive, for whatever reason. */
208 return NULL;
209
210 if (__fxstat64 (_STAT_VER, fd, &archive_stat) == -1)
211 {
212 /* stat failed, very strange. */
213 close_and_out:
dd4f2115
UD
214 if (fd >= 0)
215 __close (fd);
cb09a2cd
RM
216 return NULL;
217 }
218
cb09a2cd 219
5bb99914
RM
220 /* Map an initial window probably large enough to cover the header
221 and the first locale's data. With a large address space, we can
222 just map the whole file and be sure everything is covered. */
cb09a2cd 223
5bb99914 224 mapsize = (sizeof (void *) > 4 ? archive_stat.st_size
c88b4759 225 : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW));
5bb99914 226
4e20f1e2 227 result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0);
5bb99914
RM
228 if (result == MAP_FAILED)
229 goto close_and_out;
230
231 /* Check whether the file is large enough for the sizes given in
232 the header. Theoretically an archive could be so large that
233 just the header fails to fit in our initial mapping window. */
234 headsize = calculate_head_size ((const struct locarhead *) result);
235 if (headsize > mapsize)
236 {
237 (void) __munmap (result, mapsize);
238 if (sizeof (void *) > 4 || headsize > archive_stat.st_size)
239 /* The file is not big enough for the header. Bogus. */
cb09a2cd 240 goto close_and_out;
5bb99914
RM
241
242 /* Freakishly long header. */
243 /* XXX could use mremap when available */
244 mapsize = (headsize + ps - 1) & ~(ps - 1);
4e20f1e2
RM
245 result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY,
246 fd, 0);
cb09a2cd
RM
247 if (result == MAP_FAILED)
248 goto close_and_out;
5bb99914 249 }
cb09a2cd 250
5bb99914
RM
251 if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size)
252 {
253 /* We've mapped the whole file already, so we can be
254 sure we won't need this file descriptor later. */
255 __close (fd);
256 fd = -1;
cb09a2cd 257 }
5bb99914
RM
258
259 headmap.ptr = result;
260 /* headmap.from already initialized to zero. */
261 headmap.len = mapsize;
cb09a2cd
RM
262 }
263
264 /* If there is no archive or it cannot be loaded for some reason fail. */
265 if (__builtin_expect (headmap.ptr == NULL, 0))
dd4f2115 266 goto close_and_out;
cb09a2cd
RM
267
268 /* We have the archive available. To find the name we first have to
269 determine its hash value. */
270 hval = compute_hashval (name, strlen (name));
271
272 head = headmap.ptr;
273 namehashtab = (struct namehashent *) ((char *) head
274 + head->namehash_offset);
275
276 idx = hval % head->namehash_size;
277 incr = 1 + hval % (head->namehash_size - 2);
278
279 /* If the name_offset field is zero this means this is a
280 deleted entry and therefore no entry can be found. */
281 while (1)
282 {
283 if (namehashtab[idx].name_offset == 0)
284 /* Not found. */
dd4f2115 285 goto close_and_out;
cb09a2cd
RM
286
287 if (namehashtab[idx].hashval == hval
288 && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0)
289 /* Found the entry. */
290 break;
291
292 idx += incr;
293 if (idx >= head->namehash_size)
294 idx -= head->namehash_size;
295 }
296
297 /* We found an entry. It might be a placeholder for a removed one. */
298 if (namehashtab[idx].locrec_offset == 0)
dd4f2115 299 goto close_and_out;
cb09a2cd
RM
300
301 locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset);
302
303 if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */)
304 {
305 /* We already have the whole locale archive mapped in. */
306 assert (headmap.len == archive_stat.st_size);
307 for (cnt = 0; cnt < __LC_LAST; ++cnt)
308 if (cnt != LC_ALL)
309 {
310 if (locrec->record[cnt].offset + locrec->record[cnt].len
311 > headmap.len)
312 /* The archive locrectab contains bogus offsets. */
dd4f2115 313 goto close_and_out;
cb09a2cd
RM
314 results[cnt].addr = headmap.ptr + locrec->record[cnt].offset;
315 results[cnt].len = locrec->record[cnt].len;
316 }
317 }
318 else
319 {
320 /* Get the offsets of the data files and sort them. */
321 for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt)
322 if (cnt != LC_ALL)
323 {
324 ranges[nranges].from = locrec->record[cnt].offset;
325 ranges[nranges].len = locrec->record[cnt].len;
326 ranges[nranges].category = cnt;
327 ranges[nranges].result = NULL;
328
329 ++nranges;
330 }
331
332 qsort (ranges, nranges, sizeof (ranges[0]), rangecmp);
333
334 /* The information about mmap'd blocks is kept in a list.
335 Skip over the blocks which are before the data we need. */
336 last = mapped = archmapped;
337 for (cnt = 0; cnt < nranges; ++cnt)
338 {
339 int upper;
340 size_t from;
341 size_t to;
342 void *addr;
343 struct archmapped *newp;
344
345 /* Determine whether the appropriate page is already mapped. */
346 while (mapped != NULL
b5560a44
RM
347 && (mapped->from + mapped->len
348 <= ranges[cnt].from + ranges[cnt].len))
cb09a2cd
RM
349 {
350 last = mapped;
351 mapped = mapped->next;
352 }
353
354 /* Do we have a match? */
355 if (mapped != NULL
356 && mapped->from <= ranges[cnt].from
b5560a44
RM
357 && (ranges[cnt].from + ranges[cnt].len
358 <= mapped->from + mapped->len))
cb09a2cd
RM
359 {
360 /* Yep, already loaded. */
361 results[ranges[cnt].category].addr = ((char *) mapped->ptr
362 + ranges[cnt].from
363 - mapped->from);
364 results[ranges[cnt].category].len = ranges[cnt].len;
365 continue;
366 }
367
368 /* Map the range with the locale data from the file. We will
369 try to cover as much of the locale as possible. I.e., if the
370 next category (next as in "next offset") is on the current or
371 immediately following page we use it as well. */
372 assert (powerof2 (ps));
373 from = ranges[cnt].from & ~(ps - 1);
374 upper = cnt;
375 do
376 {
4e20f1e2 377 to = ranges[upper].from + ranges[upper].len;
6dd67bd5 378 if (to > (size_t) archive_stat.st_size)
4e20f1e2 379 /* The archive locrectab contains bogus offsets. */
dd4f2115 380 goto close_and_out;
4e20f1e2
RM
381 to = (to + ps - 1) & ~(ps - 1);
382
b5560a44
RM
383 /* If a range is already mmaped in, stop. */
384 if (mapped != NULL && ranges[upper].from >= mapped->from)
385 break;
4e20f1e2 386
cb09a2cd
RM
387 ++upper;
388 }
389 /* Loop while still in contiguous pages. */
390 while (upper < nranges && ranges[upper].from < to + ps);
391
cb09a2cd
RM
392 /* Open the file if it hasn't happened yet. */
393 if (fd == -1)
394 {
395 struct stat64 st;
396 fd = __open64 (archfname, O_RDONLY);
397 if (fd == -1)
398 /* Cannot open the archive, for whatever reason. */
399 return NULL;
400 /* Now verify we think this is really the same archive file
401 we opened before. If it has been changed we cannot trust
402 the header we read previously. */
403 if (__fxstat64 (_STAT_VER, fd, &st) < 0
404 || st.st_size != archive_stat.st_size
405 || st.st_mtime != archive_stat.st_mtime
406 || st.st_dev != archive_stat.st_dev
407 || st.st_ino != archive_stat.st_ino)
dd4f2115 408 goto close_and_out;
cb09a2cd
RM
409 }
410
411 /* Map the range from the archive. */
4e20f1e2
RM
412 addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY,
413 fd, from);
cb09a2cd 414 if (addr == MAP_FAILED)
dd4f2115 415 goto close_and_out;
cb09a2cd
RM
416
417 /* Allocate a record for this mapping. */
418 newp = (struct archmapped *) malloc (sizeof (struct archmapped));
419 if (newp == NULL)
420 {
7a8bdff0 421 (void) __munmap (addr, to - from);
dd4f2115 422 goto close_and_out;
cb09a2cd
RM
423 }
424
425 /* And queue it. */
426 newp->ptr = addr;
427 newp->from = from;
428 newp->len = to - from;
429 assert (last->next == mapped);
430 newp->next = mapped;
431 last->next = newp;
432 last = newp;
433
434 /* Determine the load addresses for the category data. */
435 do
436 {
437 assert (ranges[cnt].from >= from);
438 results[ranges[cnt].category].addr = ((char *) addr
439 + ranges[cnt].from - from);
440 results[ranges[cnt].category].len = ranges[cnt].len;
441 }
442 while (++cnt < upper);
443 --cnt; /* The 'for' will increase 'cnt' again. */
444 }
445 }
446
dd4f2115
UD
447 /* We don't need the file descriptor any longer. */
448 if (fd >= 0)
449 __close (fd);
450 fd = -1;
451
cb09a2cd
RM
452 /* We succeeded in mapping all the necessary regions of the archive.
453 Now we need the expected data structures to point into the data. */
454
455 lia = malloc (sizeof *lia);
456 if (__builtin_expect (lia == NULL, 0))
457 return NULL;
458
dac68e4c
RM
459 lia->name = strdup (*namep);
460 if (__builtin_expect (lia->name == NULL, 0))
461 {
462 free (lia);
463 return NULL;
464 }
465
cb09a2cd
RM
466 lia->next = archloaded;
467 archloaded = lia;
468
469 for (cnt = 0; cnt < __LC_LAST; ++cnt)
470 if (cnt != LC_ALL)
471 {
472 lia->data[cnt] = _nl_intern_locale_data (cnt,
473 results[cnt].addr,
474 results[cnt].len);
475 if (__builtin_expect (lia->data[cnt] != NULL, 1))
476 {
477 /* _nl_intern_locale_data leaves us these fields to initialize. */
478 lia->data[cnt]->alloc = ld_archive;
479 lia->data[cnt]->name = lia->name;
0f283ffc
RM
480
481 /* We do this instead of bumping the count each time we return
482 this data because the mappings stay around forever anyway
483 and we might as well hold on to a little more memory and not
484 have to rebuild it on the next lookup of the same thing.
485 If we were to maintain the usage_count normally and let the
486 structures be freed, we would have to remove the elements
487 from archloaded too. */
488 lia->data[cnt]->usage_count = UNDELETABLE;
cb09a2cd
RM
489 }
490 }
491
492 *namep = lia->name;
493 return lia->data[category];
494}
a89a3dab
RM
495
496void
497_nl_archive_subfreeres (void)
498{
499 struct locale_in_archive *lia;
500 struct archmapped *am;
501
502 /* Toss out our cached locales. */
503 lia = archloaded;
504 while (lia != NULL)
505 {
506 int category;
507 struct locale_in_archive *dead = lia;
508 lia = lia->next;
509
dac68e4c 510 free (dead->name);
a89a3dab
RM
511 for (category = 0; category < __LC_LAST; ++category)
512 if (category != LC_ALL)
513 /* _nl_unload_locale just does this free for the archive case. */
514 free (dead->data[category]);
515 free (dead);
516 }
517 archloaded = NULL;
518
519 if (archmapped != NULL)
520 {
521 /* Now toss all the mapping windows, which we know nothing is using any
522 more because we just tossed all the locales that point into them. */
523
524 assert (archmapped == &headmap);
525 archmapped = NULL;
7a8bdff0 526 (void) __munmap (headmap.ptr, headmap.len);
a89a3dab
RM
527 am = headmap.next;
528 while (am != NULL)
529 {
530 struct archmapped *dead = am;
531 am = am->next;
7a8bdff0 532 (void) __munmap (dead->ptr, dead->len);
a89a3dab
RM
533 free (dead);
534 }
535 }
536}