]> git.ipfire.org Git - thirdparty/glibc.git/blame - locale/loadarchive.c
Use glibc_likely instead __builtin_expect.
[thirdparty/glibc.git] / locale / loadarchive.c
CommitLineData
cb09a2cd 1/* Code to load locale data from the locale archive file.
d4697bc9 2 Copyright (C) 2002-2014 Free Software Foundation, Inc.
cb09a2cd
RM
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
cb09a2cd
RM
18
19#include <locale.h>
20#include <stddef.h>
c0ad824e 21#include <stdlib.h>
cb09a2cd
RM
22#include <stdbool.h>
23#include <errno.h>
24#include <assert.h>
25#include <string.h>
26#include <fcntl.h>
27#include <unistd.h>
e054f494 28#include <stdint.h>
cb09a2cd
RM
29#include <sys/mman.h>
30#include <sys/stat.h>
31#include <sys/param.h>
32
33#include "localeinfo.h"
34#include "locarchive.h"
8dab36a1 35#include <not-cancel.h>
cb09a2cd
RM
36
37/* Define the hash function. We define the function as static inline. */
38#define compute_hashval static inline compute_hashval
a3f9038c 39#define hashval_t uint32_t
cb09a2cd
RM
40#include "hashval.h"
41#undef compute_hashval
42
cb09a2cd
RM
43
44/* Name of the locale archive file. */
7c6af012 45static const char archfname[] = LOCALEDIR "/locale-archive";
cb09a2cd 46
5bb99914
RM
/* Size of initial mapping window, optimal if large enough to
   cover the header plus the initial locale.  */
#define ARCHIVE_MAPPING_WINDOW	(2 * 1024 * 1024)
50
4e20f1e2
RM
#ifndef MAP_COPY
/* This is not quite as good as MAP_COPY since unexamined pages
   can change out from under us and give us inconsistent data.
   But we rely on the user not to diddle the system's live archive.
   Even though we only ever use PROT_READ, using MAP_SHARED would
   not give the system sufficient freedom to e.g. let the on disk
   file go away because it doesn't know we won't call mprotect later.  */
# define MAP_COPY MAP_PRIVATE
#endif
#ifndef MAP_FILE
/* Some systems do not have this flag; it is superfluous.  */
# define MAP_FILE 0
#endif
cb09a2cd
RM
64
/* Record of contiguous pages already mapped from the locale archive.
   The records form a singly linked list headed by ARCHMAPPED.  */
struct archmapped
{
  void *ptr;			/* Address the window is mapped at.  */
  uint32_t from;		/* File offset of the start of the window.  */
  uint32_t len;			/* Length of the window in bytes.  */
  struct archmapped *next;	/* Next window in the list, or NULL.  */
};
static struct archmapped *archmapped;
74
75/* This describes the mapping at the beginning of the file that contains
76 the header data. There could be data in the following partial page,
77 so this is searched like any other. Once the archive has been used,
78 ARCHMAPPED points to this; if mapping the archive header failed,
79 then headmap.ptr is null. */
80static struct archmapped headmap;
81static struct stat64 archive_stat; /* stat of archive when header mapped. */
82
83/* Record of locales that we have already loaded from the archive. */
84struct locale_in_archive
85{
86 struct locale_in_archive *next;
dac68e4c 87 char *name;
f095bb72 88 struct __locale_data *data[__LC_LAST];
cb09a2cd
RM
89};
90static struct locale_in_archive *archloaded;
91
92
/* Local structure and subroutine of _nl_load_locale_from_archive,
   see below.  Each record describes one category's data in the file.  */
struct range
{
  uint32_t from;		/* File offset of the category data.  */
  uint32_t len;			/* Length of the category data.  */
  int category;			/* Category this range belongs to.  */
  void *result;
};

/* qsort comparison function ordering `struct range' records by
   ascending FROM.  The offsets are compared rather than subtracted:
   an unsigned difference converted to int has the wrong sign whenever
   the two offsets are 2^31 or more apart.  */
static int
rangecmp (const void *p1, const void *p2)
{
  const struct range *r1 = p1;
  const struct range *r2 = p2;

  return (r1->from > r2->from) - (r1->from < r2->from);
}
107
108
109/* Calculate the amount of space needed for all the tables described
110 by the given header. Note we do not include the empty table space
111 that has been preallocated in the file, so our mapping may not be
112 large enough if localedef adds data to the file in place. However,
113 doing that would permute the header fields while we are accessing
114 them and thus not be safe anyway, so we don't allow for that. */
115static inline off_t
116calculate_head_size (const struct locarhead *h)
117{
118 off_t namehash_end = (h->namehash_offset
119 + h->namehash_size * sizeof (struct namehashent));
120 off_t string_end = h->string_offset + h->string_used;
121 off_t locrectab_end = (h->locrectab_offset
122 + h->locrectab_used * sizeof (struct locrecent));
123 return MAX (namehash_end, MAX (string_end, locrectab_end));
124}
125
126
127/* Find the locale *NAMEP in the locale archive, and return the
128 internalized data structure for its CATEGORY data. If this locale has
129 already been loaded from the archive, just returns the existing data
130 structure. If successful, sets *NAMEP to point directly into the mapped
131 archive string table; that way, the next call can short-circuit strcmp. */
f095bb72 132struct __locale_data *
cb09a2cd
RM
133internal_function
134_nl_load_locale_from_archive (int category, const char **namep)
135{
136 const char *name = *namep;
137 struct
138 {
139 void *addr;
140 size_t len;
141 } results[__LC_LAST];
142 struct locale_in_archive *lia;
143 struct locarhead *head;
144 struct namehashent *namehashtab;
145 struct locrecent *locrec;
146 struct archmapped *mapped;
147 struct archmapped *last;
148 unsigned long int hval;
149 size_t idx;
150 size_t incr;
151 struct range ranges[__LC_LAST - 1];
152 int nranges;
153 int cnt;
154 size_t ps = __sysconf (_SC_PAGE_SIZE);
155 int fd = -1;
156
157 /* Check if we have already loaded this locale from the archive.
158 If we previously loaded the locale but found bogons in the data,
159 then we will have stored a null pointer to return here. */
160 for (lia = archloaded; lia != NULL; lia = lia->next)
161 if (name == lia->name || !strcmp (name, lia->name))
162 {
163 *namep = lia->name;
164 return lia->data[category];
165 }
166
167 {
168 /* If the name contains a codeset, then we normalize the name before
169 doing the lookup. */
170 const char *p = strchr (name, '.');
171 if (p != NULL && p[1] != '@' && p[1] != '\0')
172 {
173 const char *rest = __strchrnul (++p, '@');
174 const char *normalized_codeset = _nl_normalize_codeset (p, rest - p);
175 if (normalized_codeset == NULL) /* malloc failure */
176 return NULL;
177 if (strncmp (normalized_codeset, p, rest - p) != 0
178 || normalized_codeset[rest - p] != '\0')
179 {
180 /* There is a normalized codeset name that is different from
181 what was specified; reconstruct a new locale name using it. */
182 size_t normlen = strlen (normalized_codeset);
183 size_t restlen = strlen (rest) + 1;
184 char *newname = alloca (p - name + normlen + restlen);
185 memcpy (__mempcpy (__mempcpy (newname, name, p - name),
186 normalized_codeset, normlen),
187 rest, restlen);
cb09a2cd
RM
188 name = newname;
189 }
641fc4a0 190 free ((char *) normalized_codeset);
cb09a2cd
RM
191 }
192 }
193
194 /* Make sure the archive is loaded. */
195 if (archmapped == NULL)
196 {
5bb99914
RM
197 void *result;
198 size_t headsize, mapsize;
199
cb09a2cd
RM
200 /* We do this early as a sign that we have tried to open the archive.
201 If headmap.ptr remains null, that's an indication that we tried
202 and failed, so we won't try again. */
203 archmapped = &headmap;
204
205 /* The archive has never been opened. */
d62a8200 206 fd = open_not_cancel_2 (archfname, O_RDONLY|O_LARGEFILE|O_CLOEXEC);
cb09a2cd
RM
207 if (fd < 0)
208 /* Cannot open the archive, for whatever reason. */
209 return NULL;
210
211 if (__fxstat64 (_STAT_VER, fd, &archive_stat) == -1)
212 {
213 /* stat failed, very strange. */
214 close_and_out:
dd4f2115 215 if (fd >= 0)
8dab36a1 216 close_not_cancel_no_status (fd);
cb09a2cd
RM
217 return NULL;
218 }
219
cb09a2cd 220
5bb99914
RM
221 /* Map an initial window probably large enough to cover the header
222 and the first locale's data. With a large address space, we can
223 just map the whole file and be sure everything is covered. */
cb09a2cd 224
5bb99914 225 mapsize = (sizeof (void *) > 4 ? archive_stat.st_size
c88b4759 226 : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW));
5bb99914 227
4e20f1e2 228 result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0);
5bb99914
RM
229 if (result == MAP_FAILED)
230 goto close_and_out;
231
232 /* Check whether the file is large enough for the sizes given in
233 the header. Theoretically an archive could be so large that
234 just the header fails to fit in our initial mapping window. */
235 headsize = calculate_head_size ((const struct locarhead *) result);
236 if (headsize > mapsize)
237 {
238 (void) __munmap (result, mapsize);
239 if (sizeof (void *) > 4 || headsize > archive_stat.st_size)
240 /* The file is not big enough for the header. Bogus. */
cb09a2cd 241 goto close_and_out;
5bb99914
RM
242
243 /* Freakishly long header. */
244 /* XXX could use mremap when available */
245 mapsize = (headsize + ps - 1) & ~(ps - 1);
4e20f1e2
RM
246 result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY,
247 fd, 0);
cb09a2cd
RM
248 if (result == MAP_FAILED)
249 goto close_and_out;
5bb99914 250 }
cb09a2cd 251
5bb99914
RM
252 if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size)
253 {
254 /* We've mapped the whole file already, so we can be
255 sure we won't need this file descriptor later. */
8dab36a1 256 close_not_cancel_no_status (fd);
5bb99914 257 fd = -1;
cb09a2cd 258 }
5bb99914
RM
259
260 headmap.ptr = result;
261 /* headmap.from already initialized to zero. */
262 headmap.len = mapsize;
cb09a2cd
RM
263 }
264
265 /* If there is no archive or it cannot be loaded for some reason fail. */
a1ffb40e 266 if (__glibc_unlikely (headmap.ptr == NULL))
dd4f2115 267 goto close_and_out;
cb09a2cd
RM
268
269 /* We have the archive available. To find the name we first have to
270 determine its hash value. */
271 hval = compute_hashval (name, strlen (name));
272
273 head = headmap.ptr;
274 namehashtab = (struct namehashent *) ((char *) head
275 + head->namehash_offset);
276
18d14251
AJ
277 /* Avoid division by 0 if the file is corrupted. */
278 if (__glibc_unlikely (head->namehash_size == 0))
279 goto close_and_out;
280
cb09a2cd
RM
281 idx = hval % head->namehash_size;
282 incr = 1 + hval % (head->namehash_size - 2);
283
284 /* If the name_offset field is zero this means this is a
285 deleted entry and therefore no entry can be found. */
286 while (1)
287 {
288 if (namehashtab[idx].name_offset == 0)
289 /* Not found. */
dd4f2115 290 goto close_and_out;
cb09a2cd
RM
291
292 if (namehashtab[idx].hashval == hval
293 && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0)
294 /* Found the entry. */
295 break;
296
297 idx += incr;
298 if (idx >= head->namehash_size)
299 idx -= head->namehash_size;
300 }
301
302 /* We found an entry. It might be a placeholder for a removed one. */
303 if (namehashtab[idx].locrec_offset == 0)
dd4f2115 304 goto close_and_out;
cb09a2cd
RM
305
306 locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset);
307
308 if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */)
309 {
310 /* We already have the whole locale archive mapped in. */
311 assert (headmap.len == archive_stat.st_size);
312 for (cnt = 0; cnt < __LC_LAST; ++cnt)
313 if (cnt != LC_ALL)
314 {
315 if (locrec->record[cnt].offset + locrec->record[cnt].len
316 > headmap.len)
317 /* The archive locrectab contains bogus offsets. */
dd4f2115 318 goto close_and_out;
cb09a2cd
RM
319 results[cnt].addr = headmap.ptr + locrec->record[cnt].offset;
320 results[cnt].len = locrec->record[cnt].len;
321 }
322 }
323 else
324 {
325 /* Get the offsets of the data files and sort them. */
326 for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt)
327 if (cnt != LC_ALL)
328 {
329 ranges[nranges].from = locrec->record[cnt].offset;
330 ranges[nranges].len = locrec->record[cnt].len;
331 ranges[nranges].category = cnt;
332 ranges[nranges].result = NULL;
333
334 ++nranges;
335 }
336
337 qsort (ranges, nranges, sizeof (ranges[0]), rangecmp);
338
339 /* The information about mmap'd blocks is kept in a list.
340 Skip over the blocks which are before the data we need. */
341 last = mapped = archmapped;
342 for (cnt = 0; cnt < nranges; ++cnt)
343 {
344 int upper;
345 size_t from;
346 size_t to;
347 void *addr;
348 struct archmapped *newp;
349
350 /* Determine whether the appropriate page is already mapped. */
351 while (mapped != NULL
b5560a44
RM
352 && (mapped->from + mapped->len
353 <= ranges[cnt].from + ranges[cnt].len))
cb09a2cd
RM
354 {
355 last = mapped;
356 mapped = mapped->next;
357 }
358
359 /* Do we have a match? */
360 if (mapped != NULL
361 && mapped->from <= ranges[cnt].from
b5560a44
RM
362 && (ranges[cnt].from + ranges[cnt].len
363 <= mapped->from + mapped->len))
cb09a2cd
RM
364 {
365 /* Yep, already loaded. */
366 results[ranges[cnt].category].addr = ((char *) mapped->ptr
367 + ranges[cnt].from
368 - mapped->from);
369 results[ranges[cnt].category].len = ranges[cnt].len;
370 continue;
371 }
372
373 /* Map the range with the locale data from the file. We will
374 try to cover as much of the locale as possible. I.e., if the
375 next category (next as in "next offset") is on the current or
376 immediately following page we use it as well. */
377 assert (powerof2 (ps));
378 from = ranges[cnt].from & ~(ps - 1);
379 upper = cnt;
380 do
381 {
4e20f1e2 382 to = ranges[upper].from + ranges[upper].len;
6dd67bd5 383 if (to > (size_t) archive_stat.st_size)
4e20f1e2 384 /* The archive locrectab contains bogus offsets. */
dd4f2115 385 goto close_and_out;
4e20f1e2
RM
386 to = (to + ps - 1) & ~(ps - 1);
387
b5560a44
RM
388 /* If a range is already mmaped in, stop. */
389 if (mapped != NULL && ranges[upper].from >= mapped->from)
390 break;
4e20f1e2 391
cb09a2cd
RM
392 ++upper;
393 }
394 /* Loop while still in contiguous pages. */
395 while (upper < nranges && ranges[upper].from < to + ps);
396
cb09a2cd
RM
397 /* Open the file if it hasn't happened yet. */
398 if (fd == -1)
399 {
400 struct stat64 st;
d62a8200
UD
401 fd = open_not_cancel_2 (archfname,
402 O_RDONLY|O_LARGEFILE|O_CLOEXEC);
cb09a2cd
RM
403 if (fd == -1)
404 /* Cannot open the archive, for whatever reason. */
405 return NULL;
406 /* Now verify we think this is really the same archive file
407 we opened before. If it has been changed we cannot trust
408 the header we read previously. */
409 if (__fxstat64 (_STAT_VER, fd, &st) < 0
410 || st.st_size != archive_stat.st_size
411 || st.st_mtime != archive_stat.st_mtime
412 || st.st_dev != archive_stat.st_dev
413 || st.st_ino != archive_stat.st_ino)
dd4f2115 414 goto close_and_out;
cb09a2cd
RM
415 }
416
417 /* Map the range from the archive. */
4e20f1e2
RM
418 addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY,
419 fd, from);
cb09a2cd 420 if (addr == MAP_FAILED)
dd4f2115 421 goto close_and_out;
cb09a2cd
RM
422
423 /* Allocate a record for this mapping. */
424 newp = (struct archmapped *) malloc (sizeof (struct archmapped));
425 if (newp == NULL)
426 {
7a8bdff0 427 (void) __munmap (addr, to - from);
dd4f2115 428 goto close_and_out;
cb09a2cd
RM
429 }
430
431 /* And queue it. */
432 newp->ptr = addr;
433 newp->from = from;
434 newp->len = to - from;
435 assert (last->next == mapped);
436 newp->next = mapped;
437 last->next = newp;
438 last = newp;
439
440 /* Determine the load addresses for the category data. */
441 do
442 {
443 assert (ranges[cnt].from >= from);
444 results[ranges[cnt].category].addr = ((char *) addr
445 + ranges[cnt].from - from);
446 results[ranges[cnt].category].len = ranges[cnt].len;
447 }
448 while (++cnt < upper);
449 --cnt; /* The 'for' will increase 'cnt' again. */
450 }
451 }
452
dd4f2115
UD
453 /* We don't need the file descriptor any longer. */
454 if (fd >= 0)
8dab36a1 455 close_not_cancel_no_status (fd);
dd4f2115
UD
456 fd = -1;
457
cb09a2cd
RM
458 /* We succeeded in mapping all the necessary regions of the archive.
459 Now we need the expected data structures to point into the data. */
460
461 lia = malloc (sizeof *lia);
a1ffb40e 462 if (__glibc_unlikely (lia == NULL))
cb09a2cd
RM
463 return NULL;
464
dac68e4c 465 lia->name = strdup (*namep);
a1ffb40e 466 if (__glibc_unlikely (lia->name == NULL))
dac68e4c
RM
467 {
468 free (lia);
469 return NULL;
470 }
471
cb09a2cd
RM
472 lia->next = archloaded;
473 archloaded = lia;
474
475 for (cnt = 0; cnt < __LC_LAST; ++cnt)
476 if (cnt != LC_ALL)
477 {
478 lia->data[cnt] = _nl_intern_locale_data (cnt,
479 results[cnt].addr,
480 results[cnt].len);
a1ffb40e 481 if (__glibc_likely (lia->data[cnt] != NULL))
cb09a2cd
RM
482 {
483 /* _nl_intern_locale_data leaves us these fields to initialize. */
484 lia->data[cnt]->alloc = ld_archive;
485 lia->data[cnt]->name = lia->name;
0f283ffc
RM
486
487 /* We do this instead of bumping the count each time we return
488 this data because the mappings stay around forever anyway
489 and we might as well hold on to a little more memory and not
490 have to rebuild it on the next lookup of the same thing.
491 If we were to maintain the usage_count normally and let the
492 structures be freed, we would have to remove the elements
493 from archloaded too. */
494 lia->data[cnt]->usage_count = UNDELETABLE;
cb09a2cd
RM
495 }
496 }
497
498 *namep = lia->name;
499 return lia->data[category];
500}
a89a3dab 501
c877418f 502void __libc_freeres_fn_section
a89a3dab
RM
503_nl_archive_subfreeres (void)
504{
505 struct locale_in_archive *lia;
506 struct archmapped *am;
507
508 /* Toss out our cached locales. */
509 lia = archloaded;
510 while (lia != NULL)
511 {
512 int category;
513 struct locale_in_archive *dead = lia;
514 lia = lia->next;
515
dac68e4c 516 free (dead->name);
a89a3dab
RM
517 for (category = 0; category < __LC_LAST; ++category)
518 if (category != LC_ALL)
f6e50e66
UD
519 {
520 /* _nl_unload_locale just does this free for the archive case. */
521 if (dead->data[category]->private.cleanup)
522 (*dead->data[category]->private.cleanup) (dead->data[category]);
523
524 free (dead->data[category]);
525 }
a89a3dab
RM
526 free (dead);
527 }
528 archloaded = NULL;
529
530 if (archmapped != NULL)
531 {
532 /* Now toss all the mapping windows, which we know nothing is using any
533 more because we just tossed all the locales that point into them. */
534
535 assert (archmapped == &headmap);
536 archmapped = NULL;
7a8bdff0 537 (void) __munmap (headmap.ptr, headmap.len);
a89a3dab
RM
538 am = headmap.next;
539 while (am != NULL)
540 {
541 struct archmapped *dead = am;
542 am = am->next;
7a8bdff0 543 (void) __munmap (dead->ptr, dead->len);
a89a3dab
RM
544 free (dead);
545 }
546 }
547}