]>
Commit | Line | Data |
---|---|---|
cb09a2cd RM |
1 | /* Code to load locale data from the locale archive file. |
2 | Copyright (C) 2002 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, write to the Free | |
17 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
18 | 02111-1307 USA. */ | |
19 | ||
20 | #include <locale.h> | |
21 | #include <stddef.h> | |
c0ad824e | 22 | #include <stdlib.h> |
cb09a2cd RM |
23 | #include <stdbool.h> |
24 | #include <errno.h> | |
25 | #include <assert.h> | |
26 | #include <string.h> | |
27 | #include <fcntl.h> | |
28 | #include <unistd.h> | |
29 | #include <sys/mman.h> | |
30 | #include <sys/stat.h> | |
31 | #include <sys/param.h> | |
32 | ||
33 | #include "localeinfo.h" | |
34 | #include "locarchive.h" | |
35 | ||
36 | /* Define the hash function. We define the function as static inline. */ | |
37 | #define compute_hashval static inline compute_hashval | |
a3f9038c | 38 | #define hashval_t uint32_t |
cb09a2cd RM |
39 | #include "hashval.h" |
40 | #undef compute_hashval | |
41 | ||
cb09a2cd RM |
42 | |
43 | /* Name of the locale archive file. */ | |
7c6af012 | 44 | static const char archfname[] = LOCALEDIR "/locale-archive"; |
cb09a2cd | 45 | |
5bb99914 RM |
46 | /* Size of initial mapping window, optimal if large enough to |
47 | cover the header plus the initial locale. */ | |
48 | #define ARCHIVE_MAPPING_WINDOW (2 * 1024 * 1024) | |
49 | ||
4e20f1e2 RM |
50 | #ifndef MAP_COPY |
51 | /* This is not quite as good as MAP_COPY since unexamined pages | |
52 | can change out from under us and give us inconsistent data. | |
53 | But we rely on the user not to diddle the system's live archive. | |
54 | Even though we only ever use PROT_READ, using MAP_SHARED would | |
55 | not give the system sufficient freedom to e.g. let the on disk | |
56 | file go away because it doesn't know we won't call mprotect later. */ | |
57 | # define MAP_COPY MAP_PRIVATE | |
58 | #endif | |
59 | #ifndef MAP_FILE | |
60 | /* Some systems do not have this flag; it is superfluous. */ | |
61 | # define MAP_FILE 0 | |
62 | #endif | |
cb09a2cd RM |
63 | |
64 | /* Record of contiguous pages already mapped from the locale archive. */ | |
/* One record exists per mmap'd window of the archive file; the records
   are chained into a singly linked list through NEXT. */
65 | struct archmapped | |
66 | { | |
/* Address at which the window is mapped in memory. */
67 | void *ptr; | |
/* Offset within the archive file at which the window starts. */
68 | uint32_t from; | |
/* Length of the window in bytes. */
69 | uint32_t len; | |
/* Following record in the list, or null for the last one. */
70 | struct archmapped *next; | |
71 | }; | |
/* Head of the list of mapped windows.  It is null until the first attempt
   to open the archive, at which point it is set to the address of
   HEADMAP, whether or not the attempt succeeds. */
72 | static struct archmapped *archmapped; | |
73 | ||
74 | /* This describes the mapping at the beginning of the file that contains | |
75 | the header data. There could be data in the following partial page, | |
76 | so this is searched like any other. Once the archive has been used, | |
77 | ARCHMAPPED points to this; if mapping the archive header failed, | |
78 | then headmap.ptr is null. */ | |
79 | static struct archmapped headmap; | |
/* When the archive has to be reopened later, its size, mtime, device and
   inode are compared against this snapshot to make sure it is still the
   same file whose header was read. */
80 | static struct stat64 archive_stat; /* stat of archive when header mapped. */ | |
81 | ||
82 | /* Record of locales that we have already loaded from the archive. */ | |
/* The records form a singly linked list that is searched by name before
   the archive itself is consulted. */
83 | struct locale_in_archive | |
84 | { | |
/* Following record in the list, or null for the last one. */
85 | struct locale_in_archive *next; | |
/* Heap-allocated copy (made with strdup) of the locale name as the caller
   passed it in; released again by _nl_archive_subfreeres. */
dac68e4c | 86 | char *name; |
cb09a2cd RM |
/* Per-category data as returned by _nl_intern_locale_data, indexed by
   category number.  An element can be null if interning that category
   failed; the LC_ALL slot is never filled in. */
87 | struct locale_data *data[__LC_LAST]; |
88 | }; | |
/* Head of the list of already loaded locales; new records are pushed onto
   the front. */
89 | static struct locale_in_archive *archloaded; | |
90 | ||
91 | ||
/* Local structure and subroutine of _nl_load_locale_from_archive, see
   below.  Each element describes where in the archive file the data for
   one locale category lives.  */
struct range
{
  uint32_t from;	/* Offset of the category data in the archive file.  */
  uint32_t len;		/* Length of the category data in bytes.  */
  int category;		/* Category number this data belongs to.  */
  void *result;		/* Initialized to null by the caller.  */
};

/* Comparison function for qsort: order two `struct range' objects by
   ascending file offset.  Returns a negative, zero or positive value
   when *P1 starts before, at, or after *P2.

   The offsets are unsigned 32-bit values, so simply returning their
   difference converted to `int' would yield the wrong sign whenever the
   two offsets are more than INT_MAX apart.  Compare them explicitly
   instead.  */
static int
rangecmp (const void *p1, const void *p2)
{
  const struct range *r1 = p1;
  const struct range *r2 = p2;

  return (r1->from > r2->from) - (r1->from < r2->from);
}
106 | ||
107 | ||
108 | /* Calculate the amount of space needed for all the tables described | |
109 | by the given header. Note we do not include the empty table space | |
110 | that has been preallocated in the file, so our mapping may not be | |
111 | large enough if localedef adds data to the file in place. However, | |
112 | doing that would permute the header fields while we are accessing | |
113 | them and thus not be safe anyway, so we don't allow for that. */ | |
114 | static inline off_t | |
115 | calculate_head_size (const struct locarhead *h) | |
116 | { | |
117 | off_t namehash_end = (h->namehash_offset | |
118 | + h->namehash_size * sizeof (struct namehashent)); | |
119 | off_t string_end = h->string_offset + h->string_used; | |
120 | off_t locrectab_end = (h->locrectab_offset | |
121 | + h->locrectab_used * sizeof (struct locrecent)); | |
122 | return MAX (namehash_end, MAX (string_end, locrectab_end)); | |
123 | } | |
124 | ||
125 | ||
126 | /* Find the locale *NAMEP in the locale archive, and return the | |
127 | internalized data structure for its CATEGORY data. If this locale has | |
128 | already been loaded from the archive, just returns the existing data | |
129 | structure. If successful, sets *NAMEP to point directly into the mapped | |
130 | archive string table; that way, the next call can short-circuit strcmp. */ | |
131 | struct locale_data * | |
132 | internal_function | |
133 | _nl_load_locale_from_archive (int category, const char **namep) | |
134 | { | |
135 | const char *name = *namep; | |
136 | struct | |
137 | { | |
138 | void *addr; | |
139 | size_t len; | |
140 | } results[__LC_LAST]; | |
141 | struct locale_in_archive *lia; | |
142 | struct locarhead *head; | |
143 | struct namehashent *namehashtab; | |
144 | struct locrecent *locrec; | |
145 | struct archmapped *mapped; | |
146 | struct archmapped *last; | |
147 | unsigned long int hval; | |
148 | size_t idx; | |
149 | size_t incr; | |
150 | struct range ranges[__LC_LAST - 1]; | |
151 | int nranges; | |
152 | int cnt; | |
153 | size_t ps = __sysconf (_SC_PAGE_SIZE); | |
154 | int fd = -1; | |
155 | ||
156 | /* Check if we have already loaded this locale from the archive. | |
157 | If we previously loaded the locale but found bogons in the data, | |
158 | then we will have stored a null pointer to return here. */ | |
159 | for (lia = archloaded; lia != NULL; lia = lia->next) | |
160 | if (name == lia->name || !strcmp (name, lia->name)) | |
161 | { | |
162 | *namep = lia->name; | |
163 | return lia->data[category]; | |
164 | } | |
165 | ||
166 | { | |
167 | /* If the name contains a codeset, then we normalize the name before | |
168 | doing the lookup. */ | |
169 | const char *p = strchr (name, '.'); | |
170 | if (p != NULL && p[1] != '@' && p[1] != '\0') | |
171 | { | |
172 | const char *rest = __strchrnul (++p, '@'); | |
173 | const char *normalized_codeset = _nl_normalize_codeset (p, rest - p); | |
174 | if (normalized_codeset == NULL) /* malloc failure */ | |
175 | return NULL; | |
176 | if (strncmp (normalized_codeset, p, rest - p) != 0 | |
177 | || normalized_codeset[rest - p] != '\0') | |
178 | { | |
179 | /* There is a normalized codeset name that is different from | |
180 | what was specified; reconstruct a new locale name using it. */ | |
181 | size_t normlen = strlen (normalized_codeset); | |
182 | size_t restlen = strlen (rest) + 1; | |
183 | char *newname = alloca (p - name + normlen + restlen); | |
184 | memcpy (__mempcpy (__mempcpy (newname, name, p - name), | |
185 | normalized_codeset, normlen), | |
186 | rest, restlen); | |
187 | free ((char *) normalized_codeset); | |
188 | name = newname; | |
189 | } | |
190 | } | |
191 | } | |
192 | ||
193 | /* Make sure the archive is loaded. */ | |
194 | if (archmapped == NULL) | |
195 | { | |
5bb99914 RM |
196 | void *result; |
197 | size_t headsize, mapsize; | |
198 | ||
cb09a2cd RM |
199 | /* We do this early as a sign that we have tried to open the archive. |
200 | If headmap.ptr remains null, that's an indication that we tried | |
201 | and failed, so we won't try again. */ | |
202 | archmapped = &headmap; | |
203 | ||
204 | /* The archive has never been opened. */ | |
205 | fd = __open64 (archfname, O_RDONLY); | |
206 | if (fd < 0) | |
207 | /* Cannot open the archive, for whatever reason. */ | |
208 | return NULL; | |
209 | ||
210 | if (__fxstat64 (_STAT_VER, fd, &archive_stat) == -1) | |
211 | { | |
212 | /* stat failed, very strange. */ | |
213 | close_and_out: | |
dd4f2115 UD |
214 | if (fd >= 0) |
215 | __close (fd); | |
cb09a2cd RM |
216 | return NULL; |
217 | } | |
218 | ||
cb09a2cd | 219 | |
5bb99914 RM |
220 | /* Map an initial window probably large enough to cover the header |
221 | and the first locale's data. With a large address space, we can | |
222 | just map the whole file and be sure everything is covered. */ | |
cb09a2cd | 223 | |
5bb99914 | 224 | mapsize = (sizeof (void *) > 4 ? archive_stat.st_size |
c88b4759 | 225 | : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW)); |
5bb99914 | 226 | |
4e20f1e2 | 227 | result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0); |
5bb99914 RM |
228 | if (result == MAP_FAILED) |
229 | goto close_and_out; | |
230 | ||
231 | /* Check whether the file is large enough for the sizes given in | |
232 | the header. Theoretically an archive could be so large that | |
233 | just the header fails to fit in our initial mapping window. */ | |
234 | headsize = calculate_head_size ((const struct locarhead *) result); | |
235 | if (headsize > mapsize) | |
236 | { | |
237 | (void) __munmap (result, mapsize); | |
238 | if (sizeof (void *) > 4 || headsize > archive_stat.st_size) | |
239 | /* The file is not big enough for the header. Bogus. */ | |
cb09a2cd | 240 | goto close_and_out; |
5bb99914 RM |
241 | |
242 | /* Freakishly long header. */ | |
243 | /* XXX could use mremap when available */ | |
244 | mapsize = (headsize + ps - 1) & ~(ps - 1); | |
4e20f1e2 RM |
245 | result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, |
246 | fd, 0); | |
cb09a2cd RM |
247 | if (result == MAP_FAILED) |
248 | goto close_and_out; | |
5bb99914 | 249 | } |
cb09a2cd | 250 | |
5bb99914 RM |
251 | if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size) |
252 | { | |
253 | /* We've mapped the whole file already, so we can be | |
254 | sure we won't need this file descriptor later. */ | |
255 | __close (fd); | |
256 | fd = -1; | |
cb09a2cd | 257 | } |
5bb99914 RM |
258 | |
259 | headmap.ptr = result; | |
260 | /* headmap.from already initialized to zero. */ | |
261 | headmap.len = mapsize; | |
cb09a2cd RM |
262 | } |
263 | ||
264 | /* If there is no archive or it cannot be loaded for some reason fail. */ | |
265 | if (__builtin_expect (headmap.ptr == NULL, 0)) | |
dd4f2115 | 266 | goto close_and_out; |
cb09a2cd RM |
267 | |
268 | /* We have the archive available. To find the name we first have to | |
269 | determine its hash value. */ | |
270 | hval = compute_hashval (name, strlen (name)); | |
271 | ||
272 | head = headmap.ptr; | |
273 | namehashtab = (struct namehashent *) ((char *) head | |
274 | + head->namehash_offset); | |
275 | ||
276 | idx = hval % head->namehash_size; | |
277 | incr = 1 + hval % (head->namehash_size - 2); | |
278 | ||
279 | /* If the name_offset field is zero this means this is a | |
280 | deleted entry and therefore no entry can be found. */ | |
281 | while (1) | |
282 | { | |
283 | if (namehashtab[idx].name_offset == 0) | |
284 | /* Not found. */ | |
dd4f2115 | 285 | goto close_and_out; |
cb09a2cd RM |
286 | |
287 | if (namehashtab[idx].hashval == hval | |
288 | && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0) | |
289 | /* Found the entry. */ | |
290 | break; | |
291 | ||
292 | idx += incr; | |
293 | if (idx >= head->namehash_size) | |
294 | idx -= head->namehash_size; | |
295 | } | |
296 | ||
297 | /* We found an entry. It might be a placeholder for a removed one. */ | |
298 | if (namehashtab[idx].locrec_offset == 0) | |
dd4f2115 | 299 | goto close_and_out; |
cb09a2cd RM |
300 | |
301 | locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset); | |
302 | ||
303 | if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */) | |
304 | { | |
305 | /* We already have the whole locale archive mapped in. */ | |
306 | assert (headmap.len == archive_stat.st_size); | |
307 | for (cnt = 0; cnt < __LC_LAST; ++cnt) | |
308 | if (cnt != LC_ALL) | |
309 | { | |
310 | if (locrec->record[cnt].offset + locrec->record[cnt].len | |
311 | > headmap.len) | |
312 | /* The archive locrectab contains bogus offsets. */ | |
dd4f2115 | 313 | goto close_and_out; |
cb09a2cd RM |
314 | results[cnt].addr = headmap.ptr + locrec->record[cnt].offset; |
315 | results[cnt].len = locrec->record[cnt].len; | |
316 | } | |
317 | } | |
318 | else | |
319 | { | |
320 | /* Get the offsets of the data files and sort them. */ | |
321 | for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt) | |
322 | if (cnt != LC_ALL) | |
323 | { | |
324 | ranges[nranges].from = locrec->record[cnt].offset; | |
325 | ranges[nranges].len = locrec->record[cnt].len; | |
326 | ranges[nranges].category = cnt; | |
327 | ranges[nranges].result = NULL; | |
328 | ||
329 | ++nranges; | |
330 | } | |
331 | ||
332 | qsort (ranges, nranges, sizeof (ranges[0]), rangecmp); | |
333 | ||
334 | /* The information about mmap'd blocks is kept in a list. | |
335 | Skip over the blocks which are before the data we need. */ | |
336 | last = mapped = archmapped; | |
337 | for (cnt = 0; cnt < nranges; ++cnt) | |
338 | { | |
339 | int upper; | |
340 | size_t from; | |
341 | size_t to; | |
342 | void *addr; | |
343 | struct archmapped *newp; | |
344 | ||
345 | /* Determine whether the appropriate page is already mapped. */ | |
346 | while (mapped != NULL | |
b5560a44 RM |
347 | && (mapped->from + mapped->len |
348 | <= ranges[cnt].from + ranges[cnt].len)) | |
cb09a2cd RM |
349 | { |
350 | last = mapped; | |
351 | mapped = mapped->next; | |
352 | } | |
353 | ||
354 | /* Do we have a match? */ | |
355 | if (mapped != NULL | |
356 | && mapped->from <= ranges[cnt].from | |
b5560a44 RM |
357 | && (ranges[cnt].from + ranges[cnt].len |
358 | <= mapped->from + mapped->len)) | |
cb09a2cd RM |
359 | { |
360 | /* Yep, already loaded. */ | |
361 | results[ranges[cnt].category].addr = ((char *) mapped->ptr | |
362 | + ranges[cnt].from | |
363 | - mapped->from); | |
364 | results[ranges[cnt].category].len = ranges[cnt].len; | |
365 | continue; | |
366 | } | |
367 | ||
368 | /* Map the range with the locale data from the file. We will | |
369 | try to cover as much of the locale as possible. I.e., if the | |
370 | next category (next as in "next offset") is on the current or | |
371 | immediately following page we use it as well. */ | |
372 | assert (powerof2 (ps)); | |
373 | from = ranges[cnt].from & ~(ps - 1); | |
374 | upper = cnt; | |
375 | do | |
376 | { | |
4e20f1e2 | 377 | to = ranges[upper].from + ranges[upper].len; |
6dd67bd5 | 378 | if (to > (size_t) archive_stat.st_size) |
4e20f1e2 | 379 | /* The archive locrectab contains bogus offsets. */ |
dd4f2115 | 380 | goto close_and_out; |
4e20f1e2 RM |
381 | to = (to + ps - 1) & ~(ps - 1); |
382 | ||
b5560a44 RM |
383 | /* If a range is already mmaped in, stop. */ |
384 | if (mapped != NULL && ranges[upper].from >= mapped->from) | |
385 | break; | |
4e20f1e2 | 386 | |
cb09a2cd RM |
387 | ++upper; |
388 | } | |
389 | /* Loop while still in contiguous pages. */ | |
390 | while (upper < nranges && ranges[upper].from < to + ps); | |
391 | ||
cb09a2cd RM |
392 | /* Open the file if it hasn't happened yet. */ |
393 | if (fd == -1) | |
394 | { | |
395 | struct stat64 st; | |
396 | fd = __open64 (archfname, O_RDONLY); | |
397 | if (fd == -1) | |
398 | /* Cannot open the archive, for whatever reason. */ | |
399 | return NULL; | |
400 | /* Now verify we think this is really the same archive file | |
401 | we opened before. If it has been changed we cannot trust | |
402 | the header we read previously. */ | |
403 | if (__fxstat64 (_STAT_VER, fd, &st) < 0 | |
404 | || st.st_size != archive_stat.st_size | |
405 | || st.st_mtime != archive_stat.st_mtime | |
406 | || st.st_dev != archive_stat.st_dev | |
407 | || st.st_ino != archive_stat.st_ino) | |
dd4f2115 | 408 | goto close_and_out; |
cb09a2cd RM |
409 | } |
410 | ||
411 | /* Map the range from the archive. */ | |
4e20f1e2 RM |
412 | addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY, |
413 | fd, from); | |
cb09a2cd | 414 | if (addr == MAP_FAILED) |
dd4f2115 | 415 | goto close_and_out; |
cb09a2cd RM |
416 | |
417 | /* Allocate a record for this mapping. */ | |
418 | newp = (struct archmapped *) malloc (sizeof (struct archmapped)); | |
419 | if (newp == NULL) | |
420 | { | |
7a8bdff0 | 421 | (void) __munmap (addr, to - from); |
dd4f2115 | 422 | goto close_and_out; |
cb09a2cd RM |
423 | } |
424 | ||
425 | /* And queue it. */ | |
426 | newp->ptr = addr; | |
427 | newp->from = from; | |
428 | newp->len = to - from; | |
429 | assert (last->next == mapped); | |
430 | newp->next = mapped; | |
431 | last->next = newp; | |
432 | last = newp; | |
433 | ||
434 | /* Determine the load addresses for the category data. */ | |
435 | do | |
436 | { | |
437 | assert (ranges[cnt].from >= from); | |
438 | results[ranges[cnt].category].addr = ((char *) addr | |
439 | + ranges[cnt].from - from); | |
440 | results[ranges[cnt].category].len = ranges[cnt].len; | |
441 | } | |
442 | while (++cnt < upper); | |
443 | --cnt; /* The 'for' will increase 'cnt' again. */ | |
444 | } | |
445 | } | |
446 | ||
dd4f2115 UD |
447 | /* We don't need the file descriptor any longer. */ |
448 | if (fd >= 0) | |
449 | __close (fd); | |
450 | fd = -1; | |
451 | ||
cb09a2cd RM |
452 | /* We succeeded in mapping all the necessary regions of the archive. |
453 | Now we need the expected data structures to point into the data. */ | |
454 | ||
455 | lia = malloc (sizeof *lia); | |
456 | if (__builtin_expect (lia == NULL, 0)) | |
457 | return NULL; | |
458 | ||
dac68e4c RM |
459 | lia->name = strdup (*namep); |
460 | if (__builtin_expect (lia->name == NULL, 0)) | |
461 | { | |
462 | free (lia); | |
463 | return NULL; | |
464 | } | |
465 | ||
cb09a2cd RM |
466 | lia->next = archloaded; |
467 | archloaded = lia; | |
468 | ||
469 | for (cnt = 0; cnt < __LC_LAST; ++cnt) | |
470 | if (cnt != LC_ALL) | |
471 | { | |
472 | lia->data[cnt] = _nl_intern_locale_data (cnt, | |
473 | results[cnt].addr, | |
474 | results[cnt].len); | |
475 | if (__builtin_expect (lia->data[cnt] != NULL, 1)) | |
476 | { | |
477 | /* _nl_intern_locale_data leaves us these fields to initialize. */ | |
478 | lia->data[cnt]->alloc = ld_archive; | |
479 | lia->data[cnt]->name = lia->name; | |
0f283ffc RM |
480 | |
481 | /* We do this instead of bumping the count each time we return | |
482 | this data because the mappings stay around forever anyway | |
483 | and we might as well hold on to a little more memory and not | |
484 | have to rebuild it on the next lookup of the same thing. | |
485 | If we were to maintain the usage_count normally and let the | |
486 | structures be freed, we would have to remove the elements | |
487 | from archloaded too. */ | |
488 | lia->data[cnt]->usage_count = UNDELETABLE; | |
cb09a2cd RM |
489 | } |
490 | } | |
491 | ||
492 | *namep = lia->name; | |
493 | return lia->data[category]; | |
494 | } | |
a89a3dab RM |
495 | |
496 | void | |
497 | _nl_archive_subfreeres (void) | |
498 | { | |
499 | struct locale_in_archive *lia; | |
500 | struct archmapped *am; | |
501 | ||
502 | /* Toss out our cached locales. */ | |
503 | lia = archloaded; | |
504 | while (lia != NULL) | |
505 | { | |
506 | int category; | |
507 | struct locale_in_archive *dead = lia; | |
508 | lia = lia->next; | |
509 | ||
dac68e4c | 510 | free (dead->name); |
a89a3dab RM |
511 | for (category = 0; category < __LC_LAST; ++category) |
512 | if (category != LC_ALL) | |
513 | /* _nl_unload_locale just does this free for the archive case. */ | |
514 | free (dead->data[category]); | |
515 | free (dead); | |
516 | } | |
517 | archloaded = NULL; | |
518 | ||
519 | if (archmapped != NULL) | |
520 | { | |
521 | /* Now toss all the mapping windows, which we know nothing is using any | |
522 | more because we just tossed all the locales that point into them. */ | |
523 | ||
524 | assert (archmapped == &headmap); | |
525 | archmapped = NULL; | |
7a8bdff0 | 526 | (void) __munmap (headmap.ptr, headmap.len); |
a89a3dab RM |
527 | am = headmap.next; |
528 | while (am != NULL) | |
529 | { | |
530 | struct archmapped *dead = am; | |
531 | am = am->next; | |
7a8bdff0 | 532 | (void) __munmap (dead->ptr, dead->len); |
a89a3dab RM |
533 | free (dead); |
534 | } | |
535 | } | |
536 | } |