From: Paul Pluzhnikov Date: Wed, 5 Mar 2014 03:07:05 +0000 (-0800) Subject: Add "fastload" support. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=590786950c039260b75db235e81f2ae1633a055e;p=thirdparty%2Fglibc.git Add "fastload" support. --- diff --git a/elf/dl-close.c b/elf/dl-close.c index ecd6729704e..cbf336609b9 100644 --- a/elf/dl-close.c +++ b/elf/dl-close.c @@ -135,6 +135,12 @@ _dl_close_worker (struct link_map *map, bool force) Lmid_t nsid = map->l_ns; struct link_namespaces *ns = &GL(dl_ns)[nsid]; + /* Recompute _ns_last next time we need it. + It is tempting to set _ns_last to map->l_prev, but map->l_prev might also + be going away as part of current dlclose, and using it may cause _ns_last + to become dangling. */ + ns->_ns_last = NULL; + retry: dl_close_state = pending; diff --git a/elf/dl-load.c b/elf/dl-load.c index 1b08f294e89..6f08e8ad131 100644 --- a/elf/dl-load.c +++ b/elf/dl-load.c @@ -1900,6 +1900,37 @@ open_path (const char *name, size_t namelen, off_t offset, int mode, return -1; } +static int +match_one (const char *name, struct link_map *l) +{ + /* If the requested name matches the soname of a loaded object, + use that object. Elide this check for names that have not + yet been opened. */ + if (__builtin_expect (l->l_faked, 0) != 0 + || __builtin_expect (l->l_removed, 0) != 0) + return 0; + if (!_dl_name_match_p (name, l)) + { + const char *soname; + + if (__builtin_expect (l->l_soname_added, 1) + || l->l_info[DT_SONAME] == NULL) + return 0; + + soname = ((const char *) D_PTR (l, l_info[DT_STRTAB]) + + l->l_info[DT_SONAME]->d_un.d_val); + if (strcmp (name, soname) != 0) + return 0; + + /* We have a match on a new name -- cache it. */ + add_name_to_object (l, soname); + l->l_soname_added = 1; + } + + /* We have a match. */ + return 1; +} + /* Map in the shared object file NAME. */ struct link_map * @@ -1917,33 +1948,21 @@ _dl_map_object (struct link_map *loader, const char *name, off_t offset, assert (nsid < GL(dl_nns)); /* Look for this name among those already loaded. */ - for (l = GL(dl_ns)[nsid]._ns_loaded; l; l = l->l_next) + if (!GLRO(dl_enable_fastload) || name[0] == '\0') { - /* If the requested name matches the soname of a loaded object, - use that object. Elide this check for names that have not - yet been opened. */ - if (__glibc_unlikely ((l->l_faked | l->l_removed) != 0)) - continue; - if (!_dl_name_match_p (name, l)) - { - const char *soname; - - if (__glibc_likely (l->l_soname_added) - || l->l_info[DT_SONAME] == NULL) - continue; - - soname = ((const char *) D_PTR (l, l_info[DT_STRTAB]) - + l->l_info[DT_SONAME]->d_un.d_val); - if (strcmp (name, soname) != 0) - continue; - - /* We have a match on a new name -- cache it. */ - add_name_to_object (l, soname); - l->l_soname_added = 1; - } + /* Special case: both main exe and vdso can have empty name; + so search from head: it is important to return the map for main + a.out; else dlsym(0, ...) will fail unexpectedly. */ - /* We have a match. */ - return l; + for (l = GL(dl_ns)[nsid]._ns_loaded; l; l = l->l_next) + if (match_one (name, l)) + return l; + } + else + { + for (l = _dl_last_entry (&GL(dl_ns)[nsid]); l; l = l->l_prev) + if (match_one (name, l)) + return l; } /* Display information if we are debugging. */ diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c index 41dced0e19c..297ff9dec5b 100644 --- a/elf/dl-lookup.c +++ b/elf/dl-lookup.c @@ -39,6 +39,11 @@ #define VERSTAG(tag) (DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGIDX (tag)) +#ifndef ADDRIDX +# define ADDRIDX(tag) (DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGNUM \ + + DT_EXTRANUM + DT_VALNUM + DT_ADDRTAGIDX (tag)) +#endif + struct sym_val { @@ -325,6 +330,450 @@ do_lookup_unique (const char *undef_name, uint_fast32_t new_hash, result->m = (struct link_map *) map; } +/* Bob Jenkins's hash function, free for any use. + http://burtleburtle.net/bob/hash/ */ +#define mix(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +typedef unsigned int uint32 __attribute__ ((mode (SI))); +static uint32 +hash_with_seed (const char *k, uint32 length, uint32 seed) +{ + uint32 a, b, c, len; + + /* Set up the internal state */ + len = length; + a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + c = seed; /* the previous hash value */ + + /*---------------------------------------- handle most of the key */ + while (len >= 12) + { + a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24)); + b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24)); + c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24)); + mix (a,b,c); + k += 12; + len -= 12; + } + + /*------------------------------------- handle the last 11 bytes */ + c += length; + switch (len) /* all the case statements fall through */ + { + case 11: c+=((uint32)k[10]<<24); + case 10: c+=((uint32)k[9]<<16); + case 9 : c+=((uint32)k[8]<<8); + /* the first byte of c is reserved for the length */ + case 8 : b+=((uint32)k[7]<<24); + case 7 : b+=((uint32)k[6]<<16); + case 6 : b+=((uint32)k[5]<<8); + case 5 : b+=k[4]; + case 4 : a+=((uint32)k[3]<<24); + case 3 : a+=((uint32)k[2]<<16); + case 2 : a+=((uint32)k[1]<<8); + case 1 : a+=k[0]; + /* case 0: nothing left to add */ + } + mix (a,b,c); + /*-------------------------------------------- report the result */ + return c; +} + + +/* A hash table used to speed up lookups when we have a lot of shared + libraries. We record in the table how many ELF objects in the link + map we can safely skip, because they are known to not contain the symbol + we are looking for. + + Values start out at N (num objects in the main_map). We then go through + each object (executable or DSO) in their order in the main_map and insert + pos_in_map_map into the table. + + If there is already an earlier (smaller) entry in the table, we leave it + alone. It could be a hash collision, or it could be an earlier + instance of the same symbol (eg weak __stdout definition in + multiple objects). + + To save space, the table contains uint16_t entries, and so we can't + record main map positions over 65535. + + It turns out that even when only 25% of slots are used, collision + rate is around 10%, which is quite high, and causes lookups to be slow. + + So we use a second table (using a different hash seed). Hash collisions + using two separate hashes are quite rare. + + Note that we allocate the two tables together (a single block twice + the size of one table). */ + +static void +insert_into_hash_table_1 (int pos_in_main_map, const char *symbol_name, + size_t name_len, dl_position_table_entry_t *table, + int seed) +{ + uint32 hash = hash_with_seed (symbol_name, name_len, seed); + int pos = hash & GLRO(dl_position_hash_mask); + if (pos_in_main_map < table[pos]) + { + table[pos] = pos_in_main_map; + if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0)) + _dl_debug_printf ("fastload hash for %s: [%u] = %u (table %u)\n", + symbol_name, (unsigned int) pos, + (unsigned int) table[pos], + (GLRO(dl_position_hash_table) == table) ? 0 : 1); + } + else + { + if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0)) + _dl_debug_printf + ("fastload hash for %s: [%u] already set to %u <= %u (table %u)\n", + symbol_name, (unsigned int) pos, + (unsigned int) table[pos], + (unsigned int) pos_in_main_map, + (GLRO(dl_position_hash_table) == table) ? 0 : 1); + } +} + +#define HASH_SEED_A 0 +#define HASH_SEED_B 5381 /* As good as any other value. */ + +static void +insert_into_hash_table (int pos_in_main_map, const char *symbol_name) +{ + dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table); + const int table_size = GLRO(dl_position_hash_mask) + 1; + dl_position_table_entry_t *const table2 = table1 + table_size; + const size_t name_len = strlen (symbol_name); + + insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table1, + HASH_SEED_A); + insert_into_hash_table_1 (pos_in_main_map, symbol_name, name_len, table2, + HASH_SEED_B); +} + +static inline int +earliest_pos_from_hash_table_1 (const char *symbol_name, + const dl_position_table_entry_t *table, + int seed) +{ + int pos; + uint32 hash; + + hash = hash_with_seed (symbol_name, strlen (symbol_name), seed); + pos = hash & GLRO(dl_position_hash_mask); + if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0)) + _dl_debug_printf + ("fastload earliest pos for %s: [%u] = %u (table %u)\n", + symbol_name, (unsigned int) pos, (unsigned int) table[pos], + (GLRO(dl_position_hash_table) == table) ? 0 : 1); + + return table[pos]; +} + +static inline int +earliest_pos_from_hash_table (const char *undef_name) +{ + dl_position_table_entry_t *const table1 = GLRO(dl_position_hash_table); + + if (table1 == NULL) + return 0; /* Search from the beginning of the list. */ + else + { + const int table_size = GLRO(dl_position_hash_mask) + 1; + dl_position_table_entry_t *const table2 = table1 + table_size; + const int skip_1 = earliest_pos_from_hash_table_1 (undef_name, table1, + HASH_SEED_A); + const int skip_2 = earliest_pos_from_hash_table_1 (undef_name, table2, + HASH_SEED_B); + + /* Possibilities: + + * Both skip_1 and skip_2 are positive and same: + there were no collisions at insertion in either table, or there + was double-collision with symbols from the same library. + + If skip_1 == skip_2 == N (num_directly_loaded_objects), then the + slot was empty in both tables after they were filled (i.e. there + is no symbol that hashes to corresponding slot). In that case, + searching first [0, N) is pointless. See b/5609692. + + True skip_to == skip_1 == skip_2. + + * Both skip_1 and skip_2 are positive but different: + there was a collision in one or both tables, or one of the slots + remained unfilled after all symbols have been inserted. + It is safe to start at max, true skip_to count can not be less + than max. */ + + return (skip_1 < skip_2) ? skip_2 : skip_1; + } +} + + +static void +fill_hash_table (const struct link_map *const map, int pos_in_main_map, + int num_symbols) +{ + const ElfW(Sym) *const symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]); + const char *const strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + int k; + + /* Iterate through the symbols defined in this map and insert them + into the bloom filter. If the symbol wouldn't pass the tests in + do_lookup_x(), then don't insert it. */ + for (k = 0; k < num_symbols; k++) + { + const ElfW(Sym) *sym = &symtab[k]; + + if (sym->st_value == 0 + && ELFW(ST_TYPE) (sym->st_info) != STT_TLS) + continue; + + switch (ELFW(ST_TYPE) (sym->st_info)) + { + case STT_SECTION: + case STT_FILE: + case STT_COMMON: + continue; + default: + break; + } + + switch (ELFW(ST_BIND) (sym->st_info)) + { + case STB_LOCAL: /* Local symbols are ignored. */ + continue; + default: + break; + } + + insert_into_hash_table (pos_in_main_map, strtab + sym->st_name); + } +} + +static dl_position_table_entry_t * +allocate_position_hash_table (size_t table_size, int max_pos) +{ + /* Implementation detail: we actually allocate two "logical" tables in a + single block. */ + + const size_t total_size = 2 * table_size; + dl_position_table_entry_t *table + = malloc (total_size * sizeof (dl_position_table_entry_t)); + + if (table == NULL) + return NULL; + + for (int k = 0; k < total_size; ++k) + table[k] = max_pos; + + return table; +} + +static int +num_dynsyms (const struct link_map *const map) +{ + if (map->l_info[DT_HASH] != NULL) + { + /* If DT_HASH is present, we unconditionally use it, since it already + has the answer precomputed and waiting for us. */ + const Elf_Symndx *const hash = (void *) D_PTR (map, l_info[DT_HASH]); + + if (hash != NULL) + { + /* DT_HASH dynamic structure points to a symbol hash table + with the following layout: + + nbucket + nchain (== number of symbols in dynamic symbol table) + bucket[0] + ... + + http://refspecs.freestandards.org/elf/TIS1.1.pdf (p. 2.19). */ + + return hash[1]; + } + } + else if (map->l_info[ADDRIDX (DT_GNU_HASH)] != NULL) + { + /* The best documentation for GNU_HASH I found: + http://blogs.sun.com/ali/entry/gnu_hash_elf_sections + + _dl_setup_hash() has already set things up for us. */ + + if (__builtin_expect (map->l_nbuckets < 1, 0)) + { + /* Paranoia: neither gold nor gnu-ld will construct an empty + .gnu.hash, but some other linker just might. */ + return 0; + } + + /* In the GNU hash the symbol index of a symbol is determined by + the offset of the symbol's entry in the hash table itself. + + We start at the last bucket map->l_gnu_chain_zero[map->l_nbuckets-1] + and loop backward from the end until we find a bucket which is + not zero. + + Then we walk forward to find the last entry in the chain which + starts at that bucket. The terminating entry gives us desired + symbol index. */ + + int last_bucket_idx = map->l_nbuckets - 1; + + while (last_bucket_idx > 0 && + map->l_gnu_buckets[last_bucket_idx] == 0) + { + --last_bucket_idx; + } + + const Elf32_Word last_bucket = map->l_gnu_buckets[last_bucket_idx]; + + if (__builtin_expect (last_bucket == 0, 0)) + { + /* This shouldn't happen unless the library has only static + variables with global ctors, and nothing else. */ + return 0; + } + + /* Start of the hash chain for the last non-zero bucket. */ + const Elf32_Word *hasharr = &map->l_gnu_chain_zero[last_bucket]; + + /* The chain is terminated by an entry with LSBit set. */ + while ((*hasharr & 1u) == 0) + ++hasharr; + + /* Size of dynamic symbol table is "index of last symbol" + 1. */ + return hasharr - map->l_gnu_chain_zero + 1; + } + + return 0; +} + +/* Create the fastload position hash (if requested). Given a link_map + containing all required objects, iterate through all symbols. For each + symbol which could match in do_lookup_x, insert the index of the + providing object in main_map. */ + +void +_dl_fill_position_hash (struct link_map *main_map) +{ + const int num_objects = main_map->l_searchlist.r_nlist; + const int debug_fastload = GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD; + int k; + + /* If we have more than dl_position_hash_cutoff shared libraries, + fill in the hash table of earliest known positions for symbols in + main_map. We'll use this in do_lookup_x(). */ + + /* If cutoff or size is negative, fastload is disabled. */ + if (GLRO(dl_position_hash_cutoff) < 0 || GLRO(dl_position_hash_bits) < 0) + { + if (__builtin_expect (debug_fastload, 0)) + _dl_debug_printf ("fastload: disabled (configuration)\n"); + return; + } + + /* If we don't have enough mapped objects, fastload is disabled. */ + if (num_objects <= GLRO(dl_position_hash_cutoff)) + { + if (__builtin_expect (debug_fastload, 0)) + _dl_debug_printf ("fastload: disabled (too few objects," + " %u <= cutoff %u)\n", + num_objects, GLRO(dl_position_hash_cutoff)); + return; + } + + int *ndynsyms = NULL; + + if (GLRO(dl_position_hash_bits) == 0) + { + /* Table size has not been specified via LD_FASTLOAD_HASH_BITS. + Compute appropriate size for <= 25% load factor. */ + + int total_symbols = 0; + + /* It is safe to call alloca() here, because we are executing in + the context of dl_main, i.e. we are on the main stack, and are + not yet using much of it. Assuming sane upper limit of 10K direct + dependencies, we'll use at most 40K bytes here. */ + ndynsyms = alloca (num_objects * sizeof (int)); + + for (k = 0; k < num_objects; ++k) + { + const struct link_map *const map = main_map->l_searchlist.r_list[k]; + const int nsyms = num_dynsyms (map); + + ndynsyms[k] = nsyms; + total_symbols += nsyms; + if (__builtin_expect (debug_fastload, 0)) + _dl_debug_printf ("fastload: %s: %u symbols\n", map->l_name, nsyms); + } + + /* Ensure table size is >= 4 * total_symbols. Generally this + over-sizes the table by more than 4, since there usually are lots + of duplicate symbols (from different DSOs) as well. */ + + GLRO(dl_position_hash_bits) + = (8 * sizeof (total_symbols)) - __builtin_clz (total_symbols) + 2; + + if (__builtin_expect (debug_fastload, 0)) + _dl_debug_printf ("fastload: will use %u hash bits for %u total " + "symbols\n", + GLRO(dl_position_hash_bits), total_symbols); + } + + /* If the requested or computed table size is too large, limit it. */ + if (GLRO(dl_position_hash_bits) > DL_POSITION_HASH_BITS_MAX) + { + if (__builtin_expect (debug_fastload, 0)) + _dl_debug_printf ("fastload: requested table too large" + " (%u > max %u bits)\n", + GLRO(dl_position_hash_bits), + DL_POSITION_HASH_BITS_MAX); + GLRO(dl_position_hash_bits) = DL_POSITION_HASH_BITS_MAX; + } + + int table_size = 1 << GLRO(dl_position_hash_bits); + GLRO(dl_position_hash_mask) = table_size - 1; + if (__builtin_expect (debug_fastload, 0)) + _dl_debug_printf ("fastload: enabled with %u entry table\n", table_size); + + int max_pos = num_objects < DL_POSITION_MAX ? num_objects : DL_POSITION_MAX; + GLRO(dl_position_hash_table) = allocate_position_hash_table (table_size, + max_pos); + if (GLRO(dl_position_hash_table) == NULL) + { + if (__builtin_expect (debug_fastload, 0)) + _dl_debug_printf ("fastload: failed to allocate table\n"); + return; + } + + /* There is no point in going beyond max_pos: the rest of the table + has already been saturated with max_pos value. */ + for (k = 0; k < max_pos; ++k) + { + const struct link_map *const map = main_map->l_searchlist.r_list[k]; + + /* Reuse num_dynsyms() result we computed earlier if possible: + recomputing it may be somewhat expensive with DT_GNU_HASH. */ + const int nsyms = ndynsyms ? ndynsyms[k] : num_dynsyms (map); + + fill_hash_table (map, k, nsyms); + } +} + /* Inner part of the lookup functions. We return a value > 0 if we found the symbol, the value 0 if nothing is found and < 0 if something bad happened. */ @@ -344,6 +793,35 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash, __asm volatile ("" : "+r" (n), "+m" (scope->r_list)); struct link_map **list = scope->r_list; + if (GLRO(dl_enable_fastload)) + { + if (scope == GL(dl_ns)[LM_ID_BASE]._ns_main_searchlist && i == 0) + { + const int skip_to = earliest_pos_from_hash_table (undef_name); + + if (skip_to < n) + { + i = skip_to; + if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0)) + _dl_debug_printf ("fastload %s skipping to %u\n", + undef_name, (unsigned int) i); + } + else + { + /* Symbol was not found in any of the initial libraries, + and no new libraries have been added. */ + + assert (skip_to == n); + + if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FASTLOAD, 0)) + _dl_debug_printf ("fastload %s, %u >= %u (scope->r_nlist)\n", + undef_name, (unsigned int) skip_to, + (unsigned int) n); + return 0; + } + } + } + do { const struct link_map *map = list[i]->l_real; diff --git a/elf/dl-support.c b/elf/dl-support.c index 9349134fa03..4f1c9de4c5c 100644 --- a/elf/dl-support.c +++ b/elf/dl-support.c @@ -142,6 +142,13 @@ hp_timing_t _dl_cpuclock_offset; void (*_dl_init_static_tls) (struct link_map *) = &_dl_nothread_init_static_tls; +/* The merged position hash table used if we have a lot of shared objects. */ +dl_position_table_entry_t *_dl_position_hash_table; +int _dl_position_hash_mask; +int _dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT; +int _dl_position_hash_bits; +int _dl_enable_fastload = 1; + size_t _dl_pagesize = EXEC_PAGESIZE; int _dl_inhibit_cache; diff --git a/elf/dl-version.c b/elf/dl-version.c index 7860cc817e5..6c1d9412333 100644 --- a/elf/dl-version.c +++ b/elf/dl-version.c @@ -34,11 +34,23 @@ find_needed (const char *name, struct link_map *map) struct link_map *tmap; unsigned int n; - for (tmap = GL(dl_ns)[map->l_ns]._ns_loaded; tmap != NULL; - tmap = tmap->l_next) - if (_dl_name_match_p (name, tmap)) - return tmap; - + if (!GLRO(dl_enable_fastload) || name[0] == '\0') + { + /* Special case: both main exe and vdso can have empty name; + so search from head: it is important to return the map for main + a.out; else dlsym(0, ...) will fail unexpectedly. */ + for (tmap = GL(dl_ns)[map->l_ns]._ns_loaded; tmap != NULL; + tmap = tmap->l_next) + if (_dl_name_match_p (name, tmap)) + return tmap; + } + else + { + for (tmap = _dl_last_entry (&GL(dl_ns)[map->l_ns]); tmap != NULL; + tmap = tmap->l_prev) + if (_dl_name_match_p (name, tmap)) + return tmap; + } /* The required object is not in the global scope, look to see if it is a dependency of the current object. */ for (n = 0; n < map->l_searchlist.r_nlist; n++) diff --git a/elf/rtld.c b/elf/rtld.c index 9468cd60d94..4f61d49d0ac 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -291,6 +291,9 @@ struct rtld_global_ro _rtld_global_ro attribute_relro = ._dl_open = _dl_open, ._dl_close = _dl_close, ._dl_tls_get_addr_soft = _dl_tls_get_addr_soft, + ._dl_position_hash_cutoff = DL_POSITION_HASH_CUTOFF_DEFAULT, + ._dl_position_hash_bits = 0, + ._dl_enable_fastload = 1, #ifdef HAVE_DL_DISCOVER_OSVERSION ._dl_discover_osversion = _dl_discover_osversion #endif @@ -1767,6 +1770,12 @@ ERROR: ld.so: object '%s' cannot be loaded as audit interface: %s; ignored.\n", dependencies in the executable's searchlist for symbol resolution. */ HP_TIMING_NOW (start); _dl_map_object_deps (main_map, preloads, npreloads, mode == trace, 0); + + /* We have now finished loading every required (linked-in) object. + Set up the position hash if needed. */ + if (GLRO(dl_enable_fastload) == 1) + _dl_fill_position_hash (main_map); + HP_TIMING_NOW (stop); HP_TIMING_DIFF (diff, start, stop); HP_TIMING_ACCUM_NT (load_time, diff); @@ -2394,10 +2403,12 @@ process_dl_debug (const char *dl_debug) DL_DEBUG_VERSIONS | DL_DEBUG_IMPCALLS }, { LEN_AND_STR ("scopes"), "display scope information", DL_DEBUG_SCOPES }, + { LEN_AND_STR ("fastload"), "display fastload information", + DL_DEBUG_FASTLOAD }, { LEN_AND_STR ("all"), "all previous options combined", DL_DEBUG_LIBS | DL_DEBUG_RELOC | DL_DEBUG_FILES | DL_DEBUG_SYMBOLS | DL_DEBUG_BINDINGS | DL_DEBUG_VERSIONS | DL_DEBUG_IMPCALLS - | DL_DEBUG_SCOPES }, + | DL_DEBUG_SCOPES | DL_DEBUG_FASTLOAD }, { LEN_AND_STR ("statistics"), "display relocation statistics", DL_DEBUG_STATISTICS }, { LEN_AND_STR ("unused"), "determined unused DSOs", @@ -2680,6 +2691,33 @@ process_envvars (enum mode *modep) EXTRA_LD_ENVVARS #endif } + + /* Handle all fastload-related env vars here. This may duplicate + effort with the switch table above, but it localizes changes + made by the fastload patch. On Linux, the '15' case is used + by another environment variable (LIBRARY_VERSION) and much + change would be needed to add support adding a variable of + that length with the existing style. */ + switch (len) + { + case 15: + if (memcmp (envline, "FASTLOAD_CUTOFF", 15) == 0) + GLRO(dl_position_hash_cutoff) + = _dl_strtoul (&envline[16], NULL);; + break; + + case 16: + if (memcmp (envline, "FASTLOAD_ENABLED", 16) == 0) + GLRO(dl_enable_fastload) + = _dl_strtoul (&envline[17], NULL);; + break; + + case 18: + if (memcmp (envline, "FASTLOAD_HASH_BITS", 18) == 0) + GLRO(dl_position_hash_bits) + = _dl_strtoul (&envline[19], NULL);; + break; + } } /* The caller wants this information. */ diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 7ea931817b0..1df593c96b1 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -275,6 +276,13 @@ typedef void (*receiver_fct) (int, const char *, const char *); The `-ldl' library functions in provide a simple user interface to run-time dynamic linking. */ +/* To save storage space in the hash table, we use uint16_t, which allows + us to support up to 64K of DSOs efficiently. */ +typedef uint16_t dl_position_table_entry_t; + +/* 65535 -- max position that could be recorded in + dl_position_table_entry_t. */ +#define DL_POSITION_MAX UINT16_MAX #ifndef SHARED # define EXTERN extern @@ -302,6 +310,8 @@ struct rtld_global { /* A pointer to the map for the main map. */ struct link_map *_ns_loaded; + /* Tail end of the _ns_loaded link chain. Computed on demand. */ + struct link_map *_ns_last; /* Number of object in the _dl_loaded list. */ unsigned int _ns_nloaded; /* Direct pointer to the searchlist of the main object. */ @@ -350,7 +360,7 @@ struct rtld_global /* The object to be initialized first. */ EXTERN struct link_map *_dl_initfirst; -#if HP_SMALL_TIMING_AVAIL +#if HP_SMALL_TIMING_AVAIL || HP_TIMING_PAD /* Start time on CPU clock. */ EXTERN hp_timing_t _dl_cpuclock_offset; #endif @@ -487,6 +497,9 @@ struct rtld_global_ro #define DL_DEBUG_HELP (1 << 10) #define DL_DEBUG_PRELINK (1 << 11) +/* Google-local. */ +#define DL_DEBUG_FASTLOAD (1 << 12) + /* OS version. */ EXTERN unsigned int _dl_osversion; /* Platform name. */ @@ -576,6 +589,29 @@ struct rtld_global_ro below. */ EXTERN struct r_search_path_elem *_dl_init_all_dirs; + /* The merged hash table used if we have a lot of shared objects. */ + EXTERN dl_position_table_entry_t *_dl_position_hash_table; + EXTERN int _dl_position_hash_mask; + EXTERN int _dl_position_hash_bits; + EXTERN int _dl_position_hash_cutoff; + EXTERN int _dl_enable_fastload; + +#define DL_POSITION_HASH_BITS_MAX 27 /* (1 << 27) entries. */ +#if defined(__powerpc__) && !defined(__powerpc64__) +#define DL_POSITION_HASH_CUTOFF_DEFAULT -1 /* Disabled. */ +#else +#define DL_POSITION_HASH_CUTOFF_DEFAULT 32 /* > 32 shared libs. */ +#endif + + /* Colon-separated list of absolute paths to ld.so.cache files + we'll load. */ + EXTERN const char *_google_ld_so_cache_list; + +#if HP_SMALL_TIMING_AVAIL || HP_TIMING_PAD + /* Overhead of a high-precision timing measurement. */ + EXTERN hp_timing_t _dl_hp_timing_overhead; +#endif + #ifdef NEED_DL_SYSINFO /* Syscall handling improvements. This is very specific to x86. */ EXTERN uintptr_t _dl_sysinfo; @@ -750,6 +786,8 @@ _dl_dprintf (int fd, const char *fmt, ...) } \ while (1) +/* Fill the position hash table used if we have a lot of shared libraries. */ +extern void _dl_fill_position_hash (struct link_map *main_map); /* An exception raised by the _dl_signal_error function family and caught by _dl_catch_error function family. Exceptions themselves @@ -1057,6 +1095,8 @@ extern ElfW(Addr) _dl_sysdep_start (void **start_argptr, extern void _dl_sysdep_start_cleanup (void) attribute_hidden; +extern int _dl_addr_inside_object (struct link_map *l, const ElfW(Addr) addr) + attribute_hidden; /* Determine next available module ID. */ extern size_t _dl_next_tls_modid (void) attribute_hidden; @@ -1193,6 +1233,31 @@ rtld_active (void) } #endif +/* Find last entry in the given scope. Cache the result. */ +static inline struct link_map * +_dl_last_entry (struct link_namespaces *ns) +{ + struct link_map *map = ns->_ns_last; + size_t len = 0; + + if (map == NULL) + map = ns->_ns_loaded; + + if (map == NULL) + return NULL; + + while (map->l_next != NULL) + { + map = map->l_next; + /* If we have more than _ns_nloaded libraries, it's likely we are + looking at a corrupt list. Fail hard. */ + assert (len++ < ns->_ns_nloaded); + } + + ns->_ns_last = map; + return map; +} + __END_DECLS #endif /* ldsodefs.h */