]>
Commit | Line | Data |
---|---|---|
3996f34b | 1 | /* Profiling of shared libraries. |
581c785b | 2 | Copyright (C) 1997-2022 Free Software Foundation, Inc. |
3996f34b | 3 | This file is part of the GNU C Library. |
9a0a462c | 4 | Based on the BSD mcount implementation. |
3996f34b UD |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
3996f34b UD |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
3996f34b | 15 | |
41bdb6e2 | 16 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
3996f34b | 19 | |
9710f75d | 20 | #include <assert.h> |
3996f34b UD |
21 | #include <errno.h> |
22 | #include <fcntl.h> | |
23 | #include <inttypes.h> | |
cbdee279 | 24 | #include <limits.h> |
3996f34b UD |
25 | #include <stdio.h> |
26 | #include <stdlib.h> | |
27 | #include <string.h> | |
28 | #include <unistd.h> | |
e054f494 | 29 | #include <stdint.h> |
a42195db | 30 | #include <ldsodefs.h> |
3996f34b UD |
31 | #include <sys/gmon.h> |
32 | #include <sys/gmon_out.h> | |
33 | #include <sys/mman.h> | |
650425ce | 34 | #include <sys/param.h> |
3996f34b | 35 | #include <sys/stat.h> |
4009bf40 | 36 | #include <atomic.h> |
329ea513 | 37 | #include <not-cancel.h> |
3996f34b UD |
38 | |
39 | /* The LD_PROFILE feature has to be implemented different to the | |
40 | normal profiling using the gmon/ functions. The problem is that an | |
41 | arbitrary number of processes can simultaneously be run using | |
42 | profiling and all write the results in the same file. To provide | |
43 | this mechanism one could implement a complicated mechanism to merge | |
44 | the content of two profiling runs or one could extend the file | |
45 | format to allow more than one data set. For the second solution we | |
46 | would have the problem that the file can grow in size beyond any | |
47 | limit and both solutions have the problem that the concurrency of | |
48 | writing the results is a big problem. | |
49 | ||
50 | Another much simpler method is to use mmap to map the same file in | |
51 | all using programs and modify the data in the mmap'ed area and so | |
52 | also automatically on the disk. Using the MAP_SHARED option of | |
53 | mmap(2) this can be done without big problems in more than one | |
54 | file. | |
55 | ||
56 | This approach is very different from the normal profiling. We have | |
57 | to use the profiling data in exactly the way they are expected to | |
0413b54c UD |
58 | be written to disk. But the normal format used by gprof is not usable |
59 | to do this. It is optimized for size. It writes the tags as single | |
60 | bytes but this means that the following 32/64 bit values are | |
61 | unaligned. | |
62 | ||
63 | Therefore we use a new format. This will look like this | |
64 | ||
65 | 0 1 2 3 <- byte is 32 bit word | |
66 | 0000 g m o n | |
67 | 0004 *version* <- GMON_SHOBJ_VERSION | |
68 | 0008 00 00 00 00 | |
69 | 000c 00 00 00 00 | |
70 | 0010 00 00 00 00 | |
71 | ||
72 | 0014 *tag* <- GMON_TAG_TIME_HIST | |
73 | 0018 ?? ?? ?? ?? | |
74 | ?? ?? ?? ?? <- 32/64 bit LowPC | |
75 | 0018+A ?? ?? ?? ?? | |
76 | ?? ?? ?? ?? <- 32/64 bit HighPC | |
77 | 0018+2*A *histsize* | |
78 | 001c+2*A *profrate* | |
79 | 0020+2*A s e c o | |
80 | 0024+2*A n d s \0 | |
81 | 0028+2*A \0 \0 \0 \0 | |
82 | 002c+2*A \0 \0 \0 | |
83 | 002f+2*A s | |
84 | ||
85 | 0030+2*A ?? ?? ?? ?? <- Count data | |
86 | ... ... | |
87 | 0030+2*A+K ?? ?? ?? ?? | |
88 | ||
89 | 0030+2*A+K *tag* <- GMON_TAG_CG_ARC | |
90 | 0034+2*A+K *lastused* | |
91 | 0038+2*A+K ?? ?? ?? ?? | |
92 | ?? ?? ?? ?? <- FromPC#1 | |
93 | 0038+3*A+K ?? ?? ?? ?? | |
94 | ?? ?? ?? ?? <- ToPC#1 | |
95 | 0038+4*A+K ?? ?? ?? ?? <- Count#1 | |
96 | ... ... ... | |
97 | 0038+(2*(CN-1)+2)*A+(CN-1)*4+K ?? ?? ?? ?? | |
98 | ?? ?? ?? ?? <- FromPC#CGN | |
99 | 0038+(2*(CN-1)+3)*A+(CN-1)*4+K ?? ?? ?? ?? | |
100 | ?? ?? ?? ?? <- ToPC#CGN | |
101 | 0038+(2*CN+2)*A+(CN-1)*4+K ?? ?? ?? ?? <- Count#CGN | |
102 | ||
650425ce | 103 | We put (for now?) no basic block information in the file since this would |
0413b54c UD |
104 | introduce race conditions among all the processes that want to write them. |
105 | ||
106 | `K' is the number of count entries which is computed as | |
107 | ||
108 | textsize / HISTFRACTION | |
109 | ||
110 | `CN' in the above table is the number of call graph arcs. Normally, | |
111 | the table is sparse and the profiling code writes out only those | |
112 | entries which are really used in the program run. But since we must | |
113 | not extend this table (the profiling file) we'll keep them all here. | |
114 | So CN can be computed in advance as | |
115 | ||
116 | MINARCS <= textsize*(ARCDENSITY/100) <= MAXARCS | |
117 | ||
118 | Now the remaining question is: how to build the data structures we can | |
119 | work with from this data. We need the from set and must associate the | |
120 | froms with all the associated tos. We will do this by constructing this | |
121 | data structures at the program start. To do this we'll simply visit all | |
122 | entries in the call graph table and add it to the appropriate list. */ | |
3996f34b | 123 | |
d8cf93f4 | 124 | extern int __profile_frequency (void); |
37ba7d66 | 125 | libc_hidden_proto (__profile_frequency) |
3996f34b | 126 | |
3996f34b UD |
/* Element type of the arc table stored in the profile file.  This is
   basically the `gmon_cg_arc_record' format, but it includes the room
   for the tag and uses real integer types for the fields.  */
struct here_cg_arc_record
  {
    /* Caller address, relative to the start of the profiled text.  */
    uintptr_t from_pc;
    /* Callee address, relative to the start of the profiled text.  */
    uintptr_t self_pc;
    /* Number of times the arc was taken.  _dl_mcount increments this
       field atomically, so it has to be properly aligned for its type
       and the compiler has to be told so even though the structure is
       packed.  The amount of data in front of an array of this
       structure is calculated as expected_size in _dl_start_profile.
       Every term of that calculation is a multiple of 4 bytes (for
       kcountsize because it results from subtracting page-aligned
       values, and the matching calculation in __monstartup also makes
       it at least a multiple of the size of u_long), hence every copy
       of this field really has the alignment promised here.  */
    uint32_t count __attribute__ ((aligned (__alignof__ (uint32_t))));
  } __attribute__ ((packed));
3996f34b UD |
147 | |
/* Start of the arc records inside the mmap'ed profile file.  */
static struct here_cg_arc_record *data;

/* Nonzero if profiling is under way.  */
static int running;

/* Number of arc records which have been incorporated in the toset.  */
static uint32_t narcs;
/* Pointer to the object holding the number of entries currently in the
   mmap'ed file.  The value need not be the same as NARCS at any given
   time.  If it is equal, all entries from the file are in our lists.  */
static volatile uint32_t *narcsp;


/* One element of a hash chain: the arc record it stands for and the
   FROMS index of the next element (zero terminates the chain).  */
struct here_fromstruct
  {
    struct here_cg_arc_record volatile *here;
    uint16_t link;
  };

/* Hash table indexed by callee address; each entry is the FROMS index
   of the first chain element, or zero if the bucket is empty.  */
static volatile uint16_t *tos;

/* Chain elements, the maximum number of arcs, and the number of chain
   elements handed out so far.  */
static struct here_fromstruct *froms;
static uint32_t fromlimit;
static volatile uint32_t fromidx;

/* Start address and size of the profiled text, and the shift used by
   _dl_mcount to turn a callee offset into a TOS index.  */
static uintptr_t lowpc;
static size_t textsize;
static unsigned int log_hashfraction;
177 | ||
9a0a462c | 178 | |
0413b54c UD |
179 | \f |
180 | /* Set up profiling data to profile object desribed by MAP. The output | |
181 | file is found (or created) in OUTPUT_DIR. */ | |
3996f34b | 182 | void |
53bfdc1c | 183 | _dl_start_profile (void) |
3996f34b UD |
184 | { |
185 | char *filename; | |
186 | int fd; | |
52a5fe70 | 187 | struct __stat64_t64 st; |
3996f34b UD |
188 | const ElfW(Phdr) *ph; |
189 | ElfW(Addr) mapstart = ~((ElfW(Addr)) 0); | |
190 | ElfW(Addr) mapend = 0; | |
9a0a462c | 191 | char *hist, *cp; |
0413b54c | 192 | size_t idx; |
128e2b0f UD |
193 | size_t tossize; |
194 | size_t fromssize; | |
195 | uintptr_t highpc; | |
321e8782 UD |
196 | uint16_t *kcount; |
197 | size_t kcountsize; | |
128e2b0f UD |
198 | struct gmon_hdr *addr = NULL; |
199 | off_t expected_size; | |
200 | /* See profil(2) where this is described. */ | |
201 | int s_scale; | |
202 | #define SCALE_1_TO_1 0x10000L | |
321e8782 | 203 | const char *errstr = NULL; |
3996f34b UD |
204 | |
205 | /* Compute the size of the sections which contain program code. */ | |
53bfdc1c UD |
206 | for (ph = GL(dl_profile_map)->l_phdr; |
207 | ph < &GL(dl_profile_map)->l_phdr[GL(dl_profile_map)->l_phnum]; ++ph) | |
3996f34b UD |
208 | if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X)) |
209 | { | |
afdca0f2 UD |
210 | ElfW(Addr) start = (ph->p_vaddr & ~(GLRO(dl_pagesize) - 1)); |
211 | ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + GLRO(dl_pagesize) - 1) | |
212 | & ~(GLRO(dl_pagesize) - 1)); | |
3996f34b UD |
213 | |
214 | if (start < mapstart) | |
215 | mapstart = start; | |
216 | if (end > mapend) | |
217 | mapend = end; | |
218 | } | |
219 | ||
220 | /* Now we can compute the size of the profiling data. This is done | |
221 | with the same formulars as in `monstartup' (see gmon.c). */ | |
7982ecfe | 222 | running = 0; |
53bfdc1c | 223 | lowpc = ROUNDDOWN (mapstart + GL(dl_profile_map)->l_addr, |
9a0a462c | 224 | HISTFRACTION * sizeof (HISTCOUNTER)); |
53bfdc1c | 225 | highpc = ROUNDUP (mapend + GL(dl_profile_map)->l_addr, |
9a0a462c | 226 | HISTFRACTION * sizeof (HISTCOUNTER)); |
0413b54c UD |
227 | textsize = highpc - lowpc; |
228 | kcountsize = textsize / HISTFRACTION; | |
3996f34b | 229 | if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) |
9710f75d UD |
230 | { |
231 | /* If HASHFRACTION is a power of two, mcount can use shifting | |
232 | instead of integer division. Precompute shift amount. | |
233 | ||
234 | This is a constant but the compiler cannot compile the | |
235 | expression away since the __ffs implementation is not known | |
236 | to the compiler. Help the compiler by precomputing the | |
237 | usual cases. */ | |
321e8782 | 238 | assert (HASHFRACTION == 2); |
9710f75d UD |
239 | |
240 | if (sizeof (*froms) == 8) | |
241 | log_hashfraction = 4; | |
242 | else if (sizeof (*froms) == 16) | |
243 | log_hashfraction = 5; | |
244 | else | |
321e8782 | 245 | log_hashfraction = __ffs (HASHFRACTION * sizeof (*froms)) - 1; |
9710f75d | 246 | } |
0413b54c UD |
247 | else |
248 | log_hashfraction = -1; | |
9a0a462c UD |
249 | tossize = textsize / HASHFRACTION; |
250 | fromlimit = textsize * ARCDENSITY / 100; | |
251 | if (fromlimit < MINARCS) | |
252 | fromlimit = MINARCS; | |
253 | if (fromlimit > MAXARCS) | |
254 | fromlimit = MAXARCS; | |
255 | fromssize = fromlimit * sizeof (struct here_fromstruct); | |
3996f34b UD |
256 | |
257 | expected_size = (sizeof (struct gmon_hdr) | |
0413b54c | 258 | + 4 + sizeof (struct gmon_hist_hdr) + kcountsize |
9a0a462c | 259 | + 4 + 4 + fromssize * sizeof (struct here_cg_arc_record)); |
3996f34b UD |
260 | |
261 | /* Create the gmon_hdr we expect or write. */ | |
6cc8844f UD |
262 | struct real_gmon_hdr |
263 | { | |
264 | char cookie[4]; | |
265 | int32_t version; | |
266 | char spare[3 * 4]; | |
267 | } gmon_hdr; | |
268 | if (sizeof (gmon_hdr) != sizeof (struct gmon_hdr) | |
269 | || (offsetof (struct real_gmon_hdr, cookie) | |
270 | != offsetof (struct gmon_hdr, cookie)) | |
271 | || (offsetof (struct real_gmon_hdr, version) | |
272 | != offsetof (struct gmon_hdr, version))) | |
273 | abort (); | |
274 | ||
3996f34b | 275 | memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie)); |
6cc8844f UD |
276 | gmon_hdr.version = GMON_SHOBJ_VERSION; |
277 | memset (gmon_hdr.spare, '\0', sizeof (gmon_hdr.spare)); | |
3996f34b UD |
278 | |
279 | /* Create the hist_hdr we expect or write. */ | |
6cc8844f UD |
280 | struct real_gmon_hist_hdr |
281 | { | |
282 | char *low_pc; | |
283 | char *high_pc; | |
284 | int32_t hist_size; | |
285 | int32_t prof_rate; | |
286 | char dimen[15]; | |
287 | char dimen_abbrev; | |
288 | } hist_hdr; | |
289 | if (sizeof (hist_hdr) != sizeof (struct gmon_hist_hdr) | |
290 | || (offsetof (struct real_gmon_hist_hdr, low_pc) | |
291 | != offsetof (struct gmon_hist_hdr, low_pc)) | |
292 | || (offsetof (struct real_gmon_hist_hdr, high_pc) | |
293 | != offsetof (struct gmon_hist_hdr, high_pc)) | |
294 | || (offsetof (struct real_gmon_hist_hdr, hist_size) | |
295 | != offsetof (struct gmon_hist_hdr, hist_size)) | |
296 | || (offsetof (struct real_gmon_hist_hdr, prof_rate) | |
297 | != offsetof (struct gmon_hist_hdr, prof_rate)) | |
298 | || (offsetof (struct real_gmon_hist_hdr, dimen) | |
299 | != offsetof (struct gmon_hist_hdr, dimen)) | |
300 | || (offsetof (struct real_gmon_hist_hdr, dimen_abbrev) | |
301 | != offsetof (struct gmon_hist_hdr, dimen_abbrev))) | |
302 | abort (); | |
303 | ||
304 | hist_hdr.low_pc = (char *) mapstart; | |
305 | hist_hdr.high_pc = (char *) mapend; | |
306 | hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER); | |
307 | hist_hdr.prof_rate = __profile_frequency (); | |
1e823b7d | 308 | if (sizeof (hist_hdr.dimen) >= sizeof ("seconds")) |
7fec4f2f UD |
309 | { |
310 | memcpy (hist_hdr.dimen, "seconds", sizeof ("seconds")); | |
311 | memset (hist_hdr.dimen + sizeof ("seconds"), '\0', | |
312 | sizeof (hist_hdr.dimen) - sizeof ("seconds")); | |
313 | } | |
1e823b7d UD |
314 | else |
315 | strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen)); | |
3996f34b UD |
316 | hist_hdr.dimen_abbrev = 's'; |
317 | ||
318 | /* First determine the output name. We write in the directory | |
319 | OUTPUT_DIR and the name is composed from the shared objects | |
320 | soname (or the file name) and the ending ".profile". */ | |
53bfdc1c | 321 | filename = (char *) alloca (strlen (GLRO(dl_profile_output)) + 1 |
afdca0f2 | 322 | + strlen (GLRO(dl_profile)) + sizeof ".profile"); |
53bfdc1c | 323 | cp = __stpcpy (filename, GLRO(dl_profile_output)); |
9a0a462c | 324 | *cp++ = '/'; |
afdca0f2 | 325 | __stpcpy (__stpcpy (cp, GLRO(dl_profile)), ".profile"); |
3996f34b | 326 | |
329ea513 | 327 | fd = __open64_nocancel (filename, O_RDWR|O_CREAT|O_NOFOLLOW, DEFFILEMODE); |
3996f34b | 328 | if (fd == -1) |
650425ce | 329 | { |
650425ce | 330 | char buf[400]; |
321e8782 UD |
331 | int errnum; |
332 | ||
333 | /* We cannot write the profiling data so don't do anything. */ | |
334 | errstr = "%s: cannot open file: %s\n"; | |
335 | print_error: | |
336 | errnum = errno; | |
337 | if (fd != -1) | |
329ea513 | 338 | __close_nocancel (fd); |
321e8782 UD |
339 | _dl_error_printf (errstr, filename, |
340 | __strerror_r (errnum, buf, sizeof buf)); | |
650425ce UD |
341 | return; |
342 | } | |
3996f34b | 343 | |
52a5fe70 | 344 | if (__fstat64_time64 (fd, &st) < 0 || !S_ISREG (st.st_mode)) |
3996f34b UD |
345 | { |
346 | /* Not stat'able or not a regular file => don't use it. */ | |
321e8782 UD |
347 | errstr = "%s: cannot stat file: %s\n"; |
348 | goto print_error; | |
3996f34b UD |
349 | } |
350 | ||
351 | /* Test the size. If it does not match what we expect from the size | |
352 | values in the map MAP we don't use it and warn the user. */ | |
353 | if (st.st_size == 0) | |
354 | { | |
355 | /* We have to create the file. */ | |
afdca0f2 | 356 | char buf[GLRO(dl_pagesize)]; |
3996f34b | 357 | |
afdca0f2 | 358 | memset (buf, '\0', GLRO(dl_pagesize)); |
3996f34b | 359 | |
afdca0f2 | 360 | if (__lseek (fd, expected_size & ~(GLRO(dl_pagesize) - 1), SEEK_SET) == -1) |
3996f34b | 361 | { |
3996f34b | 362 | cannot_create: |
321e8782 UD |
363 | errstr = "%s: cannot create file: %s\n"; |
364 | goto print_error; | |
3996f34b UD |
365 | } |
366 | ||
329ea513 ZW |
367 | if (TEMP_FAILURE_RETRY |
368 | (__write_nocancel (fd, buf, (expected_size & (GLRO(dl_pagesize) - 1)))) | |
2bcf29ba | 369 | < 0) |
3996f34b UD |
370 | goto cannot_create; |
371 | } | |
372 | else if (st.st_size != expected_size) | |
373 | { | |
329ea513 | 374 | __close_nocancel (fd); |
3996f34b | 375 | wrong_format: |
0413b54c UD |
376 | |
377 | if (addr != NULL) | |
378 | __munmap ((void *) addr, expected_size); | |
379 | ||
35fc382a | 380 | _dl_error_printf ("%s: file is no correct profile data file for `%s'\n", |
afdca0f2 | 381 | filename, GLRO(dl_profile)); |
3996f34b UD |
382 | return; |
383 | } | |
384 | ||
0413b54c UD |
385 | addr = (struct gmon_hdr *) __mmap (NULL, expected_size, PROT_READ|PROT_WRITE, |
386 | MAP_SHARED|MAP_FILE, fd, 0); | |
387 | if (addr == (struct gmon_hdr *) MAP_FAILED) | |
3996f34b | 388 | { |
321e8782 UD |
389 | errstr = "%s: cannot map file: %s\n"; |
390 | goto print_error; | |
3996f34b UD |
391 | } |
392 | ||
ce6e047f | 393 | /* We don't need the file descriptor anymore. */ |
329ea513 | 394 | __close_nocancel (fd); |
3996f34b UD |
395 | |
396 | /* Pointer to data after the header. */ | |
397 | hist = (char *) (addr + 1); | |
0413b54c UD |
398 | kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t) |
399 | + sizeof (struct gmon_hist_hdr)); | |
3996f34b UD |
400 | |
401 | /* Compute pointer to array of the arc information. */ | |
650425ce UD |
402 | narcsp = (uint32_t *) ((char *) kcount + kcountsize + sizeof (uint32_t)); |
403 | data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t)); | |
3996f34b UD |
404 | |
405 | if (st.st_size == 0) | |
406 | { | |
407 | /* Create the signature. */ | |
3996f34b UD |
408 | memcpy (addr, &gmon_hdr, sizeof (struct gmon_hdr)); |
409 | ||
0413b54c UD |
410 | *(uint32_t *) hist = GMON_TAG_TIME_HIST; |
411 | memcpy (hist + sizeof (uint32_t), &hist_hdr, | |
412 | sizeof (struct gmon_hist_hdr)); | |
3996f34b | 413 | |
650425ce | 414 | narcsp[-1] = GMON_TAG_CG_ARC; |
3996f34b UD |
415 | } |
416 | else | |
417 | { | |
418 | /* Test the signature in the file. */ | |
419 | if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0 | |
0413b54c UD |
420 | || *(uint32_t *) hist != GMON_TAG_TIME_HIST |
421 | || memcmp (hist + sizeof (uint32_t), &hist_hdr, | |
422 | sizeof (struct gmon_hist_hdr)) != 0 | |
650425ce | 423 | || narcsp[-1] != GMON_TAG_CG_ARC) |
3996f34b UD |
424 | goto wrong_format; |
425 | } | |
426 | ||
0413b54c | 427 | /* Allocate memory for the froms data and the pointer to the tos records. */ |
9a0a462c | 428 | tos = (uint16_t *) calloc (tossize + fromssize, 1); |
650425ce | 429 | if (tos == NULL) |
0413b54c UD |
430 | { |
431 | __munmap ((void *) addr, expected_size); | |
35fc382a | 432 | _dl_fatal_printf ("Out of memory while initializing profiler\n"); |
0413b54c UD |
433 | /* NOTREACHED */ |
434 | } | |
435 | ||
9a0a462c UD |
436 | froms = (struct here_fromstruct *) ((char *) tos + tossize); |
437 | fromidx = 0; | |
0413b54c UD |
438 | |
439 | /* Now we have to process all the arc count entries. BTW: it is | |
440 | not critical whether the *NARCSP value changes meanwhile. Before | |
441 | we enter a new entry in to toset we will check that everything is | |
442 | available in TOS. This happens in _dl_mcount. | |
443 | ||
444 | Loading the entries in reverse order should help to get the most | |
445 | frequently used entries at the front of the list. */ | |
650425ce | 446 | for (idx = narcs = MIN (*narcsp, fromlimit); idx > 0; ) |
0413b54c | 447 | { |
9a0a462c UD |
448 | size_t to_index; |
449 | size_t newfromidx; | |
0413b54c | 450 | --idx; |
321e8782 | 451 | to_index = (data[idx].self_pc / (HASHFRACTION * sizeof (*tos))); |
9a0a462c UD |
452 | newfromidx = fromidx++; |
453 | froms[newfromidx].here = &data[idx]; | |
454 | froms[newfromidx].link = tos[to_index]; | |
455 | tos[to_index] = newfromidx; | |
0413b54c UD |
456 | } |
457 | ||
9a0a462c UD |
458 | /* Setup counting data. */ |
459 | if (kcountsize < highpc - lowpc) | |
f4017d20 | 460 | { |
f4017d20 UD |
461 | #if 0 |
462 | s_scale = ((double) kcountsize / (highpc - lowpc)) * SCALE_1_TO_1; | |
463 | #else | |
cbdee279 UD |
464 | size_t range = highpc - lowpc; |
465 | size_t quot = range / kcountsize; | |
466 | ||
467 | if (quot >= SCALE_1_TO_1) | |
468 | s_scale = 1; | |
469 | else if (quot >= SCALE_1_TO_1 / 256) | |
470 | s_scale = SCALE_1_TO_1 / quot; | |
471 | else if (range > ULONG_MAX / 256) | |
472 | s_scale = (SCALE_1_TO_1 * 256) / (range / (kcountsize / 256)); | |
473 | else | |
474 | s_scale = (SCALE_1_TO_1 * 256) / ((range * 256) / kcountsize); | |
f4017d20 UD |
475 | #endif |
476 | } | |
9a0a462c UD |
477 | else |
478 | s_scale = SCALE_1_TO_1; | |
479 | ||
480 | /* Start the profiler. */ | |
481 | __profil ((void *) kcount, kcountsize, lowpc, s_scale); | |
482 | ||
3996f34b | 483 | /* Turn on profiling. */ |
7982ecfe | 484 | running = 1; |
3996f34b UD |
485 | } |
486 | ||
487 | ||
488 | void | |
489 | _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc) | |
490 | { | |
d472655c | 491 | volatile uint16_t *topcindex; |
9a0a462c UD |
492 | size_t i, fromindex; |
493 | struct here_fromstruct *fromp; | |
494 | ||
7982ecfe | 495 | if (! running) |
7f81ac70 | 496 | return; |
3996f34b UD |
497 | |
498 | /* Compute relative addresses. The shared object can be loaded at | |
499 | any address. The value of frompc could be anything. We cannot | |
500 | restrict it in any way, just set to a fixed value (0) in case it | |
501 | is outside the allowed range. These calls show up as calls from | |
502 | <external> in the gprof output. */ | |
0413b54c UD |
503 | frompc -= lowpc; |
504 | if (frompc >= textsize) | |
3996f34b | 505 | frompc = 0; |
0413b54c UD |
506 | selfpc -= lowpc; |
507 | if (selfpc >= textsize) | |
508 | goto done; | |
509 | ||
9a0a462c UD |
510 | /* Getting here we now have to find out whether the location was |
511 | already used. If yes we are lucky and only have to increment a | |
512 | counter (this also has to be atomic). If the entry is new things | |
513 | are getting complicated... */ | |
514 | ||
515 | /* Avoid integer divide if possible. */ | |
516 | if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) | |
517 | i = selfpc >> log_hashfraction; | |
518 | else | |
321e8782 | 519 | i = selfpc / (HASHFRACTION * sizeof (*tos)); |
9a0a462c UD |
520 | |
521 | topcindex = &tos[i]; | |
522 | fromindex = *topcindex; | |
523 | ||
524 | if (fromindex == 0) | |
525 | goto check_new_or_add; | |
526 | ||
527 | fromp = &froms[fromindex]; | |
528 | ||
529 | /* We have to look through the chain of arcs whether there is already | |
530 | an entry for our arc. */ | |
c0fb8a56 | 531 | while (fromp->here->from_pc != frompc) |
9a0a462c UD |
532 | { |
533 | if (fromp->link != 0) | |
534 | do | |
535 | fromp = &froms[fromp->link]; | |
536 | while (fromp->link != 0 && fromp->here->from_pc != frompc); | |
537 | ||
650425ce | 538 | if (fromp->here->from_pc != frompc) |
9a0a462c UD |
539 | { |
540 | topcindex = &fromp->link; | |
541 | ||
542 | check_new_or_add: | |
543 | /* Our entry is not among the entries we read so far from the | |
544 | data file. Now see whether we have to update the list. */ | |
650425ce | 545 | while (narcs != *narcsp && narcs < fromlimit) |
9a0a462c UD |
546 | { |
547 | size_t to_index; | |
548 | size_t newfromidx; | |
650425ce | 549 | to_index = (data[narcs].self_pc |
321e8782 | 550 | / (HASHFRACTION * sizeof (*tos))); |
11bf311e | 551 | newfromidx = catomic_exchange_and_add (&fromidx, 1) + 1; |
9a0a462c UD |
552 | froms[newfromidx].here = &data[narcs]; |
553 | froms[newfromidx].link = tos[to_index]; | |
554 | tos[to_index] = newfromidx; | |
11bf311e | 555 | catomic_increment (&narcs); |
9a0a462c UD |
556 | } |
557 | ||
558 | /* If we still have no entry stop searching and insert. */ | |
559 | if (*topcindex == 0) | |
560 | { | |
535e935a | 561 | unsigned int newarc = catomic_exchange_and_add (narcsp, 1); |
9a0a462c UD |
562 | |
563 | /* In rare cases it could happen that all entries in FROMS are | |
564 | occupied. So we cannot count this anymore. */ | |
650425ce | 565 | if (newarc >= fromlimit) |
9a0a462c UD |
566 | goto done; |
567 | ||
11bf311e | 568 | *topcindex = catomic_exchange_and_add (&fromidx, 1) + 1; |
d472655c | 569 | fromp = &froms[*topcindex]; |
9a0a462c | 570 | |
650425ce UD |
571 | fromp->here = &data[newarc]; |
572 | data[newarc].from_pc = frompc; | |
573 | data[newarc].self_pc = selfpc; | |
574 | data[newarc].count = 0; | |
9a0a462c | 575 | fromp->link = 0; |
11bf311e | 576 | catomic_increment (&narcs); |
9a0a462c UD |
577 | |
578 | break; | |
579 | } | |
580 | ||
581 | fromp = &froms[*topcindex]; | |
582 | } | |
583 | else | |
584 | /* Found in. */ | |
585 | break; | |
586 | } | |
587 | ||
588 | /* Increment the counter. */ | |
11bf311e | 589 | catomic_increment (&fromp->here->count); |
3996f34b | 590 | |
0413b54c | 591 | done: |
ed073f0e | 592 | ; |
3996f34b | 593 | } |
ab97ee8f | 594 | rtld_hidden_def (_dl_mcount) |