// SPDX-License-Identifier: GPL-2.0-only
/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "env.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/*
 * State of retpoline detection.
 *
 * RETPOLINE_NONE: no retpoline detection
 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
 * X86_RETPOLINE_DETECTED: x86 retpoline detected
 */
enum retpoline_state_t {
        RETPOLINE_NONE,
        X86_RETPOLINE_POSSIBLE,
        X86_RETPOLINE_DETECTED,
};

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @db_id: id used for db-export
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 */
struct thread_stack_entry {
        u64 ret_addr;
        u64 timestamp;
        u64 ref;
        u64 branch_count;
        u64 db_id;
        struct call_path *cp;
        bool no_call;
        bool trace_end;
        bool non_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 * @rstate: used to detect retpolines
 */
struct thread_stack {
        struct thread_stack_entry *stack;
        size_t cnt;
        size_t sz;
        u64 trace_nr;
        u64 branch_count;
        u64 kernel_start;
        u64 last_time;
        struct call_return_processor *crp;
        struct comm *comm;
        unsigned int arr_sz;
        enum retpoline_state_t rstate;
};

/*
 * Assume pid == tid == 0 identifies the idle task as defined by
 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
 * and therefore requires a stack for each cpu.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
        return !(thread->tid || thread->pid_);
}

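/* Enlarge the stack array by STACK_GROWTH entries, keeping existing entries */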
static int thread_stack__grow(struct thread_stack *ts)
{
        struct thread_stack_entry *new_stack;
        size_t sz, new_sz;

        new_sz = ts->sz + STACK_GROWTH;
        sz = new_sz * sizeof(struct thread_stack_entry);

        new_stack = realloc(ts->stack, sz);
        if (!new_stack)
                return -ENOMEM;

        ts->stack = new_stack;
        ts->sz = new_sz;

        return 0;
}

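/*
 * One-time initialization of a thread_stack slot: allocate the entry array,
 * record the kernel start address (defaulting to 1ULL << 63 when the machine
 * is not known) and, on x86, enable retpoline detection.
 */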
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
                              struct call_return_processor *crp)
{
        int err;

        err = thread_stack__grow(ts);
        if (err)
                return err;

        if (thread->mg && thread->mg->machine) {
                struct machine *machine = thread->mg->machine;
                const char *arch = perf_env__arch(machine->env);

                ts->kernel_start = machine__kernel_start(machine);
                if (!strcmp(arch, "x86"))
                        ts->rstate = X86_RETPOLINE_POSSIBLE;
        } else {
                ts->kernel_start = 1ULL << 63;
        }
        ts->crp = crp;

        return 0;
}

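/*
 * Find or allocate the thread_stack to use for @cpu. For the idle task the
 * array is sized per cpu (rounded up to a power of two), otherwise a single
 * element is used.
 */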
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
                                              struct call_return_processor *crp)
{
        struct thread_stack *ts = thread->ts, *new_ts;
        unsigned int old_sz = ts ? ts->arr_sz : 0;
        unsigned int new_sz = 1;

        if (thread_stack__per_cpu(thread) && cpu > 0)
                new_sz = roundup_pow_of_two(cpu + 1);

        if (!ts || new_sz > old_sz) {
                new_ts = calloc(new_sz, sizeof(*ts));
                if (!new_ts)
                        return NULL;
                if (ts)
                        memcpy(new_ts, ts, old_sz * sizeof(*ts));
                new_ts->arr_sz = new_sz;
                zfree(&thread->ts);
                thread->ts = new_ts;
                ts = new_ts;
        }

        if (thread_stack__per_cpu(thread) && cpu > 0 &&
            (unsigned int)cpu < ts->arr_sz)
                ts += cpu;

        if (!ts->stack &&
            thread_stack__init(ts, thread, crp))
                return NULL;

        return ts;
}

static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
        struct thread_stack *ts = thread->ts;

        if (cpu < 0)
                cpu = 0;

        if (!ts || (unsigned int)cpu >= ts->arr_sz)
                return NULL;

        ts += cpu;

        if (!ts->stack)
                return NULL;

        return ts;
}

static inline struct thread_stack *thread__stack(struct thread *thread,
                                                 int cpu)
{
        if (!thread)
                return NULL;

        if (thread_stack__per_cpu(thread))
                return thread__cpu_stack(thread, cpu);

        return thread->ts;
}

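/*
 * Push a return address. This variant is used by thread_stack__event() when
 * there is no call/return processor, so only the return address and the
 * 'trace end' flag are recorded.
 */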
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
                              bool trace_end)
{
        int err = 0;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err) {
                        pr_warning("Out of memory: discarding thread stack\n");
                        ts->cnt = 0;
                }
        }

        ts->stack[ts->cnt].trace_end = trace_end;
        ts->stack[ts->cnt++].ret_addr = ret_addr;

        return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
        size_t i;

        /*
         * In some cases there may be functions which are not seen to return.
         * For example when setjmp / longjmp has been used. Or the perf context
         * switch in the kernel which doesn't stop and start tracing in exactly
         * the same code path. When that happens the return address will be
         * further down the stack. If the return address is not found at all,
         * we assume the opposite (i.e. this is a return for a call that wasn't
         * seen for some reason) and leave the stack alone.
         */
        for (i = ts->cnt; i; ) {
                if (ts->stack[--i].ret_addr == ret_addr) {
                        ts->cnt = i;
                        return;
                }
        }
}

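/* Pop consecutive 'trace end' entries from the top of the stack */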
static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
        size_t i;

        for (i = ts->cnt; i; ) {
                if (ts->stack[--i].trace_end)
                        ts->cnt = i;
                else
                        return;
        }
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
        if (!ts->cnt)
                return false;

        return ts->stack[ts->cnt - 1].cp->in_kernel;
}

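/*
 * Report the call/return pair for stack entry @idx to the call/return
 * processor. @no_return means the 'return' was never seen. A pointer to the
 * parent entry's db_id is passed so that the exporter can fill it in.
 */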
static int thread_stack__call_return(struct thread *thread,
                                     struct thread_stack *ts, size_t idx,
                                     u64 timestamp, u64 ref, bool no_return)
{
        struct call_return_processor *crp = ts->crp;
        struct thread_stack_entry *tse;
        struct call_return cr = {
                .thread = thread,
                .comm = ts->comm,
                .db_id = 0,
        };
        u64 *parent_db_id;

        tse = &ts->stack[idx];
        cr.cp = tse->cp;
        cr.call_time = tse->timestamp;
        cr.return_time = timestamp;
        cr.branch_count = ts->branch_count - tse->branch_count;
        cr.db_id = tse->db_id;
        cr.call_ref = tse->ref;
        cr.return_ref = ref;
        if (tse->no_call)
                cr.flags |= CALL_RETURN_NO_CALL;
        if (no_return)
                cr.flags |= CALL_RETURN_NO_RETURN;
        if (tse->non_call)
                cr.flags |= CALL_RETURN_NON_CALL;

        /*
         * The parent db_id must be assigned before exporting the child. Note
         * it is not possible to export the parent first because its
         * information is not complete until its 'return' has been processed.
         */
        parent_db_id = idx ? &(tse - 1)->db_id : NULL;

        return crp->process(&cr, parent_db_id, crp->data);
}

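/*
 * Empty the stack. With a call/return processor, each remaining entry is
 * reported as a call with no seen return; otherwise the count is simply reset.
 */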
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
        struct call_return_processor *crp = ts->crp;
        int err;

        if (!crp) {
                ts->cnt = 0;
                return 0;
        }

        while (ts->cnt) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                ts->last_time, 0, true);
                if (err) {
                        pr_err("Error flushing thread stack!\n");
                        ts->cnt = 0;
                        return err;
                }
        }

        return 0;
}

int thread_stack__flush(struct thread *thread)
{
        struct thread_stack *ts = thread->ts;
        unsigned int pos;
        int err = 0;

        if (ts) {
                for (pos = 0; pos < ts->arr_sz; pos++) {
                        int ret = __thread_stack__flush(thread, ts + pos);

                        if (ret)
                                err = ret;
                }
        }

        return err;
}

int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
                        u64 to_ip, u16 insn_len, u64 trace_nr)
{
        struct thread_stack *ts = thread__stack(thread, cpu);

        if (!thread)
                return -EINVAL;

        if (!ts) {
                ts = thread_stack__new(thread, cpu, NULL);
                if (!ts) {
                        pr_warning("Out of memory: no thread stack\n");
                        return -ENOMEM;
                }
                ts->trace_nr = trace_nr;
        }

        /*
         * When the trace is discontinuous, the trace_nr changes. In that case
         * the stack might be completely invalid. Better to report nothing than
         * to report something misleading, so flush the stack.
         */
        if (trace_nr != ts->trace_nr) {
                if (ts->trace_nr)
                        __thread_stack__flush(thread, ts);
                ts->trace_nr = trace_nr;
        }

        /* Stop here if thread_stack__process() is in use */
        if (ts->crp)
                return 0;

        if (flags & PERF_IP_FLAG_CALL) {
                u64 ret_addr;

                if (!to_ip)
                        return 0;
                ret_addr = from_ip + insn_len;
                if (ret_addr == to_ip)
                        return 0; /* Zero-length calls are excluded */
                return thread_stack__push(ts, ret_addr,
                                          flags & PERF_IP_FLAG_TRACE_END);
        } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
                /*
                 * If the caller did not change the trace number (which would
                 * have flushed the stack) then try to make sense of the stack.
                 * Possibly, tracing began after returning to the current
                 * address, so try to pop that. Also, do not expect a call made
                 * when the trace ended to return, so pop that.
                 */
                thread_stack__pop(ts, to_ip);
                thread_stack__pop_trace_end(ts);
        } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
                thread_stack__pop(ts, to_ip);
        }

        return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
        struct thread_stack *ts = thread__stack(thread, cpu);

        if (!ts)
                return;

        if (trace_nr != ts->trace_nr) {
                if (ts->trace_nr)
                        __thread_stack__flush(thread, ts);
                ts->trace_nr = trace_nr;
        }
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
        __thread_stack__flush(thread, ts);
        zfree(&ts->stack);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
        unsigned int arr_sz = ts->arr_sz;

        __thread_stack__free(thread, ts);
        memset(ts, 0, sizeof(*ts));
        ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
        struct thread_stack *ts = thread->ts;
        unsigned int pos;

        if (ts) {
                for (pos = 0; pos < ts->arr_sz; pos++)
                        __thread_stack__free(thread, ts + pos);
                zfree(&thread->ts);
        }
}

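/* Select the callchain context (user or kernel) for an address */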
static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
        return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

void thread_stack__sample(struct thread *thread, int cpu,
                          struct ip_callchain *chain,
                          size_t sz, u64 ip, u64 kernel_start)
{
        struct thread_stack *ts = thread__stack(thread, cpu);
        u64 context = callchain_context(ip, kernel_start);
        u64 last_context;
        size_t i, j;

        if (sz < 2) {
                chain->nr = 0;
                return;
        }

        chain->ips[0] = context;
        chain->ips[1] = ip;

        if (!ts) {
                chain->nr = 2;
                return;
        }

        last_context = context;

        for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
                ip = ts->stack[ts->cnt - j].ret_addr;
                context = callchain_context(ip, kernel_start);
                if (context != last_context) {
                        if (i >= sz - 1)
                                break;
                        chain->ips[i++] = context;
                        last_context = context;
                }
                chain->ips[i] = ip;
        }

        chain->nr = i;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
                           void *data)
{
        struct call_return_processor *crp;

        crp = zalloc(sizeof(struct call_return_processor));
        if (!crp)
                return NULL;
        crp->cpr = call_path_root__new();
        if (!crp->cpr)
                goto out_free;
        crp->process = process;
        crp->data = data;
        return crp;

out_free:
        free(crp);
        return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
        if (crp) {
                call_path_root__free(crp->cpr);
                free(crp);
        }
}

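/*
 * Push an entry with full call-path information. Used on the
 * thread_stack__process() path, i.e. when a call/return processor is in use.
 */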
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
                                 u64 timestamp, u64 ref, struct call_path *cp,
                                 bool no_call, bool trace_end)
{
        struct thread_stack_entry *tse;
        int err;

        if (!cp)
                return -ENOMEM;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err)
                        return err;
        }

        tse = &ts->stack[ts->cnt++];
        tse->ret_addr = ret_addr;
        tse->timestamp = timestamp;
        tse->ref = ref;
        tse->branch_count = ts->branch_count;
        tse->cp = cp;
        tse->no_call = no_call;
        tse->trace_end = trace_end;
        tse->non_call = false;
        tse->db_id = 0;

        return 0;
}

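/*
 * Pop entries down to the one whose return address matches @ret_addr,
 * reporting intervening entries as calls with no seen return. With only one
 * entry on the stack, match by symbol instead. Returns 1 if no matching entry
 * is found, 0 on success, or a negative error code.
 */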
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
                                u64 ret_addr, u64 timestamp, u64 ref,
                                struct symbol *sym)
{
        int err;

        if (!ts->cnt)
                return 1;

        if (ts->cnt == 1) {
                struct thread_stack_entry *tse = &ts->stack[0];

                if (tse->cp->sym == sym)
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
        }

        if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
            !ts->stack[ts->cnt - 1].non_call) {
                return thread_stack__call_return(thread, ts, --ts->cnt,
                                                 timestamp, ref, false);
        } else {
                size_t i = ts->cnt - 1;

                while (i--) {
                        if (ts->stack[i].ret_addr != ret_addr ||
                            ts->stack[i].non_call)
                                continue;
                        i += 1;
                        while (ts->cnt > i) {
                                err = thread_stack__call_return(thread, ts,
                                                                --ts->cnt,
                                                                timestamp, ref,
                                                                true);
                                if (err)
                                        return err;
                        }
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
                }
        }

        return 1;
}

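/*
 * The stack is empty, so push the current symbol (taken from the sample ip,
 * or from the sample addr if the ip is zero) as a call assumed to have
 * happened before the trace started.
 */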
static int thread_stack__bottom(struct thread_stack *ts,
                                struct perf_sample *sample,
                                struct addr_location *from_al,
                                struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        struct symbol *sym;
        u64 ip;

        if (sample->ip) {
                ip = sample->ip;
                sym = from_al->sym;
        } else if (sample->addr) {
                ip = sample->addr;
                sym = to_al->sym;
        } else {
                return 0;
        }

        cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
                                ts->kernel_start);

        return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
                                     true, false);
}

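/*
 * Handle a 'return' for which no matching entry is found on the stack,
 * i.e. the corresponding 'call' was not seen.
 */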
static int thread_stack__no_call_return(struct thread *thread,
                                        struct thread_stack *ts,
                                        struct perf_sample *sample,
                                        struct addr_location *from_al,
                                        struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *root = &cpr->call_path;
        struct symbol *fsym = from_al->sym;
        struct symbol *tsym = to_al->sym;
        struct call_path *cp, *parent;
        u64 ks = ts->kernel_start;
        u64 addr = sample->addr;
        u64 tm = sample->time;
        u64 ip = sample->ip;
        int err;

        if (ip >= ks && addr < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        tm, ref, true);
                        if (err)
                                return err;
                }

                /* If the stack is empty, push the userspace address */
                if (!ts->cnt) {
                        cp = call_path__findnew(cpr, root, tsym, addr, ks);
                        return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
                                                     false);
                }
        } else if (thread_stack__in_kernel(ts) && ip < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        tm, ref, true);
                        if (err)
                                return err;
                }
        }

        if (ts->cnt)
                parent = ts->stack[ts->cnt - 1].cp;
        else
                parent = root;

        if (parent->sym == from_al->sym) {
                /*
                 * At the bottom of the stack, assume the missing 'call' was
                 * before the trace started. So, pop the current symbol and push
                 * the 'to' symbol.
                 */
                if (ts->cnt == 1) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        tm, ref, false);
                        if (err)
                                return err;
                }

                if (!ts->cnt) {
                        cp = call_path__findnew(cpr, root, tsym, addr, ks);

                        return thread_stack__push_cp(ts, addr, tm, ref, cp,
                                                     true, false);
                }

                /*
                 * Otherwise assume the 'return' is being used as a jump (e.g.
                 * retpoline) and just push the 'to' symbol.
                 */
                cp = call_path__findnew(cpr, parent, tsym, addr, ks);

                err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
                if (!err)
                        ts->stack[ts->cnt - 1].non_call = true;

                return err;
        }

        /*
         * Assume 'parent' has not yet returned, so push 'to', and then push and
         * pop 'from'.
         */

        cp = call_path__findnew(cpr, parent, tsym, addr, ks);

        err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
        if (err)
                return err;

        cp = call_path__findnew(cpr, cp, fsym, ip, ks);

        err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
        if (err)
                return err;

        return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}

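/* Tracing has resumed, so pop a 'trace end' entry if it is top-of-stack */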
static int thread_stack__trace_begin(struct thread *thread,
                                     struct thread_stack *ts, u64 timestamp,
                                     u64 ref)
{
        struct thread_stack_entry *tse;
        int err;

        if (!ts->cnt)
                return 0;

        /* Pop trace end */
        tse = &ts->stack[ts->cnt - 1];
        if (tse->trace_end) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                timestamp, ref, false);
                if (err)
                        return err;
        }

        return 0;
}

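/* Tracing is ending, so mark that by pushing a 'trace end' entry */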
static int thread_stack__trace_end(struct thread_stack *ts,
                                   struct perf_sample *sample, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        u64 ret_addr;

        /* No point having 'trace end' on the bottom of the stack */
        if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
                return 0;

        cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
                                ts->kernel_start);

        ret_addr = sample->ip + sample->insn_len;

        return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
                                     false, true);
}

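/* Identify x86 retpoline thunks by symbol name */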
static bool is_x86_retpoline(const char *name)
{
        const char *p = strstr(name, "__x86_indirect_thunk_");

        return p == name || !strcmp(name, "__indirect_thunk_start");
}

/*
 * x86 retpoline functions pollute the call graph. This function removes them.
 * This does not handle function return thunks, nor is there any improvement
 * for the handling of inline thunks or extern thunks.
 */
static int thread_stack__x86_retpoline(struct thread_stack *ts,
                                       struct perf_sample *sample,
                                       struct addr_location *to_al)
{
        struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
        struct call_path_root *cpr = ts->crp->cpr;
        struct symbol *sym = tse->cp->sym;
        struct symbol *tsym = to_al->sym;
        struct call_path *cp;

        if (sym && is_x86_retpoline(sym->name)) {
                /*
                 * This is an x86 retpoline fn. It pollutes the call graph by
                 * showing up everywhere there is an indirect branch, but does
                 * not itself mean anything. Here the top-of-stack is removed,
                 * by decrementing the stack count, and then further down, the
                 * resulting top-of-stack is replaced with the actual target.
                 * The result is that the retpoline functions will no longer
                 * appear in the call graph. Note this only affects the call
                 * graph, since all the original branches are left unchanged.
                 */
                ts->cnt -= 1;
                sym = ts->stack[ts->cnt - 2].cp->sym;
                if (sym && sym == tsym && to_al->addr != tsym->start) {
                        /*
                         * Target is back to the middle of the symbol we came
                         * from so assume it is an indirect jmp and forget it
                         * altogether.
                         */
                        ts->cnt -= 1;
                        return 0;
                }
        } else if (sym && sym == tsym) {
                /*
                 * Target is back to the symbol we came from so assume it is an
                 * indirect jmp and forget it altogether.
                 */
                ts->cnt -= 1;
                return 0;
        }

        cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
                                sample->addr, ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        /* Replace the top-of-stack with the actual target */
        ts->stack[ts->cnt - 1].cp = cp;

        return 0;
}

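/*
 * Process one branch sample against the thread's stack, reporting completed
 * call/return pairs to @crp. When a call/return processor is in use, this
 * supersedes thread_stack__event().
 */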
int thread_stack__process(struct thread *thread, struct comm *comm,
                          struct perf_sample *sample,
                          struct addr_location *from_al,
                          struct addr_location *to_al, u64 ref,
                          struct call_return_processor *crp)
{
        struct thread_stack *ts = thread__stack(thread, sample->cpu);
        enum retpoline_state_t rstate;
        int err = 0;

        if (ts && !ts->crp) {
                /* Supersede thread_stack__event() */
                thread_stack__reset(thread, ts);
                ts = NULL;
        }

        if (!ts) {
                ts = thread_stack__new(thread, sample->cpu, crp);
                if (!ts)
                        return -ENOMEM;
                ts->comm = comm;
        }

        rstate = ts->rstate;
        if (rstate == X86_RETPOLINE_DETECTED)
                ts->rstate = X86_RETPOLINE_POSSIBLE;

        /* Flush stack on exec */
        if (ts->comm != comm && thread->pid_ == thread->tid) {
                err = __thread_stack__flush(thread, ts);
                if (err)
                        return err;
                ts->comm = comm;
        }

        /* If the stack is empty, put the current symbol on the stack */
        if (!ts->cnt) {
                err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
                if (err)
                        return err;
        }

        ts->branch_count += 1;
        ts->last_time = sample->time;

        if (sample->flags & PERF_IP_FLAG_CALL) {
                bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
                struct call_path_root *cpr = ts->crp->cpr;
                struct call_path *cp;
                u64 ret_addr;

                if (!sample->ip || !sample->addr)
                        return 0;

                ret_addr = sample->ip + sample->insn_len;
                if (ret_addr == sample->addr)
                        return 0; /* Zero-length calls are excluded */

                cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
                                        to_al->sym, sample->addr,
                                        ts->kernel_start);
                err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
                                            cp, false, trace_end);

                /*
                 * A call to the same symbol, but not to the start of the
                 * symbol, may be the start of an x86 retpoline.
                 */
                if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
                    from_al->sym == to_al->sym &&
                    to_al->addr != to_al->sym->start)
                        ts->rstate = X86_RETPOLINE_DETECTED;

        } else if (sample->flags & PERF_IP_FLAG_RETURN) {
                if (!sample->ip || !sample->addr)
                        return 0;

                /* x86 retpoline 'return' doesn't match the stack */
                if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
                    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
                        return thread_stack__x86_retpoline(ts, sample, to_al);

                err = thread_stack__pop_cp(thread, ts, sample->addr,
                                           sample->time, ref, from_al->sym);
                if (err) {
                        if (err < 0)
                                return err;
                        err = thread_stack__no_call_return(thread, ts, sample,
                                                           from_al, to_al, ref);
                }
        } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
                err = thread_stack__trace_begin(thread, ts, sample->time, ref);
        } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
                err = thread_stack__trace_end(ts, sample, ref);
        } else if (sample->flags & PERF_IP_FLAG_BRANCH &&
                   from_al->sym != to_al->sym && to_al->sym &&
                   to_al->addr == to_al->sym->start) {
                struct call_path_root *cpr = ts->crp->cpr;
                struct call_path *cp;

                /*
                 * The compiler might optimize a call/ret combination by making
                 * it a jmp. Make that visible by recording on the stack a
                 * branch to the start of a different symbol. Note, that means
                 * when a ret pops the stack, all jmps must be popped off first.
                 */
                cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
                                        to_al->sym, sample->addr,
                                        ts->kernel_start);
                err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
                                            false);
                if (!err)
                        ts->stack[ts->cnt - 1].non_call = true;
        }

        return err;
}

size_t thread_stack__depth(struct thread *thread, int cpu)
{
        struct thread_stack *ts = thread__stack(thread, cpu);

        if (!ts)
                return 0;
        return ts->cnt;
}