diff options
Diffstat (limited to 'tools/perf/util/thread-stack.c')
-rw-r--r-- | tools/perf/util/thread-stack.c | 161 |
1 files changed, 155 insertions, 6 deletions
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index f52c0f90915d..a8b45168513c 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -20,6 +20,7 @@ #include "thread.h" #include "event.h" #include "machine.h" +#include "env.h" #include "util.h" #include "debug.h" #include "symbol.h" @@ -29,6 +30,19 @@ #define STACK_GROWTH 2048 +/* + * State of retpoline detection. + * + * RETPOLINE_NONE: no retpoline detection + * X86_RETPOLINE_POSSIBLE: x86 retpoline possible + * X86_RETPOLINE_DETECTED: x86 retpoline detected + */ +enum retpoline_state_t { + RETPOLINE_NONE, + X86_RETPOLINE_POSSIBLE, + X86_RETPOLINE_DETECTED, +}; + /** * struct thread_stack_entry - thread stack entry. * @ret_addr: return address @@ -64,6 +78,7 @@ struct thread_stack_entry { * @crp: call/return processor * @comm: current comm * @arr_sz: size of array if this is the first element of an array + * @rstate: used to detect retpolines */ struct thread_stack { struct thread_stack_entry *stack; @@ -76,6 +91,7 @@ struct thread_stack { struct call_return_processor *crp; struct comm *comm; unsigned int arr_sz; + enum retpoline_state_t rstate; }; /* @@ -115,10 +131,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread, if (err) return err; - if (thread->mg && thread->mg->machine) - ts->kernel_start = machine__kernel_start(thread->mg->machine); - else + if (thread->mg && thread->mg->machine) { + struct machine *machine = thread->mg->machine; + const char *arch = perf_env__arch(machine->env); + + ts->kernel_start = machine__kernel_start(machine); + if (!strcmp(arch, "x86")) + ts->rstate = X86_RETPOLINE_POSSIBLE; + } else { ts->kernel_start = 1ULL << 63; + } ts->crp = crp; return 0; @@ -638,14 +660,57 @@ static int thread_stack__no_call_return(struct thread *thread, else parent = root; - /* This 'return' had no 'call', so push and pop top of stack */ - cp = call_path__findnew(cpr, parent, fsym, ip, ks); + if (parent->sym == from_al->sym) { + /* + * At the bottom of the stack, assume the missing 'call' was + * before the trace started. So, pop the current symbol and push + * the 'to' symbol. + */ + if (ts->cnt == 1) { + err = thread_stack__call_return(thread, ts, --ts->cnt, + tm, ref, false); + if (err) + return err; + } + + if (!ts->cnt) { + cp = call_path__findnew(cpr, root, tsym, addr, ks); + + return thread_stack__push_cp(ts, addr, tm, ref, cp, + true, false); + } + + /* + * Otherwise assume the 'return' is being used as a jump (e.g. + * retpoline) and just push the 'to' symbol. + */ + cp = call_path__findnew(cpr, parent, tsym, addr, ks); + + err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false); + if (!err) + ts->stack[ts->cnt - 1].non_call = true; + + return err; + } + + /* + * Assume 'parent' has not yet returned, so push 'to', and then push and + * pop 'from'. + */ + + cp = call_path__findnew(cpr, parent, tsym, addr, ks); err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); if (err) return err; - return thread_stack__pop_cp(thread, ts, addr, tm, ref, tsym); + cp = call_path__findnew(cpr, cp, fsym, ip, ks); + + err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false); + if (err) + return err; + + return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); } static int thread_stack__trace_begin(struct thread *thread, @@ -690,6 +755,70 @@ static int thread_stack__trace_end(struct thread_stack *ts, false, true); } +static bool is_x86_retpoline(const char *name) +{ + const char *p = strstr(name, "__x86_indirect_thunk_"); + + return p == name || !strcmp(name, "__indirect_thunk_start"); +} + +/* + * x86 retpoline functions pollute the call graph. This function removes them. + * This does not handle function return thunks, nor is there any improvement + * for the handling of inline thunks or extern thunks. + */ +static int thread_stack__x86_retpoline(struct thread_stack *ts, + struct perf_sample *sample, + struct addr_location *to_al) +{ + struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; + struct call_path_root *cpr = ts->crp->cpr; + struct symbol *sym = tse->cp->sym; + struct symbol *tsym = to_al->sym; + struct call_path *cp; + + if (sym && is_x86_retpoline(sym->name)) { + /* + * This is a x86 retpoline fn. It pollutes the call graph by + * showing up everywhere there is an indirect branch, but does + * not itself mean anything. Here the top-of-stack is removed, + * by decrementing the stack count, and then further down, the + * resulting top-of-stack is replaced with the actual target. + * The result is that the retpoline functions will no longer + * appear in the call graph. Note this only affects the call + * graph, since all the original branches are left unchanged. + */ + ts->cnt -= 1; + sym = ts->stack[ts->cnt - 2].cp->sym; + if (sym && sym == tsym && to_al->addr != tsym->start) { + /* + * Target is back to the middle of the symbol we came + * from so assume it is an indirect jmp and forget it + * altogether. + */ + ts->cnt -= 1; + return 0; + } + } else if (sym && sym == tsym) { + /* + * Target is back to the symbol we came from so assume it is an + * indirect jmp and forget it altogether. + */ + ts->cnt -= 1; + return 0; + } + + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, + sample->addr, ts->kernel_start); + if (!cp) + return -ENOMEM; + + /* Replace the top-of-stack with the actual target */ + ts->stack[ts->cnt - 1].cp = cp; + + return 0; +} + int thread_stack__process(struct thread *thread, struct comm *comm, struct perf_sample *sample, struct addr_location *from_al, @@ -697,6 +826,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, struct call_return_processor *crp) { struct thread_stack *ts = thread__stack(thread, sample->cpu); + enum retpoline_state_t rstate; int err = 0; if (ts && !ts->crp) { @@ -712,6 +842,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->comm = comm; } + rstate = ts->rstate; + if (rstate == X86_RETPOLINE_DETECTED) + ts->rstate = X86_RETPOLINE_POSSIBLE; + /* Flush stack on exec */ if (ts->comm != comm && thread->pid_ == thread->tid) { err = __thread_stack__flush(thread, ts); @@ -748,10 +882,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->kernel_start); err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, false, trace_end); + + /* + * A call to the same symbol but not the start of the symbol, + * may be the start of a x86 retpoline. + */ + if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && + from_al->sym == to_al->sym && + to_al->addr != to_al->sym->start) + ts->rstate = X86_RETPOLINE_DETECTED; + } else if (sample->flags & PERF_IP_FLAG_RETURN) { if (!sample->ip || !sample->addr) return 0; + /* x86 retpoline 'return' doesn't match the stack */ + if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && + ts->stack[ts->cnt - 1].ret_addr != sample->addr) + return thread_stack__x86_retpoline(ts, sample, to_al); + err = thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, from_al->sym); if (err) { |