diff options
Diffstat (limited to 'tools/perf/builtin-trace.c')
-rw-r--r-- | tools/perf/builtin-trace.c | 588 |
1 files changed, 432 insertions, 156 deletions
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d7c7d29291fb..6ac51925ea42 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -39,6 +39,7 @@ #include "util/synthetic-events.h" #include "util/evlist.h" #include "util/evswitch.h" +#include "util/hashmap.h" #include "util/mmap.h" #include <subcmd/pager.h> #include <subcmd/exec-cmd.h> @@ -63,9 +64,9 @@ #include "print_binary.h" #include "string2.h" #include "syscalltbl.h" -#include "rb_resort.h" #include "../perf.h" #include "trace_augment.h" +#include "dwarf-regs.h" #include <errno.h> #include <inttypes.h> @@ -86,6 +87,7 @@ #include <linux/ctype.h> #include <perf/mmap.h> +#include <tools/libc_compat.h> #ifdef HAVE_LIBTRACEEVENT #include <event-parse.h> @@ -139,11 +141,19 @@ struct syscall_fmt { bool hexret; }; +enum summary_mode { + SUMMARY__NONE = 0, + SUMMARY__BY_TOTAL, + SUMMARY__BY_THREAD, +}; + struct trace { struct perf_tool tool; - struct syscalltbl *sctbl; struct { - struct syscall *table; + /** Syscalls used by the trace, sorted by e_machine and then id. */ + struct syscall **table; + /** Number of entries in table. */ + size_t table_size; struct { struct evsel *sys_enter, *sys_exit, @@ -177,14 +187,25 @@ struct trace { pid_t *entries; struct bpf_map *map; } filter_pids; + /* + * TODO: The map is from an ID (aka system call number) to struct + * syscall_stats. If there is >1 e_machine, such as i386 and x86-64 + * processes, then the statistics gathered here will be wrong for + * the non EM_HOST system calls. A fix would be to add the e_machine + * into the key, but this would make the code inconsistent with the + * per-thread version. 
+ */ + struct hashmap *syscall_stats; double duration_filter; double runtime_ms; + unsigned long pfmaj, pfmin; struct { u64 vfs_getname, proc_getname; } stats; unsigned int max_stack; unsigned int min_stack; + enum summary_mode summary_mode; int raw_augmented_syscalls_args_size; bool raw_augmented_syscalls; bool fd_path_disabled; @@ -1445,22 +1466,37 @@ static const struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias); } -/* - * is_exit: is this "exit" or "exit_group"? - * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter. - * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc. - * nonexistent: Just a hole in the syscall table, syscall id not allocated +/** + * struct syscall */ struct syscall { + /** @e_machine: The ELF machine associated with the entry. */ + int e_machine; + /** @id: id value from the tracepoint, the system call number. */ + int id; struct tep_event *tp_format; int nr_args; + /** + * @args_size: sum of the sizes of the syscall arguments, anything + * after that is augmented stuff: pathname for openat, etc. + */ + int args_size; struct { struct bpf_program *sys_enter, *sys_exit; } bpf_prog; + /** @is_exit: is this "exit" or "exit_group"? */ bool is_exit; + /** + * @is_open: is this "open" or "openat"? To associate the fd returned in + * sys_exit with the pathname in sys_enter. + */ bool is_open; + /** + * @nonexistent: Name lookup failed. Just a hole in the syscall table, + * syscall id not allocated. 
+ */ bool nonexistent; bool use_btf; struct tep_format_field *args; @@ -1519,16 +1555,60 @@ struct thread_trace { struct file *table; } files; - struct intlist *syscall_stats; + struct hashmap *syscall_stats; }; -static struct thread_trace *thread_trace__new(void) +static size_t syscall_id_hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool syscall_id_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +/* + * Allocate an empty syscall id -> struct syscall_stats map. Returns NULL on + * failure: hashmap__new() reports errors via ERR_PTR(), which the callers' + * NULL checks and delete_syscall_stats() would otherwise mistake for a map. + */ +static struct hashmap *alloc_syscall_stats(void) +{ + struct hashmap *result = hashmap__new(syscall_id_hash, syscall_id_equal, NULL); + + return IS_ERR(result) ? NULL : result; +} + +static void delete_syscall_stats(struct hashmap *syscall_stats) +{ + struct hashmap_entry *pos; + size_t bkt; + + if (syscall_stats == NULL) + return; + + hashmap__for_each_entry(syscall_stats, pos, bkt) + zfree(&pos->pvalue); + hashmap__free(syscall_stats); +} + +/* + * Allocate the per-thread tracing state. The syscall stats map is only + * created when a summary was requested (trace->summary); if that allocation + * fails the whole thread_trace is freed and NULL is returned. + */ +static struct thread_trace *thread_trace__new(struct trace *trace) { struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); if (ttrace) { ttrace->files.max = -1; - ttrace->syscall_stats = intlist__new(NULL); + if (trace->summary) { + ttrace->syscall_stats = alloc_syscall_stats(); + if (ttrace->syscall_stats == NULL) + zfree(&ttrace); + } } return ttrace; @@ -1543,14 +1611,14 @@ static void thread_trace__delete(void *pttrace) if (!ttrace) return; - intlist__delete(ttrace->syscall_stats); + delete_syscall_stats(ttrace->syscall_stats); ttrace->syscall_stats = NULL; thread_trace__free_files(ttrace); zfree(&ttrace->entry_str); free(ttrace); } -static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) +static struct thread_trace *thread__trace(struct thread *thread, struct trace *trace) { struct thread_trace *ttrace; @@ -1558,7 +1626,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) goto fail; if (thread__priv(thread) == NULL) - thread__set_priv(thread, thread_trace__new()); + thread__set_priv(thread, thread_trace__new(trace)); if (thread__priv(thread) == NULL) goto fail; @@ 
-1568,7 +1636,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) return ttrace; fail: - color_fprintf(fp, PERF_COLOR_RED, + color_fprintf(trace->output, PERF_COLOR_RED, "WARNING: not enough memory, dropping samples!\n"); return NULL; } @@ -2066,22 +2134,21 @@ static int syscall__set_arg_fmts(struct syscall *sc) return 0; } -static int trace__read_syscall_info(struct trace *trace, int id) +static int syscall__read_info(struct syscall *sc, struct trace *trace) { char tp_name[128]; - struct syscall *sc; - const char *name = syscalltbl__name(trace->sctbl, id); + const char *name; int err; - if (trace->syscalls.table == NULL) { - trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc)); - if (trace->syscalls.table == NULL) - return -ENOMEM; - } - sc = trace->syscalls.table + id; if (sc->nonexistent) return -EEXIST; + if (sc->name) { + /* Info already read. */ + return 0; + } + + name = syscalltbl__name(sc->e_machine, sc->id); if (name == NULL) { sc->nonexistent = true; return -EEXIST; @@ -2104,11 +2171,16 @@ static int trace__read_syscall_info(struct trace *trace, int id) */ if (IS_ERR(sc->tp_format)) { sc->nonexistent = true; - return PTR_ERR(sc->tp_format); + err = PTR_ERR(sc->tp_format); + sc->tp_format = NULL; + return err; } - if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) + /* + * The tracepoint format contains the __syscall_nr field, so it's one more + * than the actual number of syscall arguments. + */ + if (syscall__alloc_arg_fmts(sc, sc->tp_format->format.nr_fields - 1)) return -ENOMEM; sc->args = sc->tp_format->format.fields; @@ -2176,10 +2248,14 @@ static int trace__validate_ev_qualifier(struct trace *trace) strlist__for_each_entry(pos, trace->ev_qualifier) { const char *sc = pos->s; - int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; + /* + * TODO: This assumes the validation/warnings are all for the + * same binary type as perf (EM_HOST); support other types. 
+ */ + int id = syscalltbl__id(EM_HOST, sc), match_next = -1; if (id < 0) { - id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next); + id = syscalltbl__strglobmatch_first(EM_HOST, sc, &match_next); if (id >= 0) goto matches; @@ -2199,7 +2275,7 @@ matches: continue; while (1) { - id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); + id = syscalltbl__strglobmatch_next(EM_HOST, sc, &match_next); if (id < 0) break; if (nr_allocated == nr_used) { @@ -2397,13 +2473,92 @@ next_arg: return printed; } +static struct syscall *syscall__new(int e_machine, int id) +{ + struct syscall *sc = zalloc(sizeof(*sc)); + + if (!sc) + return NULL; + + sc->e_machine = e_machine; + sc->id = id; + return sc; +} + +static void syscall__delete(struct syscall *sc) +{ + if (!sc) + return; + + free(sc->arg_fmt); + free(sc); +} + +static int syscall__bsearch_cmp(const void *key, const void *entry) +{ + const struct syscall *a = key, *b = *((const struct syscall **)entry); + + if (a->e_machine != b->e_machine) + return a->e_machine - b->e_machine; + + return a->id - b->id; +} + +static int syscall__cmp(const void *va, const void *vb) +{ + const struct syscall *a = *((const struct syscall **)va); + const struct syscall *b = *((const struct syscall **)vb); + + if (a->e_machine != b->e_machine) + return a->e_machine - b->e_machine; + + return a->id - b->id; +} + +static struct syscall *trace__find_syscall(struct trace *trace, int e_machine, int id) +{ + struct syscall key = { + .e_machine = e_machine, + .id = id, + }; + struct syscall *sc, **tmp; + + if (trace->syscalls.table) { + struct syscall **sc_entry = bsearch(&key, trace->syscalls.table, + trace->syscalls.table_size, + sizeof(trace->syscalls.table[0]), + syscall__bsearch_cmp); + + if (sc_entry) + return *sc_entry; + } + + sc = syscall__new(e_machine, id); + if (!sc) + return NULL; + + tmp = reallocarray(trace->syscalls.table, trace->syscalls.table_size + 1, + sizeof(trace->syscalls.table[0])); + if (!tmp) { + 
syscall__delete(sc); + return NULL; + } + + trace->syscalls.table = tmp; + trace->syscalls.table[trace->syscalls.table_size++] = sc; + qsort(trace->syscalls.table, trace->syscalls.table_size, sizeof(trace->syscalls.table[0]), + syscall__cmp); + return sc; +} + typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel, union perf_event *event, struct perf_sample *sample); -static struct syscall *trace__syscall_info(struct trace *trace, - struct evsel *evsel, int id) +static struct syscall *trace__syscall_info(struct trace *trace, struct evsel *evsel, + int e_machine, int id) { + struct syscall *sc; int err = 0; if (id < 0) { @@ -2428,28 +2583,20 @@ static struct syscall *trace__syscall_info(struct trace *trace, err = -EINVAL; - if (id > trace->sctbl->syscalls.max_id) { - goto out_cant_read; - } - - if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) && - (err = trace__read_syscall_info(trace, id)) != 0) - goto out_cant_read; - - if (trace->syscalls.table && trace->syscalls.table[id].nonexistent) - goto out_cant_read; - - return &trace->syscalls.table[id]; + sc = trace__find_syscall(trace, e_machine, id); + if (sc) + err = syscall__read_info(sc, trace); -out_cant_read: - if (verbose > 0) { + if (err && verbose > 0) { char sbuf[STRERR_BUFSIZE]; - fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf))); - if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL) - fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); + + fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, + str_error_r(-err, sbuf, sizeof(sbuf))); + if (sc && sc->name) + fprintf(trace->output, "(%s)", sc->name); fputs(" information\n", trace->output); } - return NULL; + return err ? 
NULL : sc; } struct syscall_stats { @@ -2460,24 +2607,26 @@ struct syscall_stats { }; static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace, - int id, struct perf_sample *sample, long err, bool errno_summary) + int id, struct perf_sample *sample, long err, + struct trace *trace) { - struct int_node *inode; - struct syscall_stats *stats; + struct hashmap *syscall_stats = ttrace->syscall_stats; + struct syscall_stats *stats = NULL; u64 duration = 0; - inode = intlist__findnew(ttrace->syscall_stats, id); - if (inode == NULL) - return; + if (trace->summary_mode == SUMMARY__BY_TOTAL) + syscall_stats = trace->syscall_stats; - stats = inode->priv; - if (stats == NULL) { + if (!hashmap__find(syscall_stats, id, &stats)) { stats = zalloc(sizeof(*stats)); if (stats == NULL) return; init_stats(&stats->stats); - inode->priv = stats; + if (hashmap__add(syscall_stats, id, stats) < 0) { + free(stats); + return; + } } if (ttrace->entry_time && sample->time > ttrace->entry_time) @@ -2488,7 +2637,7 @@ static void thread__update_stats(struct thread *thread, struct thread_trace *ttr if (err < 0) { ++stats->nr_failures; - if (!errno_summary) + if (!trace->errno_summary) return; err = -err; @@ -2596,14 +2745,6 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam return NULL; } -static void syscall__exit(struct syscall *sc) -{ - if (!sc) - return; - - zfree(&sc->arg_fmt); -} - static int trace__sys_enter(struct trace *trace, struct evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2613,16 +2754,17 @@ static int trace__sys_enter(struct trace *trace, struct evsel *evsel, int printed = 0; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; - int augmented_args_size = 0; + int augmented_args_size = 0, e_machine; void *augmented_args = NULL; - struct syscall *sc = trace__syscall_info(trace, evsel, id); + struct syscall *sc; struct thread_trace *ttrace; - if (sc == 
NULL) - return -1; - thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); - ttrace = thread__trace(thread, trace->output); + e_machine = thread__e_machine(thread, trace->host); + sc = trace__syscall_info(trace, evsel, e_machine, id); + if (sc == NULL) + goto out_put; + ttrace = thread__trace(thread, trace); if (ttrace == NULL) goto out_put; @@ -2689,17 +2831,19 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, struct thread_trace *ttrace; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; - struct syscall *sc = trace__syscall_info(trace, evsel, id); + struct syscall *sc; char msg[1024]; void *args, *augmented_args = NULL; - int augmented_args_size; + int augmented_args_size, e_machine; size_t printed = 0; - if (sc == NULL) - return -1; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); - ttrace = thread__trace(thread, trace->output); + e_machine = thread__e_machine(thread, trace->host); + sc = trace__syscall_info(trace, evsel, e_machine, id); + if (sc == NULL) + return -1; + ttrace = thread__trace(thread, trace); /* * We need to get ttrace just to make sure it is there when syscall__scnprintf_args() * and the rest of the beautifiers accessing it via struct syscall_arg touches it. 
@@ -2763,15 +2907,16 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel, bool duration_calculated = false; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0; - int alignment = trace->args_alignment; - struct syscall *sc = trace__syscall_info(trace, evsel, id); + int alignment = trace->args_alignment, e_machine; + struct syscall *sc; struct thread_trace *ttrace; - if (sc == NULL) - return -1; - thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); - ttrace = thread__trace(thread, trace->output); + e_machine = thread__e_machine(thread, trace->host); + sc = trace__syscall_info(trace, evsel, e_machine, id); + if (sc == NULL) + goto out_put; + ttrace = thread__trace(thread, trace); if (ttrace == NULL) goto out_put; @@ -2780,7 +2925,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel, ret = perf_evsel__sc_tp_uint(evsel, ret, sample); if (trace->summary) - thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary); + thread__update_stats(thread, ttrace, id, sample, ret, trace); if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) { trace__set_fd_pathname(thread, ret, ttrace->filename.name); @@ -2960,7 +3105,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel, struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); - struct thread_trace *ttrace = thread__trace(thread, trace->output); + struct thread_trace *ttrace = thread__trace(thread, trace); if (ttrace == NULL) goto out_dump; @@ -3080,7 +3225,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val); } - return printed + fprintf(trace->output, "%.*s", (int)printed, bf); + return fprintf(trace->output, "%.*s", (int)printed, bf); } static int 
trace__event_handler(struct trace *trace, struct evsel *evsel, @@ -3117,7 +3262,8 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, if (evsel == trace->syscalls.events.bpf_output) { int id = perf_evsel__sc_tp_uint(evsel, id, sample); - struct syscall *sc = trace__syscall_info(trace, evsel, id); + int e_machine = thread ? thread__e_machine(thread, trace->host) : EM_HOST; + struct syscall *sc = trace__syscall_info(trace, evsel, e_machine, id); if (sc) { fprintf(trace->output, "%s(", sc->name); @@ -3214,14 +3360,17 @@ static int trace__pgfault(struct trace *trace, } } - ttrace = thread__trace(thread, trace->output); + ttrace = thread__trace(thread, trace); if (ttrace == NULL) goto out_put; - if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) + if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) { ttrace->pfmaj++; - else + trace->pfmaj++; + } else { ttrace->pfmin++; + trace->pfmin++; + } if (trace->summary_only) goto out; @@ -3380,6 +3529,7 @@ out_free: } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); +static size_t trace__fprintf_total_summary(struct trace *trace, FILE *fp); static bool evlist__add_vfs_getname(struct evlist *evlist) { @@ -3620,9 +3770,9 @@ out_unaugmented: return trace->skel->progs.syscall_unaugmented; } -static void trace__init_syscall_bpf_progs(struct trace *trace, int id) +static void trace__init_syscall_bpf_progs(struct trace *trace, int e_machine, int id) { - struct syscall *sc = trace__syscall_info(trace, NULL, id); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id); if (sc == NULL) return; @@ -3631,22 +3781,22 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id) sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? 
sc->fmt->bpf_prog_name.sys_exit : NULL, "exit"); } -static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id) +static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int e_machine, int id) { - struct syscall *sc = trace__syscall_info(trace, NULL, id); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id); return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); } -static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) +static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int e_machine, int id) { - struct syscall *sc = trace__syscall_info(trace, NULL, id); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id); return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); } -static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array) +static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int e_machine, int key, unsigned int *beauty_array) { struct tep_format_field *field; - struct syscall *sc = trace__syscall_info(trace, NULL, key); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, key); const struct btf_type *bt; char *struct_offset, *tmp, name[32]; bool can_augment = false; @@ -3728,7 +3878,8 @@ static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigne return -1; } -static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) +static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, + struct syscall *sc) { struct tep_format_field *field, *candidate_field; /* @@ -3742,13 +3893,13 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace return NULL; try_to_find_pair: - for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { - int id = syscalltbl__id_at_idx(trace->sctbl, i); - struct syscall *pair = 
trace__syscall_info(trace, NULL, id); + for (int i = 0, num_idx = syscalltbl__num_idx(sc->e_machine); i < num_idx; ++i) { + int id = syscalltbl__id_at_idx(sc->e_machine, i); + struct syscall *pair = trace__syscall_info(trace, NULL, sc->e_machine, id); struct bpf_program *pair_prog; bool is_candidate = false; - if (pair == NULL || pair == sc || + if (pair == NULL || pair->id == sc->id || pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented) continue; @@ -3819,7 +3970,8 @@ try_to_find_pair: goto next_candidate; } - pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name); + pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, + sc->name); return pair_prog; next_candidate: continue; @@ -3828,7 +3980,7 @@ try_to_find_pair: return NULL; } -static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) +static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_machine) { int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter); int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit); @@ -3836,27 +3988,27 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) int err = 0; unsigned int beauty_array[6]; - for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { - int prog_fd, key = syscalltbl__id_at_idx(trace->sctbl, i); + for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) { + int prog_fd, key = syscalltbl__id_at_idx(e_machine, i); if (!trace__syscall_enabled(trace, key)) continue; - trace__init_syscall_bpf_progs(trace, key); + trace__init_syscall_bpf_progs(trace, e_machine, key); // It'll get at least the "!raw_syscalls:unaugmented" - prog_fd = trace__bpf_prog_sys_enter_fd(trace, key); + prog_fd = trace__bpf_prog_sys_enter_fd(trace, e_machine, key); err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY); if (err) break; - prog_fd = trace__bpf_prog_sys_exit_fd(trace, key); + prog_fd = 
trace__bpf_prog_sys_exit_fd(trace, e_machine, key); err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY); if (err) break; /* use beauty_map to tell BPF how many bytes to collect, set beauty_map's value here */ memset(beauty_array, 0, sizeof(beauty_array)); - err = trace__bpf_sys_enter_beauty_map(trace, key, (unsigned int *)beauty_array); + err = trace__bpf_sys_enter_beauty_map(trace, e_machine, key, (unsigned int *)beauty_array); if (err) continue; err = bpf_map_update_elem(beauty_map_fd, &key, beauty_array, BPF_ANY); @@ -3892,9 +4044,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) * first and second arg (this one on the raw_syscalls:sys_exit prog * array tail call, then that one will be used. */ - for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { - int key = syscalltbl__id_at_idx(trace->sctbl, i); - struct syscall *sc = trace__syscall_info(trace, NULL, key); + for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) { + int key = syscalltbl__id_at_idx(e_machine, i); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, key); struct bpf_program *pair_prog; int prog_fd; @@ -4015,13 +4167,16 @@ static int __trace__deliver_event(struct trace *trace, union perf_event *event) { struct evlist *evlist = trace->evlist; struct perf_sample sample; - int err = evlist__parse_sample(evlist, event, &sample); + int err; + perf_sample__init(&sample, /*all=*/false); + err = evlist__parse_sample(evlist, event, &sample); if (err) fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); else trace__handle_event(trace, event, &sample); + perf_sample__exit(&sample); return 0; } @@ -4292,6 +4447,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } + if (trace->summary_mode == SUMMARY__BY_TOTAL) { + trace->syscall_stats = alloc_syscall_stats(); + if (trace->syscall_stats == NULL) + goto out_delete_evlist; + } + evlist__config(evlist, 
&trace->opts, &callchain_param); if (forks) { @@ -4315,10 +4476,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv) * CPU the bpf-output event's file descriptor. */ perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) { + int mycpu = cpu.cpu; + bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__, - &cpu.cpu, sizeof(int), + &mycpu, sizeof(mycpu), xyarray__entry(trace->syscalls.events.bpf_output->core.fd, - cpu.cpu, 0), + mycpu, 0), sizeof(__u32), BPF_ANY); } } @@ -4331,8 +4494,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_error_mem; #ifdef HAVE_BPF_SKEL - if (trace->skel && trace->skel->progs.sys_enter) - trace__init_syscalls_bpf_prog_array_maps(trace); + if (trace->skel && trace->skel->progs.sys_enter) { + /* + * TODO: Initialize for all host binary machine types, not just + * those matching the perf binary. + */ + trace__init_syscalls_bpf_prog_array_maps(trace, EM_HOST); + } #endif if (trace->ev_qualifier_ids.nr > 0) { @@ -4357,7 +4525,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) * So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is * not in use. */ - trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close")); + /* TODO: support for more than just perf binary machine type close. 
*/ + trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(EM_HOST, "close")); err = trace__expand_filters(trace, &evsel); if (err) @@ -4452,8 +4621,12 @@ out_disable: ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL); if (!err) { - if (trace->summary) - trace__fprintf_thread_summary(trace, trace->output); + if (trace->summary) { + if (trace->summary_mode == SUMMARY__BY_TOTAL) + trace__fprintf_total_summary(trace, trace->output); + else + trace__fprintf_thread_summary(trace, trace->output); + } if (trace->show_tool_stats) { fprintf(trace->output, "Stats:\n " @@ -4465,6 +4638,7 @@ out_disable: } out_delete_evlist: + delete_syscall_stats(trace->syscall_stats); trace__symbols__exit(trace); evlist__free_syscall_tp_fields(evlist); evlist__delete(evlist); @@ -4524,6 +4698,7 @@ static int trace__replay(struct trace *trace) struct evsel *evsel; int err = -1; + perf_tool__init(&trace->tool, /*ordered_events=*/true); trace->tool.sample = trace__process_sample; trace->tool.mmap = perf_event__process_mmap; trace->tool.mmap2 = perf_event__process_mmap2; @@ -4592,6 +4767,12 @@ static int trace__replay(struct trace *trace) evsel->handler = trace__pgfault; } + if (trace->summary_mode == SUMMARY__BY_TOTAL) { + trace->syscall_stats = alloc_syscall_stats(); + if (trace->syscall_stats == NULL) + goto out; + } + setup_pager(); err = perf_session__process_events(session); @@ -4602,12 +4783,13 @@ static int trace__replay(struct trace *trace) trace__fprintf_thread_summary(trace, trace->output); out: + delete_syscall_stats(trace->syscall_stats); perf_session__delete(session); return err; } -static size_t trace__fprintf_threads_header(FILE *fp) +static size_t trace__fprintf_summary_header(FILE *fp) { size_t printed; @@ -4616,29 +4798,56 @@ static size_t trace__fprintf_threads_header(FILE *fp) return printed; } -DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, +struct syscall_entry { struct syscall_stats *stats; double msecs; int syscall; -) +}; + +static int 
entry_cmp(const void *e1, const void *e2) { - struct int_node *source = rb_entry(nd, struct int_node, rb_node); - struct syscall_stats *stats = source->priv; + const struct syscall_entry *entry1 = e1; + const struct syscall_entry *entry2 = e2; - entry->syscall = source->i; - entry->stats = stats; - entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0; + return (entry1->msecs < entry2->msecs) - (entry1->msecs > entry2->msecs); /* descending by msecs; 0 on ties so qsort() sees a consistent ordering */ } -static size_t thread__dump_stats(struct thread_trace *ttrace, - struct trace *trace, FILE *fp) +static struct syscall_entry *syscall__sort_stats(struct hashmap *syscall_stats) +{ + struct syscall_entry *entry; + struct hashmap_entry *pos; + unsigned bkt, i, nr; + + nr = syscall_stats->sz; + entry = malloc(nr * sizeof(*entry)); + if (entry == NULL) + return NULL; + + i = 0; + hashmap__for_each_entry(syscall_stats, pos, bkt) { + struct syscall_stats *ss = pos->pvalue; + struct stats *st = &ss->stats; + + entry[i].stats = ss; + entry[i].msecs = (u64)st->n * (avg_stats(st) / NSEC_PER_MSEC); + entry[i].syscall = pos->key; + i++; + } + assert(i == nr); + + qsort(entry, nr, sizeof(*entry), entry_cmp); + return entry; +} + +static size_t syscall__dump_stats(struct trace *trace, int e_machine, FILE *fp, + struct hashmap *syscall_stats) { size_t printed = 0; struct syscall *sc; - struct rb_node *nd; - DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); + struct syscall_entry *entries; - if (syscall_stats == NULL) + entries = syscall__sort_stats(syscall_stats); + if (entries == NULL) return 0; printed += fprintf(fp, "\n"); @@ -4647,8 +4856,10 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); - resort_rb__for_each_entry(nd, syscall_stats) { - struct syscall_stats *stats = syscall_stats_entry->stats; + for (size_t i = 0; i < syscall_stats->sz; 
i++) { + struct syscall_entry *entry = &entries[i]; + struct syscall_stats *stats = entry->stats; + if (stats) { double min = (double)(stats->stats.min) / NSEC_PER_MSEC; double max = (double)(stats->stats.max) / NSEC_PER_MSEC; @@ -4659,10 +4870,13 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0; avg /= NSEC_PER_MSEC; - sc = &trace->syscalls.table[syscall_stats_entry->syscall]; + sc = trace__syscall_info(trace, /*evsel=*/NULL, e_machine, entry->syscall); + if (!sc) + continue; + printed += fprintf(fp, " %-15s", sc->name); printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f", - n, stats->nr_failures, syscall_stats_entry->msecs, min, avg); + n, stats->nr_failures, entry->msecs, min, avg); printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); if (trace->errno_summary && stats->nr_failures) { @@ -4676,16 +4890,28 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, } } - resort_rb__delete(syscall_stats); + free(entries); printed += fprintf(fp, "\n\n"); return printed; } +static size_t thread__dump_stats(struct thread_trace *ttrace, + struct trace *trace, int e_machine, FILE *fp) +{ + return syscall__dump_stats(trace, e_machine, fp, ttrace->syscall_stats); +} + +static size_t system__dump_stats(struct trace *trace, int e_machine, FILE *fp) +{ + return syscall__dump_stats(trace, e_machine, fp, trace->syscall_stats); +} + static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) { size_t printed = 0; struct thread_trace *ttrace = thread__priv(thread); + int e_machine = thread__e_machine(thread, trace->host); double ratio; if (ttrace == NULL) @@ -4705,7 +4931,7 @@ static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trac else if (fputc('\n', fp) != EOF) ++printed; - printed += thread__dump_stats(ttrace, trace, fp); + printed += thread__dump_stats(ttrace, trace, e_machine, fp); return printed; } @@ -4735,7 +4961,7 @@ 
static int trace_nr_events_cmp(void *priv __maybe_unused, static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { - size_t printed = trace__fprintf_threads_header(fp); + size_t printed = trace__fprintf_summary_header(fp); LIST_HEAD(threads); if (machine__thread_list(trace->host, &threads) == 0) { @@ -4750,6 +4976,28 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) return printed; } +static size_t trace__fprintf_total_summary(struct trace *trace, FILE *fp) +{ + size_t printed = trace__fprintf_summary_header(fp); + + printed += fprintf(fp, " total, "); + printed += fprintf(fp, "%lu events", trace->nr_events); + + if (trace->pfmaj) + printed += fprintf(fp, ", %lu majfaults", trace->pfmaj); + if (trace->pfmin) + printed += fprintf(fp, ", %lu minfaults", trace->pfmin); + if (trace->sched) + printed += fprintf(fp, ", %.3f msec\n", trace->runtime_ms); + else if (fputc('\n', fp) != EOF) + ++printed; + + /* TODO: get all system e_machines. */ + printed += system__dump_stats(trace, EM_HOST, fp); + + return printed; +} + static int trace__set_duration(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -4937,8 +5185,9 @@ static int trace__parse_events_option(const struct option *opt, const char *str, *sep = '\0'; list = 0; - if (syscalltbl__id(trace->sctbl, s) >= 0 || - syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { + /* TODO: support for more than just perf binary machine type syscalls. 
*/ + if (syscalltbl__id(EM_HOST, s) >= 0 || + syscalltbl__strglobmatch_first(EM_HOST, s, &idx) >= 0) { list = 1; goto do_concat; } @@ -5021,6 +5270,23 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u return 0; } +static int trace__parse_summary_mode(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct trace *trace = opt->value; + + if (!strcmp(str, "thread")) { + trace->summary_mode = SUMMARY__BY_THREAD; + } else if (!strcmp(str, "total")) { + trace->summary_mode = SUMMARY__BY_TOTAL; + } else { + pr_err("Unknown summary mode: %s\n", str); + return -1; + } + + return 0; +} + static int trace__config(const char *var, const char *value, void *arg) { struct trace *trace = arg; @@ -5067,17 +5333,20 @@ out: static void trace__exit(struct trace *trace) { - int i; - strlist__delete(trace->ev_qualifier); zfree(&trace->ev_qualifier_ids.entries); if (trace->syscalls.table) { - for (i = 0; i <= trace->sctbl->syscalls.max_id; i++) - syscall__exit(&trace->syscalls.table[i]); + for (size_t i = 0; i < trace->syscalls.table_size; i++) + syscall__delete(trace->syscalls.table[i]); zfree(&trace->syscalls.table); } - syscalltbl__delete(trace->sctbl); zfree(&trace->perfconfig_events); + evlist__delete(trace->evlist); + trace->evlist = NULL; +#ifdef HAVE_LIBBPF_SUPPORT + btf__free(trace->btf); + trace->btf = NULL; +#endif } #ifdef HAVE_BPF_SKEL @@ -5168,6 +5437,9 @@ int cmd_trace(int argc, const char **argv) "Show all syscalls and summary with statistics"), OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary, "Show errno stats per syscall, use with -s or -S"), + OPT_CALLBACK(0, "summary-mode", &trace, "mode", + "How to show summary: select thread (default) or total", + trace__parse_summary_mode), OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", "Trace pagefaults", parse_pagefaults, "maj"), OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), @@ -5222,9 +5494,8 @@ int cmd_trace(int argc, 
const char **argv) sigaction(SIGCHLD, &sigchld_act, NULL); trace.evlist = evlist__new(); - trace.sctbl = syscalltbl__new(); - if (trace.evlist == NULL || trace.sctbl == NULL) { + if (trace.evlist == NULL) { pr_err("Not enough memory to run!\n"); err = -ENOMEM; goto out; @@ -5452,8 +5723,10 @@ init_augmented_syscall_tp: } } - if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) - return trace__record(&trace, argc-1, &argv[1]); + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) { + err = trace__record(&trace, argc-1, &argv[1]); + goto out; + } /* Using just --errno-summary will trigger --summary */ if (trace.errno_summary && !trace.summary && !trace.summary_only) @@ -5464,8 +5737,11 @@ init_augmented_syscall_tp: trace.summary = trace.summary_only; /* Keep exited threads, otherwise information might be lost for summary */ - if (trace.summary) + if (trace.summary) { symbol_conf.keep_exited_threads = true; + if (trace.summary_mode == SUMMARY__NONE) + trace.summary_mode = SUMMARY__BY_THREAD; + } if (output_name != NULL) { err = trace__open_output(&trace, output_name); |