diff options
Diffstat (limited to 'tools/perf')
56 files changed, 2695 insertions, 496 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index c71b0f36d9e8..d460049cae8e 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -184,9 +184,10 @@ following filters are defined: - in_tx: only when the target is in a hardware transaction - no_tx: only when the target is not in a hardware transaction - abort_tx: only when the target is a hardware transaction abort + - cond: conditional branches + -The option requires at least one branch type among any, any_call, any_ret, ind_call. +The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. The privilege levels may be omitted, in which case, the privilege levels of the associated event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege levels are subject to permissions. When sampling on multiple events, branch stack sampling diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index a1b5185402d5..d2b59af62bc0 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -111,12 +111,28 @@ OPTIONS --fields=:: Specify output field - multiple keys can be specified in CSV format. Following fields are available: - overhead, overhead_sys, overhead_us, sample and period. + overhead, overhead_sys, overhead_us, overhead_children, sample and period. Also it can contain any sort key(s). By default, every sort keys not specified in -F will be appended automatically. + If --mem-mode option is used, following sort keys are also available + (incompatible with --branch-stack): + symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. + + - symbol_daddr: name of data symbol being executed on at the time of sample + - dso_daddr: name of library or module containing the data being executed + on at the time of sample + - locked: whether the bus was locked at the time of sample + - tlb: type of tlb access for the data at the time of sample + - mem: type of memory access for the data at the time of sample + - snoop: type of snoop (if any) for the data at the time of sample + - dcacheline: the cacheline the data address is on at the time of sample + + And default sort keys are changed to local_weight, mem, sym, dso, + symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. + -p:: --parent=<regex>:: A regex filter to identify parent. The parent is a caller of this @@ -163,6 +179,11 @@ OPTIONS Default: fractal,0.5,callee,function. +--children:: + Accumulate callchain of children to parent entry so that then can + show up in the output. The output will have a new "Children" column + and will be sorted on the data. It requires callchains are recorded. + --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. This is a trade-off @@ -255,6 +276,13 @@ OPTIONS Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. +--mem-mode:: + Use the data addresses of samples in addition to instruction addresses + to build the histograms. To generate meaningful output, the perf.data + file must have been obtained using perf record -d -W and using a + special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See + 'perf mem' for simpler access. + --percent-limit:: Do not show entries which have an overhead under that percent. (Default: 0). diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index bc5990c33dc0..5e0f986dff38 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -43,27 +43,6 @@ TIMECHART OPTIONS --symfs=<directory>:: Look for files with symbols relative to this directory. - -EXAMPLES --------- - -$ perf timechart record git pull - - [ perf record: Woken up 13 times to write data ] - [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ] - -$ perf timechart - - Written 10.2 seconds of trace to output.svg. - -Record system-wide timechart: - - $ perf timechart record - - then generate timechart and highlight 'gcc' tasks: - - $ perf timechart --highlight gcc - -n:: --proc-num:: Print task info for at least given number of tasks. @@ -88,6 +67,26 @@ RECORD OPTIONS --callchain:: Do call-graph (stack chain/backtrace) recording +EXAMPLES +-------- + +$ perf timechart record git pull + + [ perf record: Woken up 13 times to write data ] + [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ] + +$ perf timechart + + Written 10.2 seconds of trace to output.svg. + +Record system-wide timechart: + + $ perf timechart record + + then generate timechart and highlight 'gcc' tasks: + + $ perf timechart --highlight gcc + SEE ALSO -------- linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index dcfa54c851e9..180ae02137a5 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -119,7 +119,7 @@ Default is to monitor all CPUS. --fields=:: Specify output field - multiple keys can be specified in CSV format. Following fields are available: - overhead, overhead_sys, overhead_us, sample and period. + overhead, overhead_sys, overhead_us, overhead_children, sample and period. Also it can contain any sort key(s). By default, every sort keys not specified in --field will be appended @@ -161,6 +161,12 @@ Default is to monitor all CPUS. Setup and enable call-graph (stack chain/backtrace) recording, implies -g. +--children:: + Accumulate callchain of children to parent entry so that then can + show up in the output. The output will have a new "Children" column + and will be sorted on the data. It requires -g/--call-graph option + enabled. + --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. This is a trade-off diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 02f0a4dd1a80..9670a16fa577 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o LIB_OBJS += $(OUTPUT)tests/hists_link.o LIB_OBJS += $(OUTPUT)tests/hists_filter.o LIB_OBJS += $(OUTPUT)tests/hists_output.o +LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o LIB_OBJS += $(OUTPUT)tests/python-use.o LIB_OBJS += $(OUTPUT)tests/bp_signal.o LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o @@ -788,8 +789,8 @@ help: @echo '' @echo 'Perf install targets:' @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed' - @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular' - @echo ' path like make prefix=/usr/local install install-doc' + @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular' + @echo ' path like "make prefix=/usr/local install install-doc"' @echo ' install - install compiled binaries' @echo ' install-doc - install *all* documentation' @echo ' install-man - install manpage documentation' @@ -814,17 +815,20 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html $(DOC_TARGETS): $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) +TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol +TAG_FILES= ../../include/uapi/linux/perf_event.h + TAGS: - $(RM) TAGS - $(FIND) . -name '*.[hcS]' -print | xargs etags -a + $(QUIET_GEN)$(RM) TAGS; \ + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES) tags: - $(RM) tags - $(FIND) . -name '*.[hcS]' -print | xargs ctags -a + $(QUIET_GEN)$(RM) tags; \ + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES) cscope: - $(RM) cscope* - $(FIND) . -name '*.[hcS]' -print | xargs cscope -b + $(QUIET_GEN)$(RM) cscope*; \ + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) ### Detect prefix changes TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index d30d2c2e2a7a..1ec429fef2be 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -65,12 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, return 0; } - he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0); + he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0, + true); if (he == NULL) return -ENOMEM; ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + hists__inc_nr_samples(&evsel->hists, true); return ret; } diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 8bff543acaab..9a5a035cb426 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists, u64 weight, u64 transaction) { if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight, - transaction) != NULL) + transaction, true) != NULL) return 0; return -ENOMEM; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6a3af0013d68..16c7c11ad06e 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -72,7 +72,7 @@ static int perf_event__repipe_attr(struct perf_tool *tool, if (ret) return ret; - if (&inject->output.is_pipe) + if (!inject->output.is_pipe) return 0; return perf_event__repipe_synth(tool, event); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index cdcd4eb3a57d..c63fa2925075 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -288,6 +288,13 @@ static void cleanup_params(void) memset(¶ms, 0, sizeof(params)); } +static void pr_err_with_code(const char *msg, int err) +{ + pr_err("%s", msg); + pr_debug(" Reason: %s (Code: %d)", strerror(-err), err); + pr_err("\n"); +} + static int __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) { @@ -379,7 +386,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) } ret = parse_probe_event_argv(argc, argv); if (ret < 0) { - pr_err(" Error: Parse Error. (%d)\n", ret); + pr_err_with_code(" Error: Command Parse Error.", ret); return ret; } } @@ -419,8 +426,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) } ret = show_perf_probe_events(); if (ret < 0) - pr_err(" Error: Failed to show event list. (%d)\n", - ret); + pr_err_with_code(" Error: Failed to show event list.", ret); return ret; } if (params.show_funcs) { @@ -445,8 +451,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) strfilter__delete(params.filter); params.filter = NULL; if (ret < 0) - pr_err(" Error: Failed to show functions." - " (%d)\n", ret); + pr_err_with_code(" Error: Failed to show functions.", ret); return ret; } @@ -464,7 +469,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) ret = show_line_range(¶ms.line_range, params.target); if (ret < 0) - pr_err(" Error: Failed to show lines. (%d)\n", ret); + pr_err_with_code(" Error: Failed to show lines.", ret); return ret; } if (params.show_vars) { @@ -485,7 +490,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) strfilter__delete(params.filter); params.filter = NULL; if (ret < 0) - pr_err(" Error: Failed to show vars. (%d)\n", ret); + pr_err_with_code(" Error: Failed to show vars.", ret); return ret; } #endif @@ -493,7 +498,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) if (params.dellist) { ret = del_perf_probe_events(params.dellist); if (ret < 0) { - pr_err(" Error: Failed to delete events. (%d)\n", ret); + pr_err_with_code(" Error: Failed to delete events.", ret); return ret; } } @@ -504,7 +509,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) params.target, params.force_add); if (ret < 0) { - pr_err(" Error: Failed to add events. (%d)\n", ret); + pr_err_with_code(" Error: Failed to add events.", ret); return ret; } } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e4c85b8f46c2..378b85b731a7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -454,7 +454,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (done) break; err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1); - if (err < 0 && errno == EINTR) + /* + * Propagate error, only if there's any. Ignore positive + * number of returned events and interrupt error. + */ + if (err > 0 || (err < 0 && errno == EINTR)) err = 0; waking++; } @@ -544,6 +548,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX), BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX), BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), + BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), BRANCH_END }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bc0eec1ce4be..21d830bafff3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb) rep->min_percent = strtof(value, NULL); return 0; } + if (!strcmp(var, "report.children")) { + symbol_conf.cumulate_callchain = perf_config_bool(var, value); + return 0; + } return perf_default_config(var, value, cb); } @@ -85,156 +89,52 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he) */ if (he->stat.nr_events == 1) rep->nr_entries++; - - /* - * Only counts number of samples at this stage as it's more - * natural to do it here and non-sample events are also - * counted in perf_session_deliver_event(). The dump_trace - * requires this info is ready before going to the output tree. - */ - hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE); - if (!he->filtered) - he->hists->stats.nr_non_filtered_samples++; } -static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al, - struct perf_sample *sample, struct perf_evsel *evsel) +static int hist_iter__report_callback(struct hist_entry_iter *iter, + struct addr_location *al, bool single, + void *arg) { - struct symbol *parent = NULL; - struct hist_entry *he; - struct mem_info *mi, *mx; - uint64_t cost; - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); - - if (err) - return err; + int err = 0; + struct report *rep = arg; + struct hist_entry *he = iter->he; + struct perf_evsel *evsel = iter->evsel; + struct mem_info *mi; + struct branch_info *bi; - mi = sample__resolve_mem(sample, al); - if (!mi) - return -ENOMEM; + report__inc_stats(rep, he); - if (rep->hide_unresolved && !al->sym) + if (!ui__has_annotation()) return 0; - cost = sample->weight; - if (!cost) - cost = 1; - - /* - * must pass period=weight in order to get the correct - * sorting from hists__collapse_resort() which is solely - * based on periods. We want sorting be done on nr_events * weight - * and this is indirectly achieved by passing period=weight here - * and the he_stat__add_period() function. - */ - he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi, - cost, cost, 0); - if (!he) - return -ENOMEM; - - if (ui__has_annotation()) { - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - if (err) - goto out; - - mx = he->mem_info; - err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); + if (sort__mode == SORT_MODE__BRANCH) { + bi = he->branch_info; + err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); if (err) goto out; - } - - report__inc_stats(rep, he); - - err = hist_entry__append_callchain(he, sample); -out: - return err; -} - -static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al, - struct perf_sample *sample, struct perf_evsel *evsel) -{ - struct symbol *parent = NULL; - unsigned i; - struct hist_entry *he; - struct branch_info *bi, *bx; - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); - if (err) - return err; - - bi = sample__resolve_bstack(sample, al); - if (!bi) - return -ENOMEM; - - for (i = 0; i < sample->branch_stack->nr; i++) { - if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) - continue; + err = addr_map_symbol__inc_samples(&bi->to, evsel->idx); - err = -ENOMEM; - - /* overwrite the 'al' to branch-to info */ - al->map = bi[i].to.map; - al->sym = bi[i].to.sym; - al->addr = bi[i].to.addr; - /* - * The report shows the percentage of total branches captured - * and not events sampled. Thus we use a pseudo period of 1. - */ - he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL, - 1, 1, 0); - if (he) { - if (ui__has_annotation()) { - bx = he->branch_info; - err = addr_map_symbol__inc_samples(&bx->from, - evsel->idx); - if (err) - goto out; - - err = addr_map_symbol__inc_samples(&bx->to, - evsel->idx); - if (err) - goto out; - } - report__inc_stats(rep, he); - } else + } else if (rep->mem_mode) { + mi = he->mem_info; + err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx); + if (err) goto out; - } - err = 0; -out: - free(bi); - return err; -} - -static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel, - struct addr_location *al, struct perf_sample *sample) -{ - struct symbol *parent = NULL; - struct hist_entry *he; - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); - - if (err) - return err; - he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL, - sample->period, sample->weight, - sample->transaction); - if (he == NULL) - return -ENOMEM; - - err = hist_entry__append_callchain(he, sample); - if (err) - goto out; - - if (ui__has_annotation()) err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - report__inc_stats(rep, he); + } else if (symbol_conf.cumulate_callchain) { + if (single) + err = hist_entry__inc_addr_samples(he, evsel->idx, + al->addr); + } else { + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + } out: return err; } - static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -243,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool, { struct report *rep = container_of(tool, struct report, tool); struct addr_location al; + struct hist_entry_iter iter = { + .hide_unresolved = rep->hide_unresolved, + .add_entry_cb = hist_iter__report_callback, + }; int ret; if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { @@ -257,22 +161,23 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - if (sort__mode == SORT_MODE__BRANCH) { - ret = report__add_branch_hist_entry(rep, &al, sample, evsel); - if (ret < 0) - pr_debug("problem adding lbr entry, skipping event\n"); - } else if (rep->mem_mode == 1) { - ret = report__add_mem_hist_entry(rep, &al, sample, evsel); - if (ret < 0) - pr_debug("problem adding mem entry, skipping event\n"); - } else { - if (al.map != NULL) - al.map->dso->hit = 1; + if (sort__mode == SORT_MODE__BRANCH) + iter.ops = &hist_iter_branch; + else if (rep->mem_mode) + iter.ops = &hist_iter_mem; + else if (symbol_conf.cumulate_callchain) + iter.ops = &hist_iter_cumulative; + else + iter.ops = &hist_iter_normal; + + if (al.map != NULL) + al.map->dso->hit = 1; + + ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack, + rep); + if (ret < 0) + pr_debug("problem adding hist entry, skipping event\n"); - ret = report__add_hist_entry(rep, evsel, &al, sample); - if (ret < 0) - pr_debug("problem incrementing symbol period, skipping event\n"); - } return ret; } @@ -329,6 +234,14 @@ static int report__setup_sample_type(struct report *rep) } } + if (symbol_conf.cumulate_callchain) { + /* Silently ignore if callchain is missing */ + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { + symbol_conf.cumulate_callchain = false; + perf_hpp__cancel_cumulate(); + } + } + if (sort__mode == SORT_MODE__BRANCH) { if (!is_pipe && !(sample_type & PERF_SAMPLE_BRANCH_STACK)) { @@ -712,6 +625,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), + OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, + "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &report.max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " @@ -804,8 +719,10 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); - if (branch_mode == -1 && has_br_stack) + if (branch_mode == -1 && has_br_stack) { sort__mode = SORT_MODE__BRANCH; + symbol_conf.cumulate_callchain = false; + } if (report.mem_mode) { if (sort__mode == SORT_MODE__BRANCH) { @@ -813,6 +730,7 @@ repeat: goto error; } sort__mode = SORT_MODE__MEMORY; + symbol_conf.cumulate_callchain = false; } if (setup_sorting() < 0) { diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d7176830b9b2..c38d06c04775 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ int err = 0; evsel->hists.stats.total_period += sample->period; - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + hists__inc_nr_samples(&evsel->hists, true); if (evsel->handler != NULL) { tracepoint_handler f = evsel->handler; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5b389ce4cd15..377971dc89a3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -196,6 +196,12 @@ static void perf_top__record_precise_ip(struct perf_top *top, pthread_mutex_unlock(¬es->lock); + /* + * This function is now called with he->hists->lock held. + * Release it before going to sleep. + */ + pthread_mutex_unlock(&he->hists->lock); + if (err == -ERANGE && !he->ms.map->erange_warned) ui__warn_map_erange(he->ms.map, sym, ip); else if (err == -ENOMEM) { @@ -203,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top, sym->name); sleep(1); } + + pthread_mutex_lock(&he->hists->lock); } static void perf_top__show_details(struct perf_top *top) @@ -238,27 +246,6 @@ out_unlock: pthread_mutex_unlock(¬es->lock); } -static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, - struct addr_location *al, - struct perf_sample *sample) -{ - struct hist_entry *he; - - pthread_mutex_lock(&evsel->hists.lock); - he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, - sample->period, sample->weight, - sample->transaction); - pthread_mutex_unlock(&evsel->hists.lock); - if (he == NULL) - return NULL; - - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); - if (!he->filtered) - evsel->hists.stats.nr_non_filtered_samples++; - - return he; -} - static void perf_top__print_sym_table(struct perf_top *top) { char bf[160]; @@ -662,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym) return 0; } +static int hist_iter__top_callback(struct hist_entry_iter *iter, + struct addr_location *al, bool single, + void *arg) +{ + struct perf_top *top = arg; + struct hist_entry *he = iter->he; + struct perf_evsel *evsel = iter->evsel; + + if (sort__has_sym && single) { + u64 ip = al->addr; + + if (al->map) + ip = al->map->unmap_ip(al->map, ip); + + perf_top__record_precise_ip(top, he, evsel->idx, ip); + } + + return 0; +} + static void perf_event__process_sample(struct perf_tool *tool, const union perf_event *event, struct perf_evsel *evsel, @@ -669,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool, struct machine *machine) { struct perf_top *top = container_of(tool, struct perf_top, tool); - struct symbol *parent = NULL; - u64 ip = sample->ip; struct addr_location al; int err; @@ -745,25 +750,23 @@ static void perf_event__process_sample(struct perf_tool *tool, } if (al.sym == NULL || !al.sym->ignore) { - struct hist_entry *he; + struct hist_entry_iter iter = { + .add_entry_cb = hist_iter__top_callback, + }; - err = sample__resolve_callchain(sample, &parent, evsel, &al, - top->max_stack); - if (err) - return; + if (symbol_conf.cumulate_callchain) + iter.ops = &hist_iter_cumulative; + else + iter.ops = &hist_iter_normal; - he = perf_evsel__add_hist_entry(evsel, &al, sample); - if (he == NULL) { - pr_err("Problem incrementing symbol period, skipping event\n"); - return; - } + pthread_mutex_lock(&evsel->hists.lock); - err = hist_entry__append_callchain(he, sample); - if (err) - return; + err = hist_entry_iter__add(&iter, &al, evsel, sample, + top->max_stack, top); + if (err < 0) + pr_err("Problem incrementing symbol period, skipping event\n"); - if (sort__has_sym) - perf_top__record_precise_ip(top, he, evsel->idx, ip); + pthread_mutex_unlock(&evsel->hists.lock); } return; @@ -1001,6 +1004,10 @@ static int perf_top_config(const char *var, const char *value, void *cb) if (!strcmp(var, "top.call-graph")) return record_parse_callchain(value, &top->record_opts); + if (!strcmp(var, "top.children")) { + symbol_conf.cumulate_callchain = perf_config_bool(var, value); + return 0; + } return perf_default_config(var, value, cb); } @@ -1095,6 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK(0, "call-graph", &top.record_opts, "mode[,dump_size]", record_callchain_help, &parse_callchain_opt), + OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, + "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &top.max_stack, "Set the maximum stack depth when parsing the callchain. " "Default: " __stringify(PERF_MAX_STACK_DEPTH)), @@ -1200,6 +1209,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) top.sym_evsel = perf_evlist__first(top.evlist); + if (!symbol_conf.use_callchain) { + symbol_conf.cumulate_callchain = false; + perf_hpp__cancel_cumulate(); + } + symbol_conf.priv_size = sizeof(struct annotation); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 729bbdf5cec7..f30ac5e5d271 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -299,7 +299,11 @@ else NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 else - msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); + ifneq ($(filter s% -static%,$(LDFLAGS),),) + msg := $(error No static glibc found, please install glibc-static); + else + msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]); + endif endif else ifndef NO_LIBDW_DWARF_UNWIND @@ -447,6 +451,7 @@ else ifneq ($(feature-libperl), 1) CFLAGS += -DNO_LIBPERL NO_LIBPERL := 1 + msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed); else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) @@ -599,7 +604,7 @@ endif # Make the path relative to DESTDIR, not to prefix ifndef DESTDIR -prefix = $(HOME) +prefix ?= $(HOME) endif bindir_relative = bin bindir = $(prefix)/$(bindir_relative) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 431798a4110d..95c58fc15284 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -458,6 +458,7 @@ int main(int argc, const char **argv) /* The page_size is placed in util object. */ page_size = sysconf(_SC_PAGE_SIZE); + cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); cmd = perf_extract_argv0_path(argv[0]); if (!cmd) @@ -481,14 +482,18 @@ int main(int argc, const char **argv) fprintf(stderr, "cannot handle %s internally", cmd); goto out; } -#ifdef HAVE_LIBAUDIT_SUPPORT if (!prefixcmp(cmd, "trace")) { +#ifdef HAVE_LIBAUDIT_SUPPORT set_buildid_dir(); setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv, NULL); - } +#else + fprintf(stderr, + "trace command not available: missing audit-libs devel package at build time.\n"); + goto out; #endif + } /* Look for flags.. */ argv++; argc--; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 831f52cae197..6f8b01bc6033 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -3,6 +3,8 @@ * * Builtin regression testing command: ever growing number of sanity tests */ +#include <unistd.h> +#include <string.h> #include "builtin.h" #include "intlist.h" #include "tests.h" @@ -50,10 +52,18 @@ static struct test { .func = test__pmu, }, { - .desc = "Test dso data interface", + .desc = "Test dso data read", .func = test__dso_data, }, { + .desc = "Test dso data cache", + .func = test__dso_data_cache, + }, + { + .desc = "Test dso data reopen", + .func = test__dso_data_reopen, + }, + { .desc = "roundtrip evsel->name check", .func = test__perf_evsel__roundtrip_name_test, }, @@ -140,6 +150,10 @@ static struct test { .func = test__hists_output, }, { + .desc = "Test cumulation of child hist entries", + .func = test__hists_cumulate, + }, + { .func = NULL, }, }; @@ -168,6 +182,34 @@ static bool perf_test__matches(int curr, int argc, const char *argv[]) return false; } +static int run_test(struct test *test) +{ + int status, err = -1, child = fork(); + + if (child < 0) { + pr_err("failed to fork test: %s\n", strerror(errno)); + return -1; + } + + if (!child) { + pr_debug("test child forked, pid %d\n", getpid()); + err = test->func(); + exit(err); + } + + wait(&status); + + if (WIFEXITED(status)) { + err = WEXITSTATUS(status); + pr_debug("test child finished with %d\n", err); + } else if (WIFSIGNALED(status)) { + err = -1; + pr_debug("test child interrupted\n"); + } + + return err; +} + static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) { int i = 0; @@ -196,7 +238,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) } pr_debug("\n--- start ---\n"); - err = tests[curr].func(); + err = run_test(&tests[curr]); pr_debug("---- end ----\n%s:", tests[curr].desc); switch (err) { diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 3e6cb171e3d3..630808cd7cc2 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -1,22 +1,27 @@ -#include "util.h" - #include <stdlib.h> #include <linux/types.h> #include <sys/stat.h> #include <fcntl.h> #include <string.h> - +#include <sys/time.h> +#include <sys/resource.h> +#include <api/fs/fs.h> +#include "util.h" #include "machine.h" #include "symbol.h" #include "tests.h" static char *test_file(int size) { - static char buf_templ[] = "/tmp/test-XXXXXX"; +#define TEMPL "/tmp/perf-test-XXXXXX" + static char buf_templ[sizeof(TEMPL)]; char *templ = buf_templ; int fd, i; unsigned char *buf; + strcpy(buf_templ, TEMPL); +#undef TEMPL + fd = mkstemp(templ); if (fd < 0) { perror("mkstemp failed"); @@ -150,3 +155,204 @@ int test__dso_data(void) unlink(file); return 0; } + +static long open_files_cnt(void) +{ + char path[PATH_MAX]; + struct dirent *dent; + DIR *dir; + long nr = 0; + + scnprintf(path, PATH_MAX, "%s/self/fd", procfs__mountpoint()); + pr_debug("fd path: %s\n", path); + + dir = opendir(path); + TEST_ASSERT_VAL("failed to open fd directory", dir); + + while ((dent = readdir(dir)) != NULL) { + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + nr++; + } + + closedir(dir); + return nr - 1; +} + +static struct dso **dsos; + +static int dsos__create(int cnt, int size) +{ + int i; + + dsos = malloc(sizeof(dsos) * cnt); + TEST_ASSERT_VAL("failed to alloc dsos array", dsos); + + for (i = 0; i < cnt; i++) { + char *file; + + file = test_file(size); + TEST_ASSERT_VAL("failed to get dso file", file); + + dsos[i] = dso__new(file); + TEST_ASSERT_VAL("failed to get dso", dsos[i]); + } + + return 0; +} + +static void dsos__delete(int cnt) +{ + int i; + + for (i = 0; i < cnt; i++) { + struct dso *dso = dsos[i]; + + unlink(dso->name); + dso__delete(dso); + } + + free(dsos); +} + +static int set_fd_limit(int n) +{ + struct rlimit rlim; + + if (getrlimit(RLIMIT_NOFILE, &rlim)) + return -1; + + pr_debug("file limit %ld, new %d\n", (long) rlim.rlim_cur, n); + + rlim.rlim_cur = n; + return setrlimit(RLIMIT_NOFILE, &rlim); +} + +int test__dso_data_cache(void) +{ + struct machine machine; + long nr_end, nr = open_files_cnt(); + int dso_cnt, limit, i, fd; + + memset(&machine, 0, sizeof(machine)); + + /* set as system limit */ + limit = nr * 4; + TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit)); + + /* and this is now our dso open FDs limit + 1 extra */ + dso_cnt = limit / 2 + 1; + TEST_ASSERT_VAL("failed to create dsos\n", + !dsos__create(dso_cnt, TEST_FILE_SIZE)); + + for (i = 0; i < (dso_cnt - 1); i++) { + struct dso *dso = dsos[i]; + + /* + * Open dsos via dso__data_fd or dso__data_read_offset. + * Both opens the data file and keep it open. + */ + if (i % 2) { + fd = dso__data_fd(dso, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + } else { + #define BUFSIZE 10 + u8 buf[BUFSIZE]; + ssize_t n; + + n = dso__data_read_offset(dso, &machine, 0, buf, BUFSIZE); + TEST_ASSERT_VAL("failed to read dso", n == BUFSIZE); + } + } + + /* open +1 dso over the allowed limit */ + fd = dso__data_fd(dsos[i], &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* should force the first one to be closed */ + TEST_ASSERT_VAL("failed to close dsos[0]", dsos[0]->data.fd == -1); + + /* cleanup everything */ + dsos__delete(dso_cnt); + + /* Make sure we did not leak any file descriptor. */ + nr_end = open_files_cnt(); + pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end); + TEST_ASSERT_VAL("failed leadking files", nr == nr_end); + return 0; +} + +int test__dso_data_reopen(void) +{ + struct machine machine; + long nr_end, nr = open_files_cnt(); + int fd, fd_extra; + +#define dso_0 (dsos[0]) +#define dso_1 (dsos[1]) +#define dso_2 (dsos[2]) + + memset(&machine, 0, sizeof(machine)); + + /* + * Test scenario: + * - create 3 dso objects + * - set process file descriptor limit to current + * files count + 3 + * - test that the first dso gets closed when we + * reach the files count limit + */ + + /* Make sure we are able to open 3 fds anyway */ + TEST_ASSERT_VAL("failed to set file limit", + !set_fd_limit((nr + 3))); + + TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(3, TEST_FILE_SIZE)); + + /* open dso_0 */ + fd = dso__data_fd(dso_0, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* open dso_1 */ + fd = dso__data_fd(dso_1, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* + * open extra file descriptor and we just + * reached the files count limit + */ + fd_extra = open("/dev/null", O_RDONLY); + TEST_ASSERT_VAL("failed to open extra fd", fd_extra > 0); + + /* open dso_2 */ + fd = dso__data_fd(dso_2, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* + * dso_0 should get closed, because we reached + * the file descriptor limit + */ + TEST_ASSERT_VAL("failed to close dso_0", dso_0->data.fd == -1); + + /* open dso_0 */ + fd = dso__data_fd(dso_0, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + + /* + * dso_1 should get closed, because we reached + * the file descriptor limit + */ + TEST_ASSERT_VAL("failed to close dso_1", dso_1->data.fd == -1); + + /* cleanup everything */ + close(fd_extra); + dsos__delete(3); + + /* Make sure we did not leak any file descriptor. */ + nr_end = open_files_cnt(); + pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end); + TEST_ASSERT_VAL("failed leadking files", nr == nr_end); + return 0; +} diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 108f0cd49f4e..96adb730b744 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -15,7 +15,7 @@ static int mmap_handler(struct perf_tool *tool __maybe_unused, struct perf_sample *sample __maybe_unused, struct machine *machine) { - return machine__process_mmap_event(machine, event, NULL); + return machine__process_mmap2_event(machine, event, NULL); } static int init_live_machine(struct machine *machine) diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index e4e01aadc3be..a62c09134516 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -12,9 +12,9 @@ static struct { u32 pid; const char *comm; } fake_threads[] = { - { 100, "perf" }, - { 200, "perf" }, - { 300, "bash" }, + { FAKE_PID_PERF1, "perf" }, + { FAKE_PID_PERF2, "perf" }, + { FAKE_PID_BASH, "bash" }, }; static struct { @@ -22,15 +22,15 @@ static struct { u64 start; const char *filename; } fake_mmap_info[] = { - { 100, 0x40000, "perf" }, - { 100, 0x50000, "libc" }, - { 100, 0xf0000, "[kernel]" }, - { 200, 0x40000, "perf" }, - { 200, 0x50000, "libc" }, - { 200, 0xf0000, "[kernel]" }, - { 300, 0x40000, "bash" }, - { 300, 0x50000, "libc" }, - { 300, 0xf0000, "[kernel]" }, + { FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" }, + { FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" }, + { FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" }, + { FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" }, + { FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" }, + { FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" }, + { FAKE_PID_BASH, FAKE_MAP_BASH, "bash" }, + { FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" }, + { FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" }, }; struct fake_sym { @@ -40,27 +40,30 @@ struct fake_sym { }; static struct fake_sym perf_syms[] = { - { 700, 100, "main" }, - { 800, 100, "run_command" }, - { 900, 100, "cmd_record" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" }, }; static struct fake_sym bash_syms[] = { - { 700, 100, "main" }, - { 800, 100, "xmalloc" }, - { 900, 100, "xfree" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" }, }; static struct fake_sym libc_syms[] = { { 700, 100, "malloc" }, { 800, 100, "free" }, { 900, 100, "realloc" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" }, }; static struct fake_sym kernel_syms[] = { - { 700, 100, "schedule" }, - { 800, 100, "page_fault" }, - { 900, 100, "sys_perf_event_open" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" }, }; static struct { @@ -102,7 +105,7 @@ struct machine *setup_fake_machine(struct machines *machines) .pid = fake_mmap_info[i].pid, .tid = fake_mmap_info[i].pid, .start = fake_mmap_info[i].start, - .len = 0x1000ULL, + .len = FAKE_MAP_LENGTH, .pgoff = 0ULL, }, }; @@ -193,10 +196,11 @@ void print_hists_out(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); if (!he->filtered) { - pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n", + pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n", i, thread__comm_str(he->thread), he->thread->tid, he->ms.map->dso->short_name, - he->ms.sym->name, he->stat.period); + he->ms.sym->name, he->stat.period, + he->stat_acc ? he->stat_acc->period : 0); } i++; diff --git a/tools/perf/tests/hists_common.h b/tools/perf/tests/hists_common.h index 1415ae69d7b6..888254e8665c 100644 --- a/tools/perf/tests/hists_common.h +++ b/tools/perf/tests/hists_common.h @@ -4,6 +4,34 @@ struct machine; struct machines; +#define FAKE_PID_PERF1 100 +#define FAKE_PID_PERF2 200 +#define FAKE_PID_BASH 300 + +#define FAKE_MAP_PERF 0x400000 +#define FAKE_MAP_BASH 0x400000 +#define FAKE_MAP_LIBC 0x500000 +#define FAKE_MAP_KERNEL 0xf00000 +#define FAKE_MAP_LENGTH 0x100000 + +#define FAKE_SYM_OFFSET1 700 +#define FAKE_SYM_OFFSET2 800 +#define FAKE_SYM_OFFSET3 900 +#define FAKE_SYM_LENGTH 100 + +#define FAKE_IP_PERF_MAIN FAKE_MAP_PERF + FAKE_SYM_OFFSET1 +#define FAKE_IP_PERF_RUN_COMMAND FAKE_MAP_PERF + FAKE_SYM_OFFSET2 +#define FAKE_IP_PERF_CMD_RECORD FAKE_MAP_PERF + FAKE_SYM_OFFSET3 +#define FAKE_IP_BASH_MAIN FAKE_MAP_BASH + FAKE_SYM_OFFSET1 +#define FAKE_IP_BASH_XMALLOC FAKE_MAP_BASH + FAKE_SYM_OFFSET2 +#define FAKE_IP_BASH_XFREE FAKE_MAP_BASH + FAKE_SYM_OFFSET3 +#define FAKE_IP_LIBC_MALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET1 +#define FAKE_IP_LIBC_FREE FAKE_MAP_LIBC + FAKE_SYM_OFFSET2 +#define FAKE_IP_LIBC_REALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET3 +#define FAKE_IP_KERNEL_SCHEDULE FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1 +#define FAKE_IP_KERNEL_PAGE_FAULT FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2 +#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3 + /* * The setup_fake_machine() provides a test environment which consists * of 3 processes that have 3 mappings and in turn, have 3 symbols @@ -13,7 +41,7 @@ struct machines; * ............. ............. ................... * perf: 100 perf main * perf: 100 perf run_command - * perf: 100 perf comd_record + * perf: 100 perf cmd_record * perf: 100 libc malloc * perf: 100 libc free * perf: 100 libc realloc @@ -22,7 +50,7 @@ struct machines; * perf: 100 [kernel] sys_perf_event_open * perf: 200 perf main * perf: 200 perf run_command - * perf: 200 perf comd_record + * perf: 200 perf cmd_record * perf: 200 libc malloc * perf: 200 libc free * perf: 200 libc realloc diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c new file mode 100644 index 000000000000..0ac240db2e24 --- /dev/null +++ b/tools/perf/tests/hists_cumulate.c @@ -0,0 +1,726 @@ +#include "perf.h" +#include "util/debug.h" +#include "util/symbol.h" +#include "util/sort.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/machine.h" +#include "util/thread.h" +#include "util/parse-events.h" +#include "tests/tests.h" +#include "tests/hists_common.h" + +struct sample { + u32 pid; + u64 ip; + struct thread *thread; + struct map *map; + struct symbol *sym; +}; + +/* For the numbers, see hists_common.c */ +static struct sample fake_samples[] = { + /* perf [kernel] schedule() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, + /* perf [perf] main() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, + /* perf [perf] cmd_record() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, }, + /* perf [libc] malloc() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, + /* perf [libc] free() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, }, + /* perf [perf] main() */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, + /* perf [kernel] page_fault() */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, + /* bash [bash] main() */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, + /* bash [bash] xmalloc() */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, + /* bash [kernel] page_fault() */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, +}; + +/* + * Will be casted to struct ip_callchain which has all 64 bit entries + * of nr and ips[]. + */ +static u64 fake_callchains[][10] = { + /* schedule => run_command => main */ + { 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, + /* main */ + { 1, FAKE_IP_PERF_MAIN, }, + /* cmd_record => run_command => main */ + { 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, + /* malloc => cmd_record => run_command => main */ + { 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, + FAKE_IP_PERF_MAIN, }, + /* free => cmd_record => run_command => main */ + { 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, + FAKE_IP_PERF_MAIN, }, + /* main */ + { 1, FAKE_IP_PERF_MAIN, }, + /* page_fault => sys_perf_event_open => run_command => main */ + { 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, + FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, + /* main */ + { 1, FAKE_IP_BASH_MAIN, }, + /* xmalloc => malloc => xmalloc => malloc => xmalloc => main */ + { 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, + FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, }, + /* page_fault => malloc => main */ + { 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, }, +}; + +static int add_hist_entries(struct hists *hists, struct machine *machine) +{ + struct addr_location al; + struct perf_evsel *evsel = hists_to_evsel(hists); + struct perf_sample sample = { .period = 1000, }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { + const union perf_event event = { + .header = { + .misc = PERF_RECORD_MISC_USER, + }, + }; + struct hist_entry_iter iter = { + .hide_unresolved = false, + }; + + if (symbol_conf.cumulate_callchain) + iter.ops = &hist_iter_cumulative; + else + iter.ops = &hist_iter_normal; + + sample.pid = fake_samples[i].pid; + sample.tid = fake_samples[i].pid; + sample.ip = fake_samples[i].ip; + sample.callchain = (struct ip_callchain *)fake_callchains[i]; + + if (perf_event__preprocess_sample(&event, machine, &al, + &sample) < 0) + goto out; + + if (hist_entry_iter__add(&iter, &al, evsel, &sample, + PERF_MAX_STACK_DEPTH, NULL) < 0) + goto out; + + fake_samples[i].thread = al.thread; + fake_samples[i].map = al.map; + fake_samples[i].sym = al.sym; + } + + return TEST_OK; + +out: + pr_debug("Not enough memory for adding a hist entry\n"); + return TEST_FAIL; +} + +static void del_hist_entries(struct hists *hists) +{ + struct hist_entry *he; + struct rb_root *root_in; + struct rb_root *root_out; + struct rb_node *node; + + if (sort__need_collapse) + root_in = &hists->entries_collapsed; + else + root_in = hists->entries_in; + + root_out = &hists->entries; + + while (!RB_EMPTY_ROOT(root_out)) { + node = rb_first(root_out); + + he = rb_entry(node, struct hist_entry, rb_node); + rb_erase(node, root_out); + rb_erase(&he->rb_node_in, root_in); + hist_entry__free(he); + } +} + +typedef int (*test_fn_t)(struct perf_evsel *, struct machine *); + +#define COMM(he) (thread__comm_str(he->thread)) +#define DSO(he) (he->ms.map->dso->short_name) +#define SYM(he) (he->ms.sym->name) +#define CPU(he) (he->cpu) +#define PID(he) (he->thread->tid) +#define DEPTH(he) (he->callchain->max_depth) +#define CDSO(cl) (cl->ms.map->dso->short_name) +#define CSYM(cl) (cl->ms.sym->name) + +struct result { + u64 children; + u64 self; + const char *comm; + const char *dso; + const char *sym; +}; + +struct callchain_result { + u64 nr; + struct { + const char *dso; + const char *sym; + } node[10]; +}; + +static int do_test(struct hists *hists, struct result *expected, size_t nr_expected, + struct callchain_result *expected_callchain, size_t nr_callchain) +{ + char buf[32]; + size_t i, c; + struct hist_entry *he; + struct rb_root *root; + struct rb_node *node; + struct callchain_node *cnode; + struct callchain_list *clist; + + /* + * adding and deleting hist entries must be done outside of this + * function since TEST_ASSERT_VAL() returns in case of failure. + */ + hists__collapse_resort(hists, NULL); + hists__output_resort(hists); + + if (verbose > 2) { + pr_info("use callchain: %d, cumulate callchain: %d\n", + symbol_conf.use_callchain, + symbol_conf.cumulate_callchain); + print_hists_out(hists); + } + + root = &hists->entries; + for (node = rb_first(root), i = 0; + node && (he = rb_entry(node, struct hist_entry, rb_node)); + node = rb_next(node), i++) { + scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i); + + TEST_ASSERT_VAL("Incorrect number of hist entry", + i < nr_expected); + TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self && + !strcmp(COMM(he), expected[i].comm) && + !strcmp(DSO(he), expected[i].dso) && + !strcmp(SYM(he), expected[i].sym)); + + if (symbol_conf.cumulate_callchain) + TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children); + + if (!symbol_conf.use_callchain) + continue; + + /* check callchain entries */ + root = &he->callchain->node.rb_root; + cnode = rb_entry(rb_first(root), struct callchain_node, rb_node); + + c = 0; + list_for_each_entry(clist, &cnode->val, list) { + scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c); + + TEST_ASSERT_VAL("Incorrect number of callchain entry", + c < expected_callchain[i].nr); + TEST_ASSERT_VAL(buf, + !strcmp(CDSO(clist), expected_callchain[i].node[c].dso) && + !strcmp(CSYM(clist), expected_callchain[i].node[c].sym)); + c++; + } + /* TODO: handle multiple child nodes properly */ + TEST_ASSERT_VAL("Incorrect number of callchain entry", + c <= expected_callchain[i].nr); + } + TEST_ASSERT_VAL("Incorrect number of hist entry", + i == nr_expected); + TEST_ASSERT_VAL("Incorrect number of callchain entry", + !symbol_conf.use_callchain || nr_expected == nr_callchain); + return 0; +} + +/* NO callchain + NO children */ +static int test1(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Overhead Command Shared Object Symbol + * ======== ======= ============= ============== + * 20.00% perf perf [.] main + * 10.00% bash [kernel] [k] page_fault + * 10.00% bash bash [.] main + * 10.00% bash bash [.] xmalloc + * 10.00% perf [kernel] [k] page_fault + * 10.00% perf [kernel] [k] schedule + * 10.00% perf libc [.] free + * 10.00% perf libc [.] malloc + * 10.00% perf perf [.] cmd_record + */ + struct result expected[] = { + { 0, 2000, "perf", "perf", "main" }, + { 0, 1000, "bash", "[kernel]", "page_fault" }, + { 0, 1000, "bash", "bash", "main" }, + { 0, 1000, "bash", "bash", "xmalloc" }, + { 0, 1000, "perf", "[kernel]", "page_fault" }, + { 0, 1000, "perf", "[kernel]", "schedule" }, + { 0, 1000, "perf", "libc", "free" }, + { 0, 1000, "perf", "libc", "malloc" }, + { 0, 1000, "perf", "perf", "cmd_record" }, + }; + + symbol_conf.use_callchain = false; + symbol_conf.cumulate_callchain = false; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +/* callcain + NO children */ +static int test2(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Overhead Command Shared Object Symbol + * ======== ======= ============= ============== + * 20.00% perf perf [.] main + * | + * --- main + * + * 10.00% bash [kernel] [k] page_fault + * | + * --- page_fault + * malloc + * main + * + * 10.00% bash bash [.] main + * | + * --- main + * + * 10.00% bash bash [.] xmalloc + * | + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc + * main + * + * 10.00% perf [kernel] [k] page_fault + * | + * --- page_fault + * sys_perf_event_open + * run_command + * main + * + * 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * + * 10.00% perf libc [.] free + * | + * --- free + * cmd_record + * run_command + * main + * + * 10.00% perf libc [.] malloc + * | + * --- malloc + * cmd_record + * run_command + * main + * + * 10.00% perf perf [.] cmd_record + * | + * --- cmd_record + * run_command + * main + * + */ + struct result expected[] = { + { 0, 2000, "perf", "perf", "main" }, + { 0, 1000, "bash", "[kernel]", "page_fault" }, + { 0, 1000, "bash", "bash", "main" }, + { 0, 1000, "bash", "bash", "xmalloc" }, + { 0, 1000, "perf", "[kernel]", "page_fault" }, + { 0, 1000, "perf", "[kernel]", "schedule" }, + { 0, 1000, "perf", "libc", "free" }, + { 0, 1000, "perf", "libc", "malloc" }, + { 0, 1000, "perf", "perf", "cmd_record" }, + }; + struct callchain_result expected_callchain[] = { + { + 1, { { "perf", "main" }, }, + }, + { + 3, { { "[kernel]", "page_fault" }, + { "libc", "malloc" }, + { "bash", "main" }, }, + }, + { + 1, { { "bash", "main" }, }, + }, + { + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, + }, + { + 4, { { "[kernel]", "page_fault" }, + { "[kernel]", "sys_perf_event_open" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "free" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "malloc" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 3, { { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + }; + + symbol_conf.use_callchain = true; + symbol_conf.cumulate_callchain = false; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), + expected_callchain, ARRAY_SIZE(expected_callchain)); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +/* NO callchain + children */ +static int test3(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Children Self Command Shared Object Symbol + * ======== ======== ======= ============= ======================= + * 70.00% 20.00% perf perf [.] main + * 50.00% 0.00% perf perf [.] run_command + * 30.00% 10.00% bash bash [.] main + * 30.00% 10.00% perf perf [.] cmd_record + * 20.00% 0.00% bash libc [.] malloc + * 10.00% 10.00% bash [kernel] [k] page_fault + * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * 10.00% 10.00% perf [kernel] [k] page_fault + * 10.00% 10.00% perf libc [.] free + * 10.00% 10.00% perf libc [.] malloc + * 10.00% 10.00% bash bash [.] xmalloc + */ + struct result expected[] = { + { 7000, 2000, "perf", "perf", "main" }, + { 5000, 0, "perf", "perf", "run_command" }, + { 3000, 1000, "bash", "bash", "main" }, + { 3000, 1000, "perf", "perf", "cmd_record" }, + { 2000, 0, "bash", "libc", "malloc" }, + { 1000, 1000, "bash", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "libc", "free" }, + { 1000, 1000, "perf", "libc", "malloc" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, + }; + + symbol_conf.use_callchain = false; + symbol_conf.cumulate_callchain = true; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +/* callchain + children */ +static int test4(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Children Self Command Shared Object Symbol + * ======== ======== ======= ============= ======================= + * 70.00% 20.00% perf perf [.] main + * | + * --- main + * + * 50.00% 0.00% perf perf [.] run_command + * | + * --- run_command + * main + * + * 30.00% 10.00% bash bash [.] main + * | + * --- main + * + * 30.00% 10.00% perf perf [.] cmd_record + * | + * --- cmd_record + * run_command + * main + * + * 20.00% 0.00% bash libc [.] malloc + * | + * --- malloc + * | + * |--50.00%-- xmalloc + * | main + * --50.00%-- main + * + * 10.00% 10.00% bash [kernel] [k] page_fault + * | + * --- page_fault + * malloc + * main + * + * 10.00% 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * | + * --- sys_perf_event_open + * run_command + * main + * + * 10.00% 10.00% perf [kernel] [k] page_fault + * | + * --- page_fault + * sys_perf_event_open + * run_command + * main + * + * 10.00% 10.00% perf libc [.] free + * | + * --- free + * cmd_record + * run_command + * main + * + * 10.00% 10.00% perf libc [.] malloc + * | + * --- malloc + * cmd_record + * run_command + * main + * + * 10.00% 10.00% bash bash [.] xmalloc + * | + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc + * main + * + */ + struct result expected[] = { + { 7000, 2000, "perf", "perf", "main" }, + { 5000, 0, "perf", "perf", "run_command" }, + { 3000, 1000, "bash", "bash", "main" }, + { 3000, 1000, "perf", "perf", "cmd_record" }, + { 2000, 0, "bash", "libc", "malloc" }, + { 1000, 1000, "bash", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "libc", "free" }, + { 1000, 1000, "perf", "libc", "malloc" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, + }; + struct callchain_result expected_callchain[] = { + { + 1, { { "perf", "main" }, }, + }, + { + 2, { { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 1, { { "bash", "main" }, }, + }, + { + 3, { { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, + { "bash", "main" }, }, + }, + { + 3, { { "[kernel]", "page_fault" }, + { "libc", "malloc" }, + { "bash", "main" }, }, + }, + { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 3, { { "[kernel]", "sys_perf_event_open" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "[kernel]", "page_fault" }, + { "[kernel]", "sys_perf_event_open" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "free" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "malloc" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, + }, + }; + + symbol_conf.use_callchain = true; + symbol_conf.cumulate_callchain = true; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), + expected_callchain, ARRAY_SIZE(expected_callchain)); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +int test__hists_cumulate(void) +{ + int err = TEST_FAIL; + struct machines machines; + struct machine *machine; + struct perf_evsel *evsel; + struct perf_evlist *evlist = perf_evlist__new(); + size_t i; + test_fn_t testcases[] = { + test1, + test2, + test3, + test4, + }; + + TEST_ASSERT_VAL("No memory", evlist); + + err = parse_events(evlist, "cpu-clock"); + if (err) + goto out; + + machines__init(&machines); + + /* setup threads/dso/map/symbols also */ + machine = setup_fake_machine(&machines); + if (!machine) + goto out; + + if (verbose > 1) + machine__fprintf(machine, stderr); + + evsel = perf_evlist__first(evlist); + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + err = testcases[i](evsel, machine); + if (err < 0) + break; + } + +out: + /* tear down everything */ + perf_evlist__delete(evlist); + machines__exit(&machines); + + return err; +} diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index c5ba924a3581..821f581fd930 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -21,33 +21,33 @@ struct sample { /* For the numbers, see hists_common.c */ static struct sample fake_samples[] = { /* perf [kernel] schedule() */ - { .pid = 100, .ip = 0xf0000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, /* perf [perf] main() */ - { .pid = 100, .ip = 0x40000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, /* perf [libc] malloc() */ - { .pid = 100, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, /* perf [perf] main() */ - { .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */ /* perf [perf] cmd_record() */ - { .pid = 200, .ip = 0x40000 + 900, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, /* perf [kernel] page_fault() */ - { .pid = 200, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, /* bash [bash] main() */ - { .pid = 300, .ip = 0x40000 + 700, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, /* bash [bash] xmalloc() */ - { .pid = 300, .ip = 0x40000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, /* bash [libc] malloc() */ - { .pid = 300, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* bash [kernel] page_fault() */ - { .pid = 300, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, }; -static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) +static int add_hist_entries(struct perf_evlist *evlist, + struct machine *machine __maybe_unused) { struct perf_evsel *evsel; struct addr_location al; - struct hist_entry *he; - struct perf_sample sample = { .cpu = 0, }; + struct perf_sample sample = { .period = 100, }; size_t i; /* @@ -62,6 +62,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) .misc = PERF_RECORD_MISC_USER, }, }; + struct hist_entry_iter iter = { + .ops = &hist_iter_normal, + .hide_unresolved = false, + }; /* make sure it has no filter at first */ evsel->hists.thread_filter = NULL; @@ -76,18 +80,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(&evsel->hists, &al, NULL, - NULL, NULL, 100, 1, 0); - if (he == NULL) + if (hist_entry_iter__add(&iter, &al, evsel, &sample, + PERF_MAX_STACK_DEPTH, NULL) < 0) goto out; fake_samples[i].thread = al.thread; fake_samples[i].map = al.map; fake_samples[i].sym = al.sym; - - hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE); - if (!he->filtered) - he->hists->stats.nr_non_filtered_samples++; } } diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 5ffa2c3eb77d..d4b34b0f50a2 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -21,41 +21,41 @@ struct sample { /* For the numbers, see hists_common.c */ static struct sample fake_common_samples[] = { /* perf [kernel] schedule() */ - { .pid = 100, .ip = 0xf0000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, /* perf [perf] main() */ - { .pid = 200, .ip = 0x40000 + 700, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* perf [perf] cmd_record() */ - { .pid = 200, .ip = 0x40000 + 900, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, /* bash [bash] xmalloc() */ - { .pid = 300, .ip = 0x40000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, /* bash [libc] malloc() */ - { .pid = 300, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, }; static struct sample fake_samples[][5] = { { /* perf [perf] run_command() */ - { .pid = 100, .ip = 0x40000 + 800, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, }, /* perf [libc] malloc() */ - { .pid = 100, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, /* perf [kernel] page_fault() */ - { .pid = 100, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, /* perf [kernel] sys_perf_event_open() */ - { .pid = 200, .ip = 0xf0000 + 900, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, }, /* bash [libc] free() */ - { .pid = 300, .ip = 0x50000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, }, }, { /* perf [libc] free() */ - { .pid = 200, .ip = 0x50000 + 800, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, }, /* bash [libc] malloc() */ - { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */ /* bash [bash] xfee() */ - { .pid = 300, .ip = 0x40000 + 900, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, }, /* bash [libc] realloc() */ - { .pid = 300, .ip = 0x50000 + 900, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, }, /* bash [kernel] page_fault() */ - { .pid = 300, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, }, }; @@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) struct perf_evsel *evsel; struct addr_location al; struct hist_entry *he; - struct perf_sample sample = { .cpu = 0, }; + struct perf_sample sample = { .period = 1, }; size_t i = 0, k; /* @@ -88,7 +88,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) goto out; he = __hists__add_entry(&evsel->hists, &al, NULL, - NULL, NULL, 1, 1, 0); + NULL, NULL, 1, 1, 0, true); if (he == NULL) goto out; @@ -112,7 +112,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) goto out; he = __hists__add_entry(&evsel->hists, &al, NULL, - NULL, NULL, 1, 1, 0); + NULL, NULL, 1, 1, 0, true); if (he == NULL) goto out; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index a16850551797..e3bbd6c54c1b 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -22,31 +22,31 @@ struct sample { /* For the numbers, see hists_common.c */ static struct sample fake_samples[] = { /* perf [kernel] schedule() */ - { .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, }, + { .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, /* perf [perf] main() */ - { .cpu = 1, .pid = 100, .ip = 0x40000 + 700, }, + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, /* perf [perf] cmd_record() */ - { .cpu = 1, .pid = 100, .ip = 0x40000 + 900, }, + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, }, /* perf [libc] malloc() */ - { .cpu = 1, .pid = 100, .ip = 0x50000 + 700, }, + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, /* perf [libc] free() */ - { .cpu = 2, .pid = 100, .ip = 0x50000 + 800, }, + { .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, }, /* perf [perf] main() */ - { .cpu = 2, .pid = 200, .ip = 0x40000 + 700, }, + { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* perf [kernel] page_fault() */ - { .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, }, + { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, /* bash [bash] main() */ - { .cpu = 3, .pid = 300, .ip = 0x40000 + 700, }, + { .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, /* bash [bash] xmalloc() */ - { .cpu = 0, .pid = 300, .ip = 0x40000 + 800, }, + { .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, /* bash [kernel] page_fault() */ - { .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, }, + { .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, }; static int add_hist_entries(struct hists *hists, struct machine *machine) { struct addr_location al; - struct hist_entry *he; + struct perf_evsel *evsel = hists_to_evsel(hists); struct perf_sample sample = { .period = 100, }; size_t i; @@ -56,6 +56,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) .misc = PERF_RECORD_MISC_USER, }, }; + struct hist_entry_iter iter = { + .ops = &hist_iter_normal, + .hide_unresolved = false, + }; sample.cpu = fake_samples[i].cpu; sample.pid = fake_samples[i].pid; @@ -66,9 +70,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(hists, &al, NULL, NULL, NULL, - sample.period, 1, 0); - if (he == NULL) + if (hist_entry_iter__add(&iter, &al, evsel, &sample, + PERF_MAX_STACK_DEPTH, NULL) < 0) goto out; fake_samples[i].thread = al.thread; diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 2f92d6e7ee00..69a71ff84e01 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -205,8 +205,7 @@ $(run): ( eval $$cmd ) >> $@ 2>&1; \ echo " test: $(call test,$@)" >> $@ 2>&1; \ $(call test,$@) && \ - rm -f $@ \ - rm -rf $$TMP_DEST + rm -rf $@ $$TMP_DEST || (cat $@ ; false) $(run_O): $(call clean) @@ -217,9 +216,7 @@ $(run_O): ( eval $$cmd ) >> $@ 2>&1 && \ echo " test: $(call test_O,$@)" >> $@ 2>&1; \ $(call test_O,$@) && \ - rm -f $@ && \ - rm -rf $$TMP_O \ - rm -rf $$TMP_DEST + rm -rf $@ $$TMP_O $$TMP_DEST || (cat $@ ; false) tarpkg: @cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \ diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index d76c0e2e6635..ed64790a395f 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -28,6 +28,8 @@ int test__syscall_open_tp_fields(void); int test__pmu(void); int test__attr(void); int test__dso_data(void); +int test__dso_data_cache(void); +int test__dso_data_reopen(void); int test__parse_events(void); int test__hists_link(void); int test__python_use(void); @@ -45,6 +47,7 @@ int test__hists_filter(void); int test__mmap_thread_lookup(void); int test__thread_mg_share(void); int test__hists_output(void); +int test__hists_cumulate(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index d11541d4d7d7..3ccf6e14f89b 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -194,7 +194,7 @@ int ui_browser__warning(struct ui_browser *browser, int timeout, ui_helpline__vpush(format, args); va_end(args); } else { - while ((key == ui__question_window("Warning!", text, + while ((key = ui__question_window("Warning!", text, "Press any key...", timeout)) == K_RESIZE) ui_browser__handle_resize(browser); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 1c331b934ffc..52c03fbbba17 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -37,7 +37,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size, static void hist_browser__update_nr_entries(struct hist_browser *hb); static struct rb_node *hists__filter_entries(struct rb_node *nd, - struct hists *hists, float min_pcnt); static bool hist_browser__has_filter(struct hist_browser *hb) @@ -319,7 +318,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) struct hists *hists = browser->hists; for (nd = rb_first(&hists->entries); - (nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL; + (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; nd = rb_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); hist_entry__set_folding(he, unfold); @@ -651,13 +650,36 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\ __hpp__slsmg_color_printf, true); \ } +#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +static u64 __hpp_get_acc_##_field(struct hist_entry *he) \ +{ \ + return he->stat_acc->_field; \ +} \ + \ +static int \ +hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\ + struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + if (!symbol_conf.cumulate_callchain) { \ + int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A"); \ + slsmg_printf("%s", hpp->buf); \ + \ + return ret; \ + } \ + return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%", \ + __hpp__slsmg_color_printf, true); \ +} + __HPP_COLOR_PERCENT_FN(overhead, period) __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) __HPP_COLOR_PERCENT_FN(overhead_us, period_us) __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) +__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period) #undef __HPP_COLOR_PERCENT_FN +#undef __HPP_COLOR_ACC_PERCENT_FN void hist_browser__init_hpp(void) { @@ -671,6 +693,8 @@ void hist_browser__init_hpp(void) hist_browser__hpp_color_overhead_guest_sys; perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = hist_browser__hpp_color_overhead_guest_us; + perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = + hist_browser__hpp_color_overhead_acc; } static int hist_browser__show_entry(struct hist_browser *browser, @@ -783,15 +807,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) for (nd = browser->top; nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - u64 total = hists__total_period(h->hists); - float percent = 0.0; + float percent; if (h->filtered) continue; - if (total) - percent = h->stat.period * 100.0 / total; - + percent = hist_entry__get_percent_limit(h); if (percent < hb->min_pcnt) continue; @@ -804,16 +825,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) } static struct rb_node *hists__filter_entries(struct rb_node *nd, - struct hists *hists, float min_pcnt) { while (nd != NULL) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - u64 total = hists__total_period(hists); - float percent = 0.0; - - if (total) - percent = h->stat.period * 100.0 / total; + float percent = hist_entry__get_percent_limit(h); if (!h->filtered && percent >= min_pcnt) return nd; @@ -825,16 +841,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, } static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, - struct hists *hists, float min_pcnt) { while (nd != NULL) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - u64 total = hists__total_period(hists); - float percent = 0.0; - - if (total) - percent = h->stat.period * 100.0 / total; + float percent = hist_entry__get_percent_limit(h); if (!h->filtered && percent >= min_pcnt) return nd; @@ -863,14 +874,14 @@ static void ui_browser__hists_seek(struct ui_browser *browser, switch (whence) { case SEEK_SET: nd = hists__filter_entries(rb_first(browser->entries), - hb->hists, hb->min_pcnt); + hb->min_pcnt); break; case SEEK_CUR: nd = browser->top; goto do_offset; case SEEK_END: nd = hists__filter_prev_entries(rb_last(browser->entries), - hb->hists, hb->min_pcnt); + hb->min_pcnt); first = false; break; default: @@ -913,8 +924,7 @@ do_offset: break; } } - nd = hists__filter_entries(rb_next(nd), hb->hists, - hb->min_pcnt); + nd = hists__filter_entries(rb_next(nd), hb->min_pcnt); if (nd == NULL) break; --offset; @@ -947,7 +957,7 @@ do_offset: } } - nd = hists__filter_prev_entries(rb_prev(nd), hb->hists, + nd = hists__filter_prev_entries(rb_prev(nd), hb->min_pcnt); if (nd == NULL) break; @@ -1126,7 +1136,6 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) { struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), - browser->hists, browser->min_pcnt); int printed = 0; @@ -1134,8 +1143,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); printed += hist_browser__fprintf_entry(browser, h, fp); - nd = hists__filter_entries(rb_next(nd), browser->hists, - browser->min_pcnt); + nd = hists__filter_entries(rb_next(nd), browser->min_pcnt); } return printed; @@ -1372,8 +1380,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb) return; } - while ((nd = hists__filter_entries(nd, hb->hists, - hb->min_pcnt)) != NULL) { + while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { nr_entries++; nd = rb_next(nd); } @@ -1699,14 +1706,14 @@ zoom_dso: zoom_out_dso: ui_helpline__pop(); browser->hists->dso_filter = NULL; - sort_dso.elide = false; + perf_hpp__set_elide(HISTC_DSO, false); } else { if (dso == NULL) continue; ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", dso->kernel ? "the Kernel" : dso->short_name); browser->hists->dso_filter = dso; - sort_dso.elide = true; + perf_hpp__set_elide(HISTC_DSO, true); pstack__push(fstack, &browser->hists->dso_filter); } hists__filter_by_dso(hists); @@ -1718,13 +1725,13 @@ zoom_thread: zoom_out_thread: ui_helpline__pop(); browser->hists->thread_filter = NULL; - sort_thread.elide = false; + perf_hpp__set_elide(HISTC_THREAD, false); } else { ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); browser->hists->thread_filter = thread; - sort_thread.elide = true; + perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(fstack, &browser->hists->thread_filter); } hists__filter_by_thread(hists); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 9d90683914d4..6ca60e482cdc 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -47,11 +47,26 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, __percent_color_snprintf, true); \ } +#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +static u64 he_get_acc_##_field(struct hist_entry *he) \ +{ \ + return he->stat_acc->_field; \ +} \ + \ +static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \ + __percent_color_snprintf, true); \ +} + __HPP_COLOR_PERCENT_FN(overhead, period) __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) __HPP_COLOR_PERCENT_FN(overhead_us, period_us) __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) +__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period) #undef __HPP_COLOR_PERCENT_FN @@ -68,6 +83,8 @@ void perf_gtk__init_hpp(void) perf_gtk__hpp_color_overhead_guest_sys; perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = perf_gtk__hpp_color_overhead_guest_us; + perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = + perf_gtk__hpp_color_overhead_acc; } static void callchain_list__sym_name(struct callchain_list *cl, @@ -181,6 +198,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, if (perf_hpp__should_skip(fmt)) continue; + /* + * XXX no way to determine where symcol column is.. + * Just use last column for now. + */ + if (perf_hpp__is_sort_entry(fmt)) + sym_col = col_idx; + fmt->header(fmt, &hpp, hists_to_evsel(hists)); gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), @@ -209,14 +233,12 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); GtkTreeIter iter; u64 total = hists__total_period(h->hists); - float percent = 0.0; + float percent; if (h->filtered) continue; - if (total) - percent = h->stat.period * 100.0 / total; - + percent = hist_entry__get_percent_limit(h); if (percent < min_pcnt) continue; @@ -238,7 +260,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, if (symbol_conf.use_callchain && sort__has_sym) { if (callchain_param.mode == CHAIN_GRAPH_REL) - total = h->stat.period; + total = symbol_conf.cumulate_callchain ? + h->stat_acc->period : h->stat.period; perf_gtk__add_callchain(&h->sorted_chain, store, &iter, sym_col, total); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 4484f5bd1b14..498adb23c02e 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -104,6 +104,18 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, return ret; } +int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he, + hpp_field_fn get_field, const char *fmt, + hpp_snprint_fn print_fn, bool fmt_percent) +{ + if (!symbol_conf.cumulate_callchain) { + return snprintf(hpp->buf, hpp->size, "%*s", + fmt_percent ? 8 : 12, "N/A"); + } + + return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent); +} + static int field_cmp(u64 field_a, u64 field_b) { if (field_a > field_b) @@ -160,6 +172,24 @@ out: return ret; } +static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, + hpp_field_fn get_field) +{ + s64 ret = 0; + + if (symbol_conf.cumulate_callchain) { + /* + * Put caller above callee when they have equal period. + */ + ret = field_cmp(get_field(a), get_field(b)); + if (ret) + return ret; + + ret = b->callchain->max_depth - a->callchain->max_depth; + } + return ret; +} + #define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ struct perf_hpp *hpp, \ @@ -242,6 +272,34 @@ static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ return __hpp__sort(a, b, he_get_##_field); \ } +#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +static u64 he_get_acc_##_field(struct hist_entry *he) \ +{ \ + return he->stat_acc->_field; \ +} \ + \ +static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \ + hpp_color_scnprintf, true); \ +} + +#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \ +static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \ + struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \ + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt, \ + hpp_entry_scnprintf, true); \ +} + +#define __HPP_SORT_ACC_FN(_type, _field) \ +static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +{ \ + return __hpp__sort_acc(a, b, he_get_acc_##_field); \ +} + #define __HPP_ENTRY_RAW_FN(_type, _field) \ static u64 he_get_raw_##_field(struct hist_entry *he) \ { \ @@ -270,18 +328,27 @@ __HPP_COLOR_PERCENT_FN(_type, _field) \ __HPP_ENTRY_PERCENT_FN(_type, _field) \ __HPP_SORT_FN(_type, _field) +#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\ +__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ +__HPP_WIDTH_FN(_type, _min_width, _unit_width) \ +__HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +__HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \ +__HPP_SORT_ACC_FN(_type, _field) + #define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \ __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ __HPP_WIDTH_FN(_type, _min_width, _unit_width) \ __HPP_ENTRY_RAW_FN(_type, _field) \ __HPP_SORT_RAW_FN(_type, _field) +__HPP_HEADER_FN(overhead_self, "Self", 8, 8) HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8) HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8) HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8) HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8) HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8) +HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8) HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12) HPP_RAW_FNS(period, "Period", period, 12, 12) @@ -303,6 +370,17 @@ static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused, .sort = hpp__sort_ ## _name, \ } +#define HPP__COLOR_ACC_PRINT_FNS(_name) \ + { \ + .header = hpp__header_ ## _name, \ + .width = hpp__width_ ## _name, \ + .color = hpp__color_ ## _name, \ + .entry = hpp__entry_ ## _name, \ + .cmp = hpp__nop_cmp, \ + .collapse = hpp__nop_cmp, \ + .sort = hpp__sort_ ## _name, \ + } + #define HPP__PRINT_FNS(_name) \ { \ .header = hpp__header_ ## _name, \ @@ -319,6 +397,7 @@ struct perf_hpp_fmt perf_hpp__format[] = { HPP__COLOR_PRINT_FNS(overhead_us), HPP__COLOR_PRINT_FNS(overhead_guest_sys), HPP__COLOR_PRINT_FNS(overhead_guest_us), + HPP__COLOR_ACC_PRINT_FNS(overhead_acc), HPP__PRINT_FNS(samples), HPP__PRINT_FNS(period) }; @@ -328,16 +407,23 @@ LIST_HEAD(perf_hpp__sort_list); #undef HPP__COLOR_PRINT_FNS +#undef HPP__COLOR_ACC_PRINT_FNS #undef HPP__PRINT_FNS #undef HPP_PERCENT_FNS +#undef HPP_PERCENT_ACC_FNS #undef HPP_RAW_FNS #undef __HPP_HEADER_FN #undef __HPP_WIDTH_FN #undef __HPP_COLOR_PERCENT_FN #undef __HPP_ENTRY_PERCENT_FN +#undef __HPP_COLOR_ACC_PERCENT_FN +#undef __HPP_ENTRY_ACC_PERCENT_FN #undef __HPP_ENTRY_RAW_FN +#undef __HPP_SORT_FN +#undef __HPP_SORT_ACC_FN +#undef __HPP_SORT_RAW_FN void perf_hpp__init(void) @@ -361,6 +447,13 @@ void perf_hpp__init(void) if (field_order) return; + if (symbol_conf.cumulate_callchain) { + perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC); + + perf_hpp__format[PERF_HPP__OVERHEAD].header = + hpp__header_overhead_self; + } + perf_hpp__column_enable(PERF_HPP__OVERHEAD); if (symbol_conf.show_cpu_utilization) { @@ -383,6 +476,12 @@ void perf_hpp__init(void) list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list; if (list_empty(list)) list_add(list, &perf_hpp__sort_list); + + if (symbol_conf.cumulate_callchain) { + list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list; + if (list_empty(list)) + list_add(list, &perf_hpp__sort_list); + } } void perf_hpp__column_register(struct perf_hpp_fmt *format) @@ -390,6 +489,11 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format) list_add_tail(&format->list, &perf_hpp__list); } +void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +{ + list_del(&format->list); +} + void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) { list_add_tail(&format->sort_list, &perf_hpp__sort_list); @@ -401,6 +505,21 @@ void perf_hpp__column_enable(unsigned col) perf_hpp__column_register(&perf_hpp__format[col]); } +void perf_hpp__column_disable(unsigned col) +{ + BUG_ON(col >= PERF_HPP__MAX_INDEX); + perf_hpp__column_unregister(&perf_hpp__format[col]); +} + +void perf_hpp__cancel_cumulate(void) +{ + if (field_order) + return; + + perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC); + perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead; +} + void perf_hpp__setup_output_field(void) { struct perf_hpp_fmt *fmt; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 9f57991025a9..90122abd3721 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -271,7 +271,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, { switch (callchain_param.mode) { case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period, + return callchain__fprintf_graph(fp, &he->sorted_chain, + symbol_conf.cumulate_callchain ? + he->stat_acc->period : he->stat.period, left_margin); break; case CHAIN_GRAPH_ABS: @@ -461,12 +463,12 @@ print_entries: for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - float percent = h->stat.period * 100.0 / - hists->stats.total_period; + float percent; if (h->filtered) continue; + percent = hist_entry__get_percent_limit(h); if (percent < min_pcnt) continue; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9a42382b3921..48b6d3f50012 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -616,7 +616,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent if (sample->callchain == NULL) return 0; - if (symbol_conf.use_callchain || sort__has_parent) { + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || + sort__has_parent) { return machine__resolve_callchain(al->machine, evsel, al->thread, sample, parent, al, max_stack); } @@ -629,3 +630,45 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp return 0; return callchain_append(he->callchain, &callchain_cursor, sample->period); } + +int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, + bool hide_unresolved) +{ + al->map = node->map; + al->sym = node->sym; + if (node->map) + al->addr = node->map->map_ip(node->map, node->ip); + else + al->addr = node->ip; + + if (al->sym == NULL) { + if (hide_unresolved) + return 0; + if (al->map == NULL) + goto out; + } + + if (al->map->groups == &al->machine->kmaps) { + if (machine__is_host(al->machine)) { + al->cpumode = PERF_RECORD_MISC_KERNEL; + al->level = 'k'; + } else { + al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + al->level = 'g'; + } + } else { + if (machine__is_host(al->machine)) { + al->cpumode = PERF_RECORD_MISC_USER; + al->level = '.'; + } else if (perf_guest) { + al->cpumode = PERF_RECORD_MISC_GUEST_USER; + al->level = 'u'; + } else { + al->cpumode = PERF_RECORD_MISC_HYPERVISOR; + al->level = 'H'; + } + } + +out: + return 1; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index bde2b0cc24cf..8f84423a75da 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -162,7 +162,18 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent struct perf_evsel *evsel, struct addr_location *al, int max_stack); int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample); +int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, + bool hide_unresolved); extern const char record_callchain_help[]; int parse_callchain_report_opt(const char *arg); + +static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, + struct callchain_cursor *src) +{ + *dest = *src; + + dest->first = src->curr; + dest->nr -= src->pos; +} #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 64453d63b971..819f10414f08 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1,3 +1,6 @@ +#include <asm/bug.h> +#include <sys/time.h> +#include <sys/resource.h> #include "symbol.h" #include "dso.h" #include "machine.h" @@ -136,7 +139,48 @@ int dso__read_binary_type_filename(const struct dso *dso, return ret; } -static int open_dso(struct dso *dso, struct machine *machine) +/* + * Global list of open DSOs and the counter. + */ +static LIST_HEAD(dso__data_open); +static long dso__data_open_cnt; + +static void dso__list_add(struct dso *dso) +{ + list_add_tail(&dso->data.open_entry, &dso__data_open); + dso__data_open_cnt++; +} + +static void dso__list_del(struct dso *dso) +{ + list_del(&dso->data.open_entry); + WARN_ONCE(dso__data_open_cnt <= 0, + "DSO data fd counter out of bounds."); + dso__data_open_cnt--; +} + +static void close_first_dso(void); + +static int do_open(char *name) +{ + int fd; + + do { + fd = open(name, O_RDONLY); + if (fd >= 0) + return fd; + + pr_debug("dso open failed, mmap: %s\n", strerror(errno)); + if (!dso__data_open_cnt || errno != EMFILE) + break; + + close_first_dso(); + } while (1); + + return -1; +} + +static int __open_dso(struct dso *dso, struct machine *machine) { int fd; char *root_dir = (char *)""; @@ -154,11 +198,130 @@ static int open_dso(struct dso *dso, struct machine *machine) return -EINVAL; } - fd = open(name, O_RDONLY); + fd = do_open(name); free(name); return fd; } +static void check_data_close(void); + +/** + * dso_close - Open DSO data file + * @dso: dso object + * + * Open @dso's data file descriptor and updates + * list/count of open DSO objects. + */ +static int open_dso(struct dso *dso, struct machine *machine) +{ + int fd = __open_dso(dso, machine); + + if (fd > 0) { + dso__list_add(dso); + /* + * Check if we crossed the allowed number + * of opened DSOs and close one if needed. + */ + check_data_close(); + } + + return fd; +} + +static void close_data_fd(struct dso *dso) +{ + if (dso->data.fd >= 0) { + close(dso->data.fd); + dso->data.fd = -1; + dso->data.file_size = 0; + dso__list_del(dso); + } +} + +/** + * dso_close - Close DSO data file + * @dso: dso object + * + * Close @dso's data file descriptor and updates + * list/count of open DSO objects. + */ +static void close_dso(struct dso *dso) +{ + close_data_fd(dso); +} + +static void close_first_dso(void) +{ + struct dso *dso; + + dso = list_first_entry(&dso__data_open, struct dso, data.open_entry); + close_dso(dso); +} + +static rlim_t get_fd_limit(void) +{ + struct rlimit l; + rlim_t limit = 0; + + /* Allow half of the current open fd limit. */ + if (getrlimit(RLIMIT_NOFILE, &l) == 0) { + if (l.rlim_cur == RLIM_INFINITY) + limit = l.rlim_cur; + else + limit = l.rlim_cur / 2; + } else { + pr_err("failed to get fd limit\n"); + limit = 1; + } + + return limit; +} + +static bool may_cache_fd(void) +{ + static rlim_t limit; + + if (!limit) + limit = get_fd_limit(); + + if (limit == RLIM_INFINITY) + return true; + + return limit > (rlim_t) dso__data_open_cnt; +} + +/* + * Check and close LRU dso if we crossed allowed limit + * for opened dso file descriptors. The limit is half + * of the RLIMIT_NOFILE files opened. +*/ +static void check_data_close(void) +{ + bool cache_fd = may_cache_fd(); + + if (!cache_fd) + close_first_dso(); +} + +/** + * dso__data_close - Close DSO data file + * @dso: dso object + * + * External interface to close @dso's data file descriptor. + */ +void dso__data_close(struct dso *dso) +{ + close_dso(dso); +} + +/** + * dso__data_fd - Get dso's data file descriptor + * @dso: dso object + * @machine: machine object + * + * External interface to find dso's file, open it and + * returns file descriptor. + */ int dso__data_fd(struct dso *dso, struct machine *machine) { enum dso_binary_type binary_type_data[] = { @@ -168,8 +331,13 @@ int dso__data_fd(struct dso *dso, struct machine *machine) }; int i = 0; - if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) - return open_dso(dso, machine); + if (dso->data.fd >= 0) + return dso->data.fd; + + if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) { + dso->data.fd = open_dso(dso, machine); + return dso->data.fd; + } do { int fd; @@ -178,7 +346,7 @@ int dso__data_fd(struct dso *dso, struct machine *machine) fd = open_dso(dso, machine); if (fd >= 0) - return fd; + return dso->data.fd = fd; } while (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND); @@ -260,16 +428,10 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset, } static ssize_t -dso_cache__read(struct dso *dso, struct machine *machine, - u64 offset, u8 *data, ssize_t size) +dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size) { struct dso_cache *cache; ssize_t ret; - int fd; - - fd = dso__data_fd(dso, machine); - if (fd < 0) - return -1; do { u64 cache_offset; @@ -283,16 +445,16 @@ dso_cache__read(struct dso *dso, struct machine *machine, cache_offset = offset & DSO__DATA_CACHE_MASK; ret = -EINVAL; - if (-1 == lseek(fd, cache_offset, SEEK_SET)) + if (-1 == lseek(dso->data.fd, cache_offset, SEEK_SET)) break; - ret = read(fd, cache->data, DSO__DATA_CACHE_SIZE); + ret = read(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE); if (ret <= 0) break; cache->offset = cache_offset; cache->size = ret; - dso_cache__insert(&dso->cache, cache); + dso_cache__insert(&dso->data.cache, cache); ret = dso_cache__memcpy(cache, offset, data, size); @@ -301,24 +463,27 @@ dso_cache__read(struct dso *dso, struct machine *machine, if (ret <= 0) free(cache); - close(fd); return ret; } -static ssize_t dso_cache_read(struct dso *dso, struct machine *machine, - u64 offset, u8 *data, ssize_t size) +static ssize_t dso_cache_read(struct dso *dso, u64 offset, + u8 *data, ssize_t size) { struct dso_cache *cache; - cache = dso_cache__find(&dso->cache, offset); + cache = dso_cache__find(&dso->data.cache, offset); if (cache) return dso_cache__memcpy(cache, offset, data, size); else - return dso_cache__read(dso, machine, offset, data, size); + return dso_cache__read(dso, offset, data, size); } -ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, - u64 offset, u8 *data, ssize_t size) +/* + * Reads and caches dso data DSO__DATA_CACHE_SIZE size chunks + * in the rb_tree. Any read to already cached data is served + * by cached data. + */ +static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size) { ssize_t r = 0; u8 *p = data; @@ -326,7 +491,7 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, do { ssize_t ret; - ret = dso_cache_read(dso, machine, offset, p, size); + ret = dso_cache_read(dso, offset, p, size); if (ret < 0) return ret; @@ -346,6 +511,67 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, return r; } +static int data_file_size(struct dso *dso) +{ + struct stat st; + + if (!dso->data.file_size) { + if (fstat(dso->data.fd, &st)) { + pr_err("dso mmap failed, fstat: %s\n", strerror(errno)); + return -1; + } + dso->data.file_size = st.st_size; + } + + return 0; +} + +static ssize_t data_read_offset(struct dso *dso, u64 offset, + u8 *data, ssize_t size) +{ + if (data_file_size(dso)) + return -1; + + /* Check the offset sanity. */ + if (offset > dso->data.file_size) + return -1; + + if (offset + size < offset) + return -1; + + return cached_read(dso, offset, data, size); +} + +/** + * dso__data_read_offset - Read data from dso file offset + * @dso: dso object + * @machine: machine object + * @offset: file offset + * @data: buffer to store data + * @size: size of the @data buffer + * + * External interface to read data from dso file offset. Open + * dso data file and use cached_read to get the data. + */ +ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) +{ + if (dso__data_fd(dso, machine) < 0) + return -1; + + return data_read_offset(dso, offset, data, size); +} + +/** + * dso__data_read_addr - Read data from dso address + * @dso: dso object + * @machine: machine object + * @add: virtual memory address + * @data: buffer to store data + * @size: size of the @data buffer + * + * External interface to read data from dso address. + */ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size) @@ -473,7 +699,8 @@ struct dso *dso__new(const char *name) dso__set_short_name(dso, dso->name, false); for (i = 0; i < MAP__NR_TYPES; ++i) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; - dso->cache = RB_ROOT; + dso->data.cache = RB_ROOT; + dso->data.fd = -1; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; dso->binary_type = DSO_BINARY_TYPE__NOT_FOUND; dso->loaded = 0; @@ -485,6 +712,7 @@ struct dso *dso__new(const char *name) dso->kernel = DSO_TYPE_USER; dso->needs_swap = DSO_SWAP__UNSET; INIT_LIST_HEAD(&dso->node); + INIT_LIST_HEAD(&dso->data.open_entry); } return dso; @@ -506,7 +734,8 @@ void dso__delete(struct dso *dso) dso->long_name_allocated = false; } - dso_cache__free(&dso->cache); + dso__data_close(dso); + dso_cache__free(&dso->data.cache); dso__free_a2l(dso); zfree(&dso->symsrc_filename); free(dso); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 38efe95a7fdd..ad553ba257bf 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -76,7 +76,6 @@ struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; - struct rb_root cache; void *a2l; char *symsrc_filename; unsigned int a2l_fails; @@ -99,6 +98,15 @@ struct dso { const char *long_name; u16 long_name_len; u16 short_name_len; + + /* dso data file */ + struct { + struct rb_root cache; + int fd; + size_t file_size; + struct list_head open_entry; + } data; + char name[0]; }; @@ -141,7 +149,47 @@ char dso__symtab_origin(const struct dso *dso); int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size); +/* + * The dso__data_* external interface provides following functions: + * dso__data_fd + * dso__data_close + * dso__data_read_offset + * dso__data_read_addr + * + * Please refer to the dso.c object code for each function and + * arguments documentation. Following text tries to explain the + * dso file descriptor caching. + * + * The dso__data* interface allows caching of opened file descriptors + * to speed up the dso data accesses. The idea is to leave the file + * descriptor opened ideally for the whole life of the dso object. + * + * The current usage of the dso__data_* interface is as follows: + * + * Get DSO's fd: + * int fd = dso__data_fd(dso, machine); + * USE 'fd' SOMEHOW + * + * Read DSO's data: + * n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE); + * n = dso__data_read_addr(dso_0, &machine, 0, buf, BUFSIZE); + * + * Eventually close DSO's fd: + * dso__data_close(dso); + * + * It is not necessary to close the DSO object data file. Each time new + * DSO data file is opened, the limit (RLIMIT_NOFILE/2) is checked. Once + * it is crossed, the oldest opened DSO object is closed. + * + * The dso__delete function calls close_dso function to ensure the + * data file descriptor gets closed/unmapped before the dso object + * is freed. + * + * TODO +*/ int dso__data_fd(struct dso *dso, struct machine *machine); +void dso__data_close(struct dso *dso); + ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size); ssize_t dso__data_read_addr(struct dso *dso, struct map *map, diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 65795b835b39..d0281bdfa582 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,4 +1,5 @@ #include <linux/types.h> +#include <sys/mman.h> #include "event.h" #include "debug.h" #include "hist.h" @@ -178,13 +179,14 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, return -1; } - event->header.type = PERF_RECORD_MMAP; + event->header.type = PERF_RECORD_MMAP2; while (1) { char bf[BUFSIZ]; char prot[5]; char execname[PATH_MAX]; char anonstr[] = "//anon"; + unsigned int ino; size_t size; ssize_t n; @@ -195,15 +197,20 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n", - &event->mmap.start, &event->mmap.len, prot, - &event->mmap.pgoff, - execname); + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + &event->mmap2.start, &event->mmap2.len, prot, + &event->mmap2.pgoff, &event->mmap2.maj, + &event->mmap2.min, + &ino, execname); + /* * Anon maps don't have the execname. */ - if (n < 4) + if (n < 7) continue; + + event->mmap2.ino = (u64)ino; + /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c */ @@ -212,6 +219,21 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, else event->header.misc = PERF_RECORD_MISC_GUEST_USER; + /* map protection and flags bits */ + event->mmap2.prot = 0; + event->mmap2.flags = 0; + if (prot[0] == 'r') + event->mmap2.prot |= PROT_READ; + if (prot[1] == 'w') + event->mmap2.prot |= PROT_WRITE; + if (prot[2] == 'x') + event->mmap2.prot |= PROT_EXEC; + + if (prot[3] == 's') + event->mmap2.flags |= MAP_SHARED; + else + event->mmap2.flags |= MAP_PRIVATE; + if (prot[2] != 'x') { if (!mmap_data || prot[0] != 'r') continue; @@ -223,15 +245,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, anonstr); size = strlen(execname) + 1; - memcpy(event->mmap.filename, execname, size); + memcpy(event->mmap2.filename, execname, size); size = PERF_ALIGN(size, sizeof(u64)); - event->mmap.len -= event->mmap.start; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.pid = tgid; - event->mmap.tid = pid; + event->mmap2.len -= event->mmap.start; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, machine->id_hdr_size); + event->mmap2.header.size += machine->id_hdr_size; + event->mmap2.pid = tgid; + event->mmap2.tid = pid; if (process(tool, event, &synth_sample, machine) != 0) { rc = -1; @@ -612,12 +634,15 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 - " %02x:%02x %"PRIu64" %"PRIu64"]: %c %s\n", + " %02x:%02x %"PRIu64" %"PRIu64"]: %c%c%c%c %s\n", event->mmap2.pid, event->mmap2.tid, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, event->mmap2.min, event->mmap2.ino, event->mmap2.ino_generation, - (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) ? 'r' : 'x', + (event->mmap2.prot & PROT_READ) ? 'r' : '-', + (event->mmap2.prot & PROT_WRITE) ? 'w' : '-', + (event->mmap2.prot & PROT_EXEC) ? 'x' : '-', + (event->mmap2.flags & MAP_SHARED) ? 's' : 'p', event->mmap2.filename); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index d970232cb270..e5dd40addb30 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -7,6 +7,7 @@ #include "../perf.h" #include "map.h" #include "build-id.h" +#include "perf_regs.h" struct mmap_event { struct perf_event_header header; @@ -27,6 +28,8 @@ struct mmap2_event { u32 min; u64 ino; u64 ino_generation; + u32 prot; + u32 flags; char filename[PATH_MAX]; }; @@ -87,6 +90,10 @@ struct regs_dump { u64 abi; u64 mask; u64 *regs; + + /* Cached values/mask filled by first register access. */ + u64 cache_regs[PERF_REGS_MAX]; + u64 cache_mask; }; struct stack_dump { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5c28d82b76c4..8606175fe1e8 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -589,10 +589,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) } /* - * We default some events to a 1 default interval. But keep + * We default some events to have a default interval. But keep * it a weak assumption overridable by the user. */ - if (!attr->sample_period || (opts->user_freq != UINT_MAX && + if (!attr->sample_period || (opts->user_freq != UINT_MAX || opts->user_interval != ULLONG_MAX)) { if (opts->freq) { perf_evsel__set_sample_bit(evsel, PERIOD); @@ -659,6 +659,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) perf_evsel__set_sample_bit(evsel, WEIGHT); attr->mmap = track; + attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; if (opts->sample_transaction) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b262b44b7a65..30df6187ee02 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -4,6 +4,7 @@ #include "session.h" #include "sort.h" #include "evsel.h" +#include "annotate.h" #include <math.h> static bool hists__filter_entry_by_dso(struct hists *hists, @@ -127,6 +128,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) + unresolved_col_width + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); + hists__new_col_len(hists, HISTC_MEM_DCACHELINE, + symlen + 1); } else { symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, @@ -231,6 +234,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) return true; he_stat__decay(&he->stat); + if (symbol_conf.cumulate_callchain) + he_stat__decay(he->stat_acc); diff = prev_period - he->stat.period; @@ -276,14 +281,31 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) * histogram, sorted on item, collects periods */ -static struct hist_entry *hist_entry__new(struct hist_entry *template) +static struct hist_entry *hist_entry__new(struct hist_entry *template, + bool sample_self) { - size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; - struct hist_entry *he = zalloc(sizeof(*he) + callchain_size); + size_t callchain_size = 0; + struct hist_entry *he; + + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) + callchain_size = sizeof(struct callchain_root); + + he = zalloc(sizeof(*he) + callchain_size); if (he != NULL) { *he = *template; + if (symbol_conf.cumulate_callchain) { + he->stat_acc = malloc(sizeof(he->stat)); + if (he->stat_acc == NULL) { + free(he); + return NULL; + } + memcpy(he->stat_acc, &he->stat, sizeof(he->stat)); + if (!sample_self) + memset(&he->stat, 0, sizeof(he->stat)); + } + if (he->ms.map) he->ms.map->referenced = true; @@ -295,6 +317,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) */ he->branch_info = malloc(sizeof(*he->branch_info)); if (he->branch_info == NULL) { + free(he->stat_acc); free(he); return NULL; } @@ -333,7 +356,8 @@ static u8 symbol__parent_filter(const struct symbol *parent) static struct hist_entry *add_hist_entry(struct hists *hists, struct hist_entry *entry, - struct addr_location *al) + struct addr_location *al, + bool sample_self) { struct rb_node **p; struct rb_node *parent = NULL; @@ -357,7 +381,10 @@ static struct hist_entry *add_hist_entry(struct hists *hists, cmp = hist_entry__cmp(he, entry); if (!cmp) { - he_stat__add_period(&he->stat, period, weight); + if (sample_self) + he_stat__add_period(&he->stat, period, weight); + if (symbol_conf.cumulate_callchain) + he_stat__add_period(he->stat_acc, period, weight); /* * This mem info was allocated from sample__resolve_mem @@ -385,14 +412,17 @@ static struct hist_entry *add_hist_entry(struct hists *hists, p = &(*p)->rb_right; } - he = hist_entry__new(entry); + he = hist_entry__new(entry, sample_self); if (!he) return NULL; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); out: - he_stat__add_cpumode_period(&he->stat, al->cpumode, period); + if (sample_self) + he_stat__add_cpumode_period(&he->stat, al->cpumode, period); + if (symbol_conf.cumulate_callchain) + he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); return he; } @@ -401,7 +431,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, - u64 period, u64 weight, u64 transaction) + u64 period, u64 weight, u64 transaction, + bool sample_self) { struct hist_entry entry = { .thread = al->thread, @@ -410,9 +441,10 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .map = al->map, .sym = al->sym, }, - .cpu = al->cpu, - .ip = al->addr, - .level = al->level, + .cpu = al->cpu, + .cpumode = al->cpumode, + .ip = al->addr, + .level = al->level, .stat = { .nr_events = 1, .period = period, @@ -426,7 +458,429 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .transaction = transaction, }; - return add_hist_entry(hists, &entry, al); + return add_hist_entry(hists, &entry, al, sample_self); +} + +static int +iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct perf_sample *sample = iter->sample; + struct mem_info *mi; + + mi = sample__resolve_mem(sample, al); + if (mi == NULL) + return -ENOMEM; + + iter->priv = mi; + return 0; +} + +static int +iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + u64 cost; + struct mem_info *mi = iter->priv; + struct hist_entry *he; + + if (mi == NULL) + return -EINVAL; + + cost = iter->sample->weight; + if (!cost) + cost = 1; + + /* + * must pass period=weight in order to get the correct + * sorting from hists__collapse_resort() which is solely + * based on periods. We want sorting be done on nr_events * weight + * and this is indirectly achieved by passing period=weight here + * and the he_stat__add_period() function. + */ + he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi, + cost, cost, 0, true); + if (!he) + return -ENOMEM; + + iter->he = he; + return 0; +} + +static int +iter_finish_mem_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + struct perf_evsel *evsel = iter->evsel; + struct hist_entry *he = iter->he; + int err = -EINVAL; + + if (he == NULL) + goto out; + + hists__inc_nr_samples(&evsel->hists, he->filtered); + + err = hist_entry__append_callchain(he, iter->sample); + +out: + /* + * We don't need to free iter->priv (mem_info) here since + * the mem info was either already freed in add_hist_entry() or + * passed to a new hist entry by hist_entry__new(). + */ + iter->priv = NULL; + + iter->he = NULL; + return err; +} + +static int +iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct branch_info *bi; + struct perf_sample *sample = iter->sample; + + bi = sample__resolve_bstack(sample, al); + if (!bi) + return -ENOMEM; + + iter->curr = 0; + iter->total = sample->branch_stack->nr; + + iter->priv = bi; + return 0; +} + +static int +iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + /* to avoid calling callback function */ + iter->he = NULL; + + return 0; +} + +static int +iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct branch_info *bi = iter->priv; + int i = iter->curr; + + if (bi == NULL) + return 0; + + if (iter->curr >= iter->total) + return 0; + + al->map = bi[i].to.map; + al->sym = bi[i].to.sym; + al->addr = bi[i].to.addr; + return 1; +} + +static int +iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct branch_info *bi; + struct perf_evsel *evsel = iter->evsel; + struct hist_entry *he = NULL; + int i = iter->curr; + int err = 0; + + bi = iter->priv; + + if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) + goto out; + + /* + * The report shows the percentage of total branches captured + * and not events sampled. Thus we use a pseudo period of 1. + */ + he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL, + 1, 1, 0, true); + if (he == NULL) + return -ENOMEM; + + hists__inc_nr_samples(&evsel->hists, he->filtered); + +out: + iter->he = he; + iter->curr++; + return err; +} + +static int +iter_finish_branch_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + zfree(&iter->priv); + iter->he = NULL; + + return iter->curr >= iter->total ? 0 : -1; +} + +static int +iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + struct hist_entry *he; + + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction, true); + if (he == NULL) + return -ENOMEM; + + iter->he = he; + return 0; +} + +static int +iter_finish_normal_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + struct hist_entry *he = iter->he; + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + + if (he == NULL) + return 0; + + iter->he = NULL; + + hists__inc_nr_samples(&evsel->hists, he->filtered); + + return hist_entry__append_callchain(he, sample); +} + +static int +iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + struct hist_entry **he_cache; + + callchain_cursor_commit(&callchain_cursor); + + /* + * This is for detecting cycles or recursions so that they're + * cumulated only one time to prevent entries more than 100% + * overhead. + */ + he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1)); + if (he_cache == NULL) + return -ENOMEM; + + iter->priv = he_cache; + iter->curr = 0; + + return 0; +} + +static int +iter_add_single_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + struct hist_entry **he_cache = iter->priv; + struct hist_entry *he; + int err = 0; + + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction, true); + if (he == NULL) + return -ENOMEM; + + iter->he = he; + he_cache[iter->curr++] = he; + + callchain_append(he->callchain, &callchain_cursor, sample->period); + + /* + * We need to re-initialize the cursor since callchain_append() + * advanced the cursor to the end. + */ + callchain_cursor_commit(&callchain_cursor); + + hists__inc_nr_samples(&evsel->hists, he->filtered); + + return err; +} + +static int +iter_next_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al) +{ + struct callchain_cursor_node *node; + + node = callchain_cursor_current(&callchain_cursor); + if (node == NULL) + return 0; + + return fill_callchain_info(al, node, iter->hide_unresolved); +} + +static int +iter_add_next_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + struct hist_entry **he_cache = iter->priv; + struct hist_entry *he; + struct hist_entry he_tmp = { + .cpu = al->cpu, + .thread = al->thread, + .comm = thread__comm(al->thread), + .ip = al->addr, + .ms = { + .map = al->map, + .sym = al->sym, + }, + .parent = iter->parent, + }; + int i; + struct callchain_cursor cursor; + + callchain_cursor_snapshot(&cursor, &callchain_cursor); + + callchain_cursor_advance(&callchain_cursor); + + /* + * Check if there's duplicate entries in the callchain. + * It's possible that it has cycles or recursive calls. + */ + for (i = 0; i < iter->curr; i++) { + if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) { + /* to avoid calling callback function */ + iter->he = NULL; + return 0; + } + } + + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction, false); + if (he == NULL) + return -ENOMEM; + + iter->he = he; + he_cache[iter->curr++] = he; + + callchain_append(he->callchain, &cursor, sample->period); + return 0; +} + +static int +iter_finish_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + zfree(&iter->priv); + iter->he = NULL; + + return 0; +} + +const struct hist_iter_ops hist_iter_mem = { + .prepare_entry = iter_prepare_mem_entry, + .add_single_entry = iter_add_single_mem_entry, + .next_entry = iter_next_nop_entry, + .add_next_entry = iter_add_next_nop_entry, + .finish_entry = iter_finish_mem_entry, +}; + +const struct hist_iter_ops hist_iter_branch = { + .prepare_entry = iter_prepare_branch_entry, + .add_single_entry = iter_add_single_branch_entry, + .next_entry = iter_next_branch_entry, + .add_next_entry = iter_add_next_branch_entry, + .finish_entry = iter_finish_branch_entry, +}; + +const struct hist_iter_ops hist_iter_normal = { + .prepare_entry = iter_prepare_normal_entry, + .add_single_entry = iter_add_single_normal_entry, + .next_entry = iter_next_nop_entry, + .add_next_entry = iter_add_next_nop_entry, + .finish_entry = iter_finish_normal_entry, +}; + +const struct hist_iter_ops hist_iter_cumulative = { + .prepare_entry = iter_prepare_cumulative_entry, + .add_single_entry = iter_add_single_cumulative_entry, + .next_entry = iter_next_cumulative_entry, + .add_next_entry = iter_add_next_cumulative_entry, + .finish_entry = iter_finish_cumulative_entry, +}; + +int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, + struct perf_evsel *evsel, struct perf_sample *sample, + int max_stack_depth, void *arg) +{ + int err, err2; + + err = sample__resolve_callchain(sample, &iter->parent, evsel, al, + max_stack_depth); + if (err) + return err; + + iter->evsel = evsel; + iter->sample = sample; + + err = iter->ops->prepare_entry(iter, al); + if (err) + goto out; + + err = iter->ops->add_single_entry(iter, al); + if (err) + goto out; + + if (iter->he && iter->add_entry_cb) { + err = iter->add_entry_cb(iter, al, true, arg); + if (err) + goto out; + } + + while (iter->ops->next_entry(iter, al)) { + err = iter->ops->add_next_entry(iter, al); + if (err) + break; + + if (iter->he && iter->add_entry_cb) { + err = iter->add_entry_cb(iter, al, false, arg); + if (err) + goto out; + } + } + +out: + err2 = iter->ops->finish_entry(iter, al); + if (!err) + err = err2; + + return err; } int64_t @@ -469,6 +923,7 @@ void hist_entry__free(struct hist_entry *he) { zfree(&he->branch_info); zfree(&he->mem_info); + zfree(&he->stat_acc); free_srcline(he->srcline); free(he); } @@ -494,6 +949,8 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, if (!cmp) { he_stat__add_stat(&iter->stat, &he->stat); + if (symbol_conf.cumulate_callchain) + he_stat__add_stat(iter->stat_acc, he->stat_acc); if (symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); @@ -800,6 +1257,13 @@ void hists__inc_nr_events(struct hists *hists, u32 type) events_stats__inc(&hists->stats, type); } +void hists__inc_nr_samples(struct hists *hists, bool filtered) +{ + events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE); + if (!filtered) + hists->stats.nr_non_filtered_samples++; +} + static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *pair) { @@ -831,7 +1295,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, p = &(*p)->rb_right; } - he = hist_entry__new(pair); + he = hist_entry__new(pair, true); if (he) { memset(&he->stat, 0, sizeof(he->stat)); he->hists = hists; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index a8418d19808d..742f49a85725 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -72,6 +72,7 @@ enum hist_column { HISTC_MEM_TLB, HISTC_MEM_LVL, HISTC_MEM_SNOOP, + HISTC_MEM_DCACHELINE, HISTC_TRANSACTION, HISTC_NR_COLS, /* Last entry */ }; @@ -96,12 +97,50 @@ struct hists { u16 col_len[HISTC_NR_COLS]; }; +struct hist_entry_iter; + +struct hist_iter_ops { + int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *); + int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *); + int (*next_entry)(struct hist_entry_iter *, struct addr_location *); + int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *); + int (*finish_entry)(struct hist_entry_iter *, struct addr_location *); +}; + +struct hist_entry_iter { + int total; + int curr; + + bool hide_unresolved; + + struct perf_evsel *evsel; + struct perf_sample *sample; + struct hist_entry *he; + struct symbol *parent; + void *priv; + + const struct hist_iter_ops *ops; + /* user-defined callback function (optional) */ + int (*add_entry_cb)(struct hist_entry_iter *iter, + struct addr_location *al, bool single, void *arg); +}; + +extern const struct hist_iter_ops hist_iter_normal; +extern const struct hist_iter_ops hist_iter_branch; +extern const struct hist_iter_ops hist_iter_mem; +extern const struct hist_iter_ops hist_iter_cumulative; + struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *parent, struct branch_info *bi, struct mem_info *mi, u64 period, - u64 weight, u64 transaction); + u64 weight, u64 transaction, + bool sample_self); +int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, + struct perf_evsel *evsel, struct perf_sample *sample, + int max_stack_depth, void *arg); + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); @@ -119,6 +158,7 @@ u64 hists__total_period(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); void hists__inc_nr_events(struct hists *hists, u32 type); +void hists__inc_nr_samples(struct hists *hists, bool filtered); void events_stats__inc(struct events_stats *stats, u32 type); size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); @@ -166,6 +206,7 @@ struct perf_hpp_fmt { struct list_head list; struct list_head sort_list; + bool elide; }; extern struct list_head perf_hpp__list; @@ -192,6 +233,7 @@ enum { PERF_HPP__OVERHEAD_US, PERF_HPP__OVERHEAD_GUEST_SYS, PERF_HPP__OVERHEAD_GUEST_US, + PERF_HPP__OVERHEAD_ACC, PERF_HPP__SAMPLES, PERF_HPP__PERIOD, @@ -200,7 +242,11 @@ enum { void perf_hpp__init(void); void perf_hpp__column_register(struct perf_hpp_fmt *format); +void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__column_enable(unsigned col); +void perf_hpp__column_disable(unsigned col); +void perf_hpp__cancel_cumulate(void); + void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); void perf_hpp__setup_output_field(void); void perf_hpp__reset_output_field(void); @@ -208,7 +254,12 @@ void perf_hpp__append_sort_keys(void); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); -bool perf_hpp__should_skip(struct perf_hpp_fmt *format); + +static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format) +{ + return format->elide; +} + void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists); typedef u64 (*hpp_field_fn)(struct hist_entry *he); @@ -218,6 +269,9 @@ typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...); int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, hpp_field_fn get_field, const char *fmt, hpp_snprint_fn print_fn, bool fmt_percent); +int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he, + hpp_field_fn get_field, const char *fmt, + hpp_snprint_fn print_fn, bool fmt_percent); static inline void advance_hpp(struct perf_hpp *hpp, int inc) { diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7409ac8de51c..0e5fea95d596 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1060,6 +1060,8 @@ int machine__process_mmap2_event(struct machine *machine, event->mmap2.pid, event->mmap2.maj, event->mmap2.min, event->mmap2.ino, event->mmap2.ino_generation, + event->mmap2.prot, + event->mmap2.flags, event->mmap2.filename, type); if (map == NULL) @@ -1105,7 +1107,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(&machine->user_dsos, event->mmap.start, event->mmap.len, event->mmap.pgoff, - event->mmap.pid, 0, 0, 0, 0, + event->mmap.pid, 0, 0, 0, 0, 0, 0, event->mmap.filename, type); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 8ccbb32eda25..25c571f4cba6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -138,7 +138,7 @@ void map__init(struct map *map, enum map_type type, struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, char *filename, + u64 ino_gen, u32 prot, u32 flags, char *filename, enum map_type type) { struct map *map = malloc(sizeof(*map)); @@ -157,6 +157,8 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, map->min = d_min; map->ino = ino; map->ino_generation = ino_gen; + map->prot = prot; + map->flags = flags; if ((anon || no_dso) && type == MAP__FUNCTION) { snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index ae2d45110588..7758c72522ef 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -35,6 +35,8 @@ struct map { bool referenced; bool erange_warned; u32 priv; + u32 prot; + u32 flags; u64 pgoff; u64 reloc; u32 maj, min; /* only valid for MMAP2 record */ @@ -118,7 +120,7 @@ void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, + u64 ino_gen, u32 prot, u32 flags, char *filename, enum map_type type); struct map *map__new2(u64 start, struct dso *dso, enum map_type type); void map__delete(struct map *map); diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a3539ef30b15..43168fb0d9a2 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -1,11 +1,15 @@ #include <errno.h> #include "perf_regs.h" +#include "event.h" int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { int i, idx = 0; u64 mask = regs->mask; + if (regs->cache_mask & (1 << id)) + goto out; + if (!(mask & (1 << id))) return -EINVAL; @@ -14,6 +18,10 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) idx++; } - *valp = regs->regs[idx]; + regs->cache_mask |= (1 << id); + regs->cache_regs[id] = regs->regs[idx]; + +out: + *valp = regs->cache_regs[id]; return 0; } diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 79c78f74e0cf..980dbf76bc98 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -2,7 +2,8 @@ #define __PERF_REGS_H #include <linux/types.h> -#include "event.h" + +struct regs_dump; #ifdef HAVE_PERF_REGS_SUPPORT #include <perf_regs.h> @@ -11,6 +12,7 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); #else #define PERF_REGS_MASK 0 +#define PERF_REGS_MAX 0 static inline const char *perf_reg_name(int id __maybe_unused) { diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0d1542f33d87..9a0a1839a377 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -628,11 +628,11 @@ static int __show_line_range(struct line_range *lr, const char *module) ret = debuginfo__find_line_range(dinfo, lr); debuginfo__delete(dinfo); - if (ret == 0) { + if (ret == 0 || ret == -ENOENT) { pr_warning("Specified source line is not found.\n"); return -ENOENT; } else if (ret < 0) { - pr_warning("Debuginfo analysis failed. (%d)\n", ret); + pr_warning("Debuginfo analysis failed.\n"); return ret; } @@ -641,7 +641,7 @@ static int __show_line_range(struct line_range *lr, const char *module) ret = get_real_path(tmp, lr->comp_dir, &lr->path); free(tmp); /* Free old path */ if (ret < 0) { - pr_warning("Failed to find source file. (%d)\n", ret); + pr_warning("Failed to find source file path.\n"); return ret; } @@ -721,9 +721,14 @@ static int show_available_vars_at(struct debuginfo *dinfo, ret = debuginfo__find_available_vars_at(dinfo, pev, &vls, max_vls, externs); if (ret <= 0) { - pr_err("Failed to find variables at %s (%d)\n", buf, ret); + if (ret == 0 || ret == -ENOENT) { + pr_err("Failed to find the address of %s\n", buf); + ret = -ENOENT; + } else + pr_warning("Debuginfo analysis failed.\n"); goto end; } + /* Some variables are found */ fprintf(stdout, "Available variables at %s\n", buf); for (i = 0; i < ret; i++) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 9d8eb26f0533..98e304766416 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -573,14 +573,13 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) { /* Search again in global variables */ if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) + pr_warning("Failed to find '%s' in this function.\n", + pf->pvar->var); ret = -ENOENT; } if (ret >= 0) ret = convert_variable(&vr_die, pf); - if (ret < 0) - pr_warning("Failed to find '%s' in this function.\n", - pf->pvar->var); return ret; } @@ -1281,7 +1280,11 @@ out: return ret; } -/* Find available variables at given probe point */ +/* + * Find available variables at given probe point + * Return the number of found probe points. Return 0 if there is no + * matched probe point. Return <0 if an error occurs. + */ int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct perf_probe_event *pev, struct variable_list **vls, diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index e108207c5de0..af7da565a750 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -215,6 +215,7 @@ static void define_event_symbols(struct event_format *event, case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: case PRINT_STRING: + case PRINT_BITMASK: break; case PRINT_TYPE: define_event_symbols(event, ev_name, args->typecast.item); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index cd9774df3750..1c419321f707 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -197,6 +197,7 @@ static void define_event_symbols(struct event_format *event, case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: case PRINT_FUNC: + case PRINT_BITMASK: /* we should warn... */ return; } @@ -622,6 +623,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "%s=", f->name); if (f->flags & FIELD_IS_STRING || f->flags & FIELD_IS_FLAG || + f->flags & FIELD_IS_ARRAY || f->flags & FIELD_IS_SYMBOLIC) fprintf(ofp, "%%s"); else if (f->flags & FIELD_IS_SIGNED) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 901b9bece2ee..1ec57dd82284 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1,3 +1,4 @@ +#include <sys/mman.h> #include "sort.h" #include "hist.h" #include "comm.h" @@ -784,6 +785,104 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*s", width, out); } +static inline u64 cl_address(u64 address) +{ + /* return the cacheline of the address */ + return (address & ~(cacheline_size - 1)); +} + +static int64_t +sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) +{ + u64 l, r; + struct map *l_map, *r_map; + + if (!left->mem_info) return -1; + if (!right->mem_info) return 1; + + /* group event types together */ + if (left->cpumode > right->cpumode) return -1; + if (left->cpumode < right->cpumode) return 1; + + l_map = left->mem_info->daddr.map; + r_map = right->mem_info->daddr.map; + + /* if both are NULL, jump to sort on al_addr instead */ + if (!l_map && !r_map) + goto addr; + + if (!l_map) return -1; + if (!r_map) return 1; + + if (l_map->maj > r_map->maj) return -1; + if (l_map->maj < r_map->maj) return 1; + + if (l_map->min > r_map->min) return -1; + if (l_map->min < r_map->min) return 1; + + if (l_map->ino > r_map->ino) return -1; + if (l_map->ino < r_map->ino) return 1; + + if (l_map->ino_generation > r_map->ino_generation) return -1; + if (l_map->ino_generation < r_map->ino_generation) return 1; + + /* + * Addresses with no major/minor numbers are assumed to be + * anonymous in userspace. Sort those on pid then address. + * + * The kernel and non-zero major/minor mapped areas are + * assumed to be unity mapped. Sort those on address. + */ + + if ((left->cpumode != PERF_RECORD_MISC_KERNEL) && + (!(l_map->flags & MAP_SHARED)) && + !l_map->maj && !l_map->min && !l_map->ino && + !l_map->ino_generation) { + /* userspace anonymous */ + + if (left->thread->pid_ > right->thread->pid_) return -1; + if (left->thread->pid_ < right->thread->pid_) return 1; + } + +addr: + /* al_addr does all the right addr - start + offset calculations */ + l = cl_address(left->mem_info->daddr.al_addr); + r = cl_address(right->mem_info->daddr.al_addr); + + if (l > r) return -1; + if (l < r) return 1; + + return 0; +} + +static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + + uint64_t addr = 0; + struct map *map = NULL; + struct symbol *sym = NULL; + char level = he->level; + + if (he->mem_info) { + addr = cl_address(he->mem_info->daddr.al_addr); + map = he->mem_info->daddr.map; + sym = he->mem_info->daddr.sym; + + /* print [s] for shared data mmaps */ + if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && + map && (map->type == MAP__VARIABLE) && + (map->flags & MAP_SHARED) && + (map->maj || map->min || map->ino || + map->ino_generation)) + level = 's'; + else if (!map) + level = 'X'; + } + return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size, + width); +} + struct sort_entry sort_mispredict = { .se_header = "Branch Mispredicted", .se_cmp = sort__mispredict_cmp, @@ -876,6 +975,13 @@ struct sort_entry sort_mem_snoop = { .se_width_idx = HISTC_MEM_SNOOP, }; +struct sort_entry sort_mem_dcacheline = { + .se_header = "Data Cacheline", + .se_cmp = sort__dcacheline_cmp, + .se_snprintf = hist_entry__dcacheline_snprintf, + .se_width_idx = HISTC_MEM_DCACHELINE, +}; + static int64_t sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1043,6 +1149,7 @@ static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb), DIM(SORT_MEM_LVL, "mem", sort_mem_lvl), DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), + DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), }; #undef DIM @@ -1061,6 +1168,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__OVERHEAD_US, "overhead_us"), DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"), DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"), + DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"), DIM(PERF_HPP__SAMPLES, "sample"), DIM(PERF_HPP__PERIOD, "period"), }; @@ -1156,6 +1264,7 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); + hse->hpp.elide = false; return hse; } @@ -1363,27 +1472,64 @@ static int __setup_sorting(void) return ret; } -bool perf_hpp__should_skip(struct perf_hpp_fmt *format) +void perf_hpp__set_elide(int idx, bool elide) { - if (perf_hpp__is_sort_entry(format)) { - struct hpp_sort_entry *hse; + struct perf_hpp_fmt *fmt; + struct hpp_sort_entry *hse; + + perf_hpp__for_each_format(fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; - hse = container_of(format, struct hpp_sort_entry, hpp); - return hse->se->elide; + hse = container_of(fmt, struct hpp_sort_entry, hpp); + if (hse->se->se_width_idx == idx) { + fmt->elide = elide; + break; + } } - return false; } -static void sort_entry__setup_elide(struct sort_entry *se, - struct strlist *list, - const char *list_name, FILE *fp) +static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp) { if (list && strlist__nr_entries(list) == 1) { if (fp != NULL) fprintf(fp, "# %s: %s\n", list_name, strlist__entry(list, 0)->s); - se->elide = true; + return true; } + return false; +} + +static bool get_elide(int idx, FILE *output) +{ + switch (idx) { + case HISTC_SYMBOL: + return __get_elide(symbol_conf.sym_list, "symbol", output); + case HISTC_DSO: + return __get_elide(symbol_conf.dso_list, "dso", output); + case HISTC_COMM: + return __get_elide(symbol_conf.comm_list, "comm", output); + default: + break; + } + + if (sort__mode != SORT_MODE__BRANCH) + return false; + + switch (idx) { + case HISTC_SYMBOL_FROM: + return __get_elide(symbol_conf.sym_from_list, "sym_from", output); + case HISTC_SYMBOL_TO: + return __get_elide(symbol_conf.sym_to_list, "sym_to", output); + case HISTC_DSO_FROM: + return __get_elide(symbol_conf.dso_from_list, "dso_from", output); + case HISTC_DSO_TO: + return __get_elide(symbol_conf.dso_to_list, "dso_to", output); + default: + break; + } + + return false; } void sort__setup_elide(FILE *output) @@ -1391,39 +1537,12 @@ void sort__setup_elide(FILE *output) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "dso", output); - sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, - "comm", output); - sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, - "symbol", output); - - if (sort__mode == SORT_MODE__BRANCH) { - sort_entry__setup_elide(&sort_dso_from, - symbol_conf.dso_from_list, - "dso_from", output); - sort_entry__setup_elide(&sort_dso_to, - symbol_conf.dso_to_list, - "dso_to", output); - sort_entry__setup_elide(&sort_sym_from, - symbol_conf.sym_from_list, - "sym_from", output); - sort_entry__setup_elide(&sort_sym_to, - symbol_conf.sym_to_list, - "sym_to", output); - } else if (sort__mode == SORT_MODE__MEMORY) { - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "symbol_daddr", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "dso_daddr", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "mem", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "local_weight", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "tlb", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "snoop", output); + perf_hpp__for_each_format(fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + fmt->elide = get_elide(hse->se->se_width_idx, output); } /* @@ -1434,8 +1553,7 @@ void sort__setup_elide(FILE *output) if (!perf_hpp__is_sort_entry(fmt)) continue; - hse = container_of(fmt, struct hpp_sort_entry, hpp); - if (!hse->se->elide) + if (!fmt->elide) return; } @@ -1443,8 +1561,7 @@ void sort__setup_elide(FILE *output) if (!perf_hpp__is_sort_entry(fmt)) continue; - hse = container_of(fmt, struct hpp_sort_entry, hpp); - hse->se->elide = false; + fmt->elide = false; } } @@ -1581,6 +1698,9 @@ void reset_output_field(void) sort__has_sym = 0; sort__has_dso = 0; + field_order = NULL; + sort_order = NULL; + reset_dimensions(); perf_hpp__reset_output_field(); } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 5f38d925e92f..041f0c9cea2b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -20,7 +20,7 @@ #include "parse-options.h" #include "parse-events.h" - +#include "hist.h" #include "thread.h" extern regex_t parent_regex; @@ -82,12 +82,14 @@ struct hist_entry { struct list_head head; } pairs; struct he_stat stat; + struct he_stat *stat_acc; struct map_symbol ms; struct thread *thread; struct comm *comm; u64 ip; u64 transaction; s32 cpu; + u8 cpumode; struct hist_entry_diff diff; @@ -130,6 +132,21 @@ static inline void hist_entry__add_pair(struct hist_entry *pair, list_add_tail(&pair->pairs.node, &he->pairs.head); } +static inline float hist_entry__get_percent_limit(struct hist_entry *he) +{ + u64 period = he->stat.period; + u64 total_period = hists__total_period(he->hists); + + if (unlikely(total_period == 0)) + return 0; + + if (symbol_conf.cumulate_callchain) + period = he->stat_acc->period; + + return period * 100.0 / total_period; +} + + enum sort_mode { SORT_MODE__NORMAL, SORT_MODE__BRANCH, @@ -169,6 +186,7 @@ enum sort_type { SORT_MEM_TLB, SORT_MEM_LVL, SORT_MEM_SNOOP, + SORT_MEM_DCACHELINE, }; /* @@ -186,7 +204,6 @@ struct sort_entry { int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size, unsigned int width); u8 se_width_idx; - bool elide; }; extern struct sort_entry sort_thread; @@ -197,6 +214,7 @@ int setup_output_field(void); void reset_output_field(void); extern int sort_dimension__add(const char *); void sort__setup_elide(FILE *fp); +void perf_hpp__set_elide(int idx, bool elide); int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 95e249779931..7b9096f29cdb 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -29,11 +29,12 @@ int vmlinux_path__nr_entries; char **vmlinux_path; struct symbol_conf symbol_conf = { - .use_modules = true, - .try_vmlinux_path = true, - .annotate_src = true, - .demangle = true, - .symfs = "", + .use_modules = true, + .try_vmlinux_path = true, + .annotate_src = true, + .demangle = true, + .cumulate_callchain = true, + .symfs = "", }; static enum dso_binary_type binary_type_symtab[] = { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 33ede53fa6b9..615c752dd767 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -109,6 +109,7 @@ struct symbol_conf { show_nr_samples, show_total_period, use_callchain, + cumulate_callchain, exclude_other, show_cpu_utilization, initialized, diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index bd5768d74f01..25578b98f5c5 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -250,7 +250,6 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, /* Check the .eh_frame section for unwinding info */ offset = elf_section_offset(fd, ".eh_frame_hdr"); - close(fd); if (offset) ret = unwind_spec_ehframe(dso, machine, offset, @@ -271,7 +270,6 @@ static int read_unwind_spec_debug_frame(struct dso *dso, /* Check the .debug_frame section for unwinding info */ *offset = elf_section_offset(fd, ".debug_frame"); - close(fd); if (*offset) return 0; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 7fff6be07f07..95aefa78bb07 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -17,6 +17,7 @@ * XXX We need to find a better place for these things... */ unsigned int page_size; +int cacheline_size; bool test_attr__enabled; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b03da44e94e4..66864364ccb4 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -304,6 +304,7 @@ char *rtrim(char *s); void dump_stack(void); extern unsigned int page_size; +extern int cacheline_size; void get_term_dimensions(struct winsize *ws); |