Diffstat (limited to 'tools/perf/util')
96 files changed, 3740 insertions, 2805 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index f9df1df1eec0..96f4ea1d45c5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,4 +1,5 @@ perf-y += arm64-frame-pointer-unwind-support.o +perf-y += addr_location.o perf-y += annotate.o perf-y += block-info.o perf-y += block-range.o @@ -11,7 +12,6 @@ perf-y += db-export.o perf-y += env.o perf-y += event.o perf-y += evlist.o -perf-y += evlist-hybrid.o perf-y += sideband_evlist.o perf-y += evsel.o perf-y += evsel_fprintf.o @@ -24,7 +24,6 @@ perf-y += llvm-utils.o perf-y += mmap.o perf-y += memswap.o perf-y += parse-events.o -perf-y += parse-events-hybrid.o perf-y += print-events.o perf-y += tracepoint.o perf-y += perf_regs.o @@ -75,7 +74,6 @@ perf-y += pmu.o perf-y += pmus.o perf-y += pmu-flex.o perf-y += pmu-bison.o -perf-y += pmu-hybrid.o perf-y += svghelper.o perf-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o perf-y += trace-event-scripting.o @@ -147,6 +145,7 @@ perf-y += mem2node.o perf-y += clockid.o perf-y += list_sort.o perf-y += mutex.o +perf-y += sharded_mutex.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c new file mode 100644 index 000000000000..51825ef8c0ab --- /dev/null +++ b/tools/perf/util/addr_location.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "addr_location.h" +#include "map.h" +#include "maps.h" +#include "thread.h" + +void addr_location__init(struct addr_location *al) +{ + al->thread = NULL; + al->maps = NULL; + al->map = NULL; + al->sym = NULL; + al->srcline = NULL; + al->addr = 0; + al->level = 0; + al->filtered = 0; + al->cpumode = 0; + al->cpu = 0; + al->socket = 0; +} + +/* + * The preprocess_sample method will return with reference counts for the + * in it, when done using (and perhaps getting ref counts if needing to + * keep a pointer to one of those entries) it must be paired with + * addr_location__put(), so that the refcounts can be decremented. 
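/*
 * A minimal usage sketch (not part of this patch) of the new
 * init/exit pairing, following the pattern the converted callers
 * below adopt: the struct is zero-initialised, resolution takes
 * map/maps/thread references, and addr_location__exit() drops them.
 */
static int sketch_resolve_ip(struct thread *thread, u8 cpumode, u64 ip)
{
        struct addr_location al;
        int err = -1;

        addr_location__init(&al);
        if (thread__find_map(thread, cpumode, ip, &al)) {
                /* al.map, al.maps and al.thread hold references here. */
                err = 0;
        }
        addr_location__exit(&al); /* pairs with addr_location__init() */
        return err;
}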
+ */ +void addr_location__exit(struct addr_location *al) +{ + map__zput(al->map); + thread__zput(al->thread); + maps__zput(al->maps); +} + +void addr_location__copy(struct addr_location *dst, struct addr_location *src) +{ + thread__put(dst->thread); + maps__put(dst->maps); + map__put(dst->map); + *dst = *src; + dst->thread = thread__get(src->thread); + dst->maps = maps__get(src->maps); + dst->map = map__get(src->map); +} diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h new file mode 100644 index 000000000000..d8ac0428dff2 --- /dev/null +++ b/tools/perf/util/addr_location.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ADDR_LOCATION +#define __PERF_ADDR_LOCATION 1 + +#include <linux/types.h> + +struct thread; +struct maps; +struct map; +struct symbol; + +struct addr_location { + struct thread *thread; + struct maps *maps; + struct map *map; + struct symbol *sym; + const char *srcline; + u64 addr; + char level; + u8 filtered; + u8 cpumode; + s32 cpu; + s32 socket; +}; + +void addr_location__init(struct addr_location *al); +void addr_location__exit(struct addr_location *al); + +void addr_location__copy(struct addr_location *dst, struct addr_location *src); + +#endif /* __PERF_ADDR_LOCATION */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 11992cfe271c..ba988a13dacb 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -32,6 +32,7 @@ #include "block-range.h" #include "string2.h" #include "util/event.h" +#include "util/sharded_mutex.h" #include "arch/common.h" #include "namespaces.h" #include <regex.h> @@ -61,6 +62,10 @@ static regex_t file_lineno; static struct ins_ops *ins__find(struct arch *arch, const char *name); static void ins__sort(struct arch *arch); static int disasm_line__parse(char *line, const char **namep, char **rawp); +static int call__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); +static int jump__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); struct arch { const char *name; @@ -70,6 +75,7 @@ struct arch { struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name); bool sorted_instructions; bool initialized; + const char *insn_suffix; void *priv; unsigned int model; unsigned int family; @@ -179,6 +185,7 @@ static struct arch architectures[] = { .init = x86__annotate_init, .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), + .insn_suffix = "bwlq", .objdump = { .comment_char = '#', }, @@ -321,7 +328,7 @@ static struct ins_ops call_ops = { bool ins__is_call(const struct ins *ins) { - return ins->ops == &call_ops || ins->ops == &s390_call_ops; + return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops; } /* @@ -462,7 +469,7 @@ static struct ins_ops jump_ops = { bool ins__is_jump(const struct ins *ins) { - return ins->ops == &jump_ops; + return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops; } static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) @@ -558,13 +565,26 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy return -1; *s = '\0'; + + /* + * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) + * then it needs to have the closing parenthesis. 
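/*
 * Worked example (operand text assumed) of the SIB handling added
 * above: for "mov 0x8(%rax,%rcx,1),%rbx" the raw operand string is
 * "0x8(%rax,%rcx,1),%rbx".  The first ',' falls inside the
 * parenthesised expression, so the parser restores it, re-splits
 * after the matching ')' and ends up with
 *   source.raw = "0x8(%rax,%rcx,1)"
 *   target     = "%rbx"
 * instead of truncating the source operand at "0x8(%rax".
 */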
+ */ + if (strchr(ops->raw, '(')) { + *s = ','; + s = strchr(ops->raw, ')'); + if (s == NULL || s[1] != ',') + return -1; + *++s = '\0'; + } + ops->source.raw = strdup(ops->raw); *s = ','; if (ops->source.raw == NULL) return -1; - target = ++s; + target = skip_spaces(++s); comment = strchr(s, arch->objdump.comment_char); if (comment != NULL) @@ -707,6 +727,26 @@ static struct ins_ops *__ins__find(struct arch *arch, const char *name) } ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + if (ins) + return ins->ops; + + if (arch->insn_suffix) { + char tmp[32]; + char suffix; + size_t len = strlen(name); + + if (len == 0 || len >= sizeof(tmp)) + return NULL; + + suffix = name[len - 1]; + if (strchr(arch->insn_suffix, suffix) == NULL) + return NULL; + + strcpy(tmp, name); + tmp[len - 1] = '\0'; /* remove the suffix and check again */ + + ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + } return ins ? ins->ops : NULL; } @@ -821,7 +861,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); - mutex_lock(¬es->lock); + annotation__lock(notes); if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); @@ -829,7 +869,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) memset(notes->src->cycles_hist, 0, symbol__size(sym) * sizeof(struct cyc_hist)); } - mutex_unlock(¬es->lock); + annotation__unlock(notes); } static int __symbol__account_cycles(struct cyc_hist *ch, @@ -1086,7 +1126,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) notes->hit_insn = 0; notes->cover_insn = 0; - mutex_lock(¬es->lock); + annotation__lock(notes); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; @@ -1105,7 +1145,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) notes->have_cycles = true; } } - mutex_unlock(¬es->lock); + annotation__unlock(notes); } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, @@ -1183,7 +1223,7 @@ static void annotation_line__init(struct annotation_line *al, static void annotation_line__exit(struct annotation_line *al) { - free_srcline(al->path); + zfree_srcline(&al->path); zfree(&al->line); } @@ -1256,17 +1296,64 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); } -void annotation__init(struct annotation *notes) +void annotation__exit(struct annotation *notes) +{ + annotated_source__delete(notes->src); +} + +static struct sharded_mutex *sharded_mutex; + +static void annotation__init_sharded_mutex(void) { - mutex_init(¬es->lock); + /* As many mutexes as there are CPUs. 
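/*
 * sharded_mutex.c itself is outside this diffstat, so the layout here
 * is an assumed sketch of the idea only: allocate a fixed pool of
 * mutexes up front and pick one by hashing the object's address,
 * instead of embedding a mutex in every struct annotation.
 */
struct sharded_mutex_sketch {
        size_t num_shards;                /* assumed a power of two */
        struct mutex mutexes[];
};

static struct mutex *sketch_get_mutex(struct sharded_mutex_sketch *sm,
                                      size_t hash)
{
        return &sm->mutexes[hash & (sm->num_shards - 1)];
}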
*/ + sharded_mutex = sharded_mutex__new(cpu__max_present_cpu().cpu); } -void annotation__exit(struct annotation *notes) +static size_t annotation__hash(const struct annotation *notes) { - annotated_source__delete(notes->src); - mutex_destroy(¬es->lock); + return (size_t)notes; } +static struct mutex *annotation__get_mutex(const struct annotation *notes) +{ + static pthread_once_t once = PTHREAD_ONCE_INIT; + + pthread_once(&once, annotation__init_sharded_mutex); + if (!sharded_mutex) + return NULL; + + return sharded_mutex__get_mutex(sharded_mutex, annotation__hash(notes)); +} + +void annotation__lock(struct annotation *notes) + NO_THREAD_SAFETY_ANALYSIS +{ + struct mutex *mutex = annotation__get_mutex(notes); + + if (mutex) + mutex_lock(mutex); +} + +void annotation__unlock(struct annotation *notes) + NO_THREAD_SAFETY_ANALYSIS +{ + struct mutex *mutex = annotation__get_mutex(notes); + + if (mutex) + mutex_unlock(mutex); +} + +bool annotation__trylock(struct annotation *notes) +{ + struct mutex *mutex = annotation__get_mutex(notes); + + if (!mutex) + return false; + + return mutex_trylock(mutex); +} + + static void annotation_line__add(struct annotation_line *al, struct list_head *head) { list_add_tail(&al->node, head); @@ -1511,6 +1598,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { *line_nr = atoi(parsed_line + match[1].rm_so); + free(*fileloc); *fileloc = strdup(parsed_line); return 0; } @@ -1559,7 +1647,6 @@ static int symbol__parse_objdump_line(struct symbol *sym, } annotation_line__add(&dl->al, ¬es->src->source); - return 0; } @@ -1696,7 +1783,10 @@ fallback: * cache, or is just a kallsyms file, well, lets hope that this * DSO is the same as when 'perf record' ran. 
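/*
 * Illustration of the change that follows (example path assumed): a
 * kernel DSO whose long_name is already an absolute path, say
 * "/tmp/build/vmlinux", is now used verbatim, and only the remaining
 * cases get the symfs prefix joined on via __symbol__join_symfs().
 */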
*/ - __symbol__join_symfs(filename, filename_size, dso->long_name); + if (dso->kernel && dso->long_name[0] == '/') + snprintf(filename, filename_size, "%s", dso->long_name); + else + __symbol__join_symfs(filename, filename_size, dso->long_name); mutex_lock(&dso->lock); if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) { @@ -2101,6 +2191,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) nline++; } free(line); + free(fileloc); err = finish_command(&objdump_process); if (err) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 1c6335b8333a..962780559176 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -271,8 +271,7 @@ struct annotated_source { struct sym_hist *histograms; }; -struct annotation { - struct mutex lock; +struct LOCKABLE annotation { u64 max_coverage; u64 start; u64 hit_cycles; @@ -298,9 +297,15 @@ struct annotation { struct annotated_source *src; }; -void annotation__init(struct annotation *notes); +static inline void annotation__init(struct annotation *notes __maybe_unused) +{ +} void annotation__exit(struct annotation *notes); +void annotation__lock(struct annotation *notes) EXCLUSIVE_LOCK_FUNCTION(*notes); +void annotation__unlock(struct annotation *notes) UNLOCK_FUNCTION(*notes); +bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(true, *notes); + static inline int annotation__cycles_width(struct annotation *notes) { if (notes->have_cycles && notes->options->show_minmax_cycle) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 7b36ba6b4079..afbd5869f6bf 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -254,9 +254,9 @@ static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, } if (speq->thread) { - speq->pid = speq->thread->pid_; + speq->pid = thread__pid(speq->thread); if (queue->cpu == -1) - speq->cpu = speq->thread->cpu; + speq->cpu = thread__cpu(speq->thread); } } diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 16a7b4adcf18..591fc1edd385 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -296,8 +296,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, he->ms.sym); - if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && - (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { + if (start_line != SRCLINE_UNKNOWN && + end_line != SRCLINE_UNKNOWN) { scnprintf(buf, sizeof(buf), "[%s -> %s]", start_line, end_line); } else { @@ -305,8 +305,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, bi->start, bi->end); } - free_srcline(start_line); - free_srcline(end_line); + zfree_srcline(&start_line); + zfree_srcline(&end_line); return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); } diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 06a8cd88cbef..36728222a5b4 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -58,9 +58,11 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, return -1; } + addr_location__init(&al); if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) map__dso(al.map)->hit = 1; + addr_location__exit(&al); thread__put(thread); return 0; } diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 9f2e36ef5072..0b61840d4226 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ 
-26,6 +26,6 @@ static inline int is_absolute_path(const char *path) return path[0] == '/'; } -char *mkpath(const char *fmt, ...) __printf(1, 2); +char *mkpath(char *path_buf, size_t sz, const char *fmt, ...) __printf(3, 4); #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index b0dafc758173..aee937d14fbb 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -58,7 +58,8 @@ struct callchain_param callchain_param_default = { CALLCHAIN_PARAM_DEFAULT }; -__thread struct callchain_cursor callchain_cursor; +/* Used for thread-local struct callchain_cursor. */ +static pthread_key_t callchain_cursor; int parse_callchain_record_opt(const char *arg, struct callchain_param *param) { @@ -590,6 +591,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call->ip = cursor_node->ip; call->ms = cursor_node->ms; call->ms.map = map__get(call->ms.map); + call->ms.maps = maps__get(call->ms.maps); call->srcline = cursor_node->srcline; if (cursor_node->branch) { @@ -649,6 +651,7 @@ add_child(struct callchain_node *parent, list_for_each_entry_safe(call, tmp, &new->val, list) { list_del_init(&call->list); map__zput(call->ms.map); + maps__zput(call->ms.maps); free(call); } free(new); @@ -984,6 +987,9 @@ int callchain_append(struct callchain_root *root, struct callchain_cursor *cursor, u64 period) { + if (cursor == NULL) + return -1; + if (!cursor->nr) return 0; @@ -1010,10 +1016,16 @@ merge_chain_branch(struct callchain_cursor *cursor, int err = 0; list_for_each_entry_safe(list, next_list, &src->val, list) { - callchain_cursor_append(cursor, list->ip, &list->ms, - false, NULL, 0, 0, 0, list->srcline); + struct map_symbol ms = { + .maps = maps__get(list->ms.maps), + .map = map__get(list->ms.map), + }; + callchain_cursor_append(cursor, list->ip, &ms, false, NULL, 0, 0, 0, list->srcline); list_del_init(&list->list); + map__zput(ms.map); + maps__zput(ms.maps); map__zput(list->ms.map); + maps__zput(list->ms.maps); free(list); } @@ -1065,9 +1077,11 @@ int callchain_cursor_append(struct callchain_cursor *cursor, } node->ip = ip; + maps__zput(node->ms.maps); map__zput(node->ms.map); node->ms = *ms; - node->ms.map = map__get(node->ms.map); + node->ms.maps = maps__get(ms->maps); + node->ms.map = map__get(ms->map); node->branch = branch; node->nr_loop_iter = nr_loop_iter; node->iter_cycles = iter_cycles; @@ -1106,7 +1120,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp if ((!symbol_conf.use_callchain || sample->callchain == NULL) && !symbol_conf.show_branchflag_count) return 0; - return callchain_append(he->callchain, &callchain_cursor, sample->period); + return callchain_append(he->callchain, get_tls_callchain_cursor(), sample->period); } int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, @@ -1114,7 +1128,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * { struct machine *machine = maps__machine(node->ms.maps); - al->maps = node->ms.maps; + maps__put(al->maps); + al->maps = maps__get(node->ms.maps); map__put(al->map); al->map = map__get(node->ms.map); al->sym = node->ms.sym; @@ -1127,7 +1142,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * if (al->map == NULL) goto out; } - if (al->maps == machine__kernel_maps(machine)) { + if (RC_CHK_ACCESS(al->maps) == RC_CHK_ACCESS(machine__kernel_maps(machine))) { if (machine__is_host(machine)) { al->cpumode = PERF_RECORD_MISC_KERNEL; al->level = 'k'; 
@@ -1460,12 +1475,14 @@ static void free_callchain_node(struct callchain_node *node) list_for_each_entry_safe(list, tmp, &node->parent_val, list) { list_del_init(&list->list); map__zput(list->ms.map); + maps__zput(list->ms.maps); free(list); } list_for_each_entry_safe(list, tmp, &node->val, list) { list_del_init(&list->list); map__zput(list->ms.map); + maps__zput(list->ms.maps); free(list); } @@ -1551,11 +1568,49 @@ out: list_for_each_entry_safe(chain, new, &head, list) { list_del_init(&chain->list); map__zput(chain->ms.map); + maps__zput(chain->ms.maps); free(chain); } return -ENOMEM; } +static void callchain_cursor__delete(void *vcursor) +{ + struct callchain_cursor *cursor = vcursor; + struct callchain_cursor_node *node, *next; + + callchain_cursor_reset(cursor); + for (node = cursor->first; node != NULL; node = next) { + next = node->next; + free(node); + } + free(cursor); +} + +static void init_callchain_cursor_key(void) +{ + if (pthread_key_create(&callchain_cursor, callchain_cursor__delete)) { + pr_err("callchain cursor creation failed"); + abort(); + } +} + +struct callchain_cursor *get_tls_callchain_cursor(void) +{ + static pthread_once_t once_control = PTHREAD_ONCE_INIT; + struct callchain_cursor *cursor; + + pthread_once(&once_control, init_callchain_cursor_key); + cursor = pthread_getspecific(callchain_cursor); + if (!cursor) { + cursor = zalloc(sizeof(*cursor)); + if (!cursor) + pr_debug3("%s: not enough memory\n", __func__); + pthread_setspecific(callchain_cursor, cursor); + } + return cursor; +} + int callchain_cursor__copy(struct callchain_cursor *dst, struct callchain_cursor *src) { @@ -1596,8 +1651,10 @@ void callchain_cursor_reset(struct callchain_cursor *cursor) cursor->nr = 0; cursor->last = &cursor->first; - for (node = cursor->first; node != NULL; node = node->next) + for (node = cursor->first; node != NULL; node = node->next) { map__zput(node->ms.map); + maps__zput(node->ms.maps); + } } void callchain_param_setup(u64 sample_type, const char *arch) diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index d95615daed73..d2618a47deca 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -168,8 +168,6 @@ struct callchain_cursor { struct callchain_cursor_node *curr; }; -extern __thread struct callchain_cursor callchain_cursor; - static inline void callchain_init(struct callchain_root *root) { INIT_LIST_HEAD(&root->node.val); @@ -211,6 +209,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, /* Close a cursor writing session. 
Initialize for the reader */ static inline void callchain_cursor_commit(struct callchain_cursor *cursor) { + if (cursor == NULL) + return; cursor->curr = cursor->first; cursor->pos = 0; } @@ -219,7 +219,7 @@ static inline void callchain_cursor_commit(struct callchain_cursor *cursor) static inline struct callchain_cursor_node * callchain_cursor_current(struct callchain_cursor *cursor) { - if (cursor->pos == cursor->nr) + if (cursor == NULL || cursor->pos == cursor->nr) return NULL; return cursor->curr; @@ -231,6 +231,8 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor) cursor->pos++; } +struct callchain_cursor *get_tls_callchain_cursor(void); + int callchain_cursor__copy(struct callchain_cursor *dst, struct callchain_cursor *src); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 658170b8dcef..46f144c46827 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -19,6 +19,7 @@ #include "util/llvm-utils.h" /* perf_llvm_config */ #include "util/stat.h" /* perf_stat__set_big_num */ #include "util/evsel.h" /* evsel__hw_names, evsel__use_bpf_counters */ +#include "util/srcline.h" /* addr2line_timeout_ms */ #include "build-id.h" #include "debug.h" #include "config.h" @@ -434,12 +435,14 @@ static int perf_buildid_config(const char *var, const char *value) return 0; } -static int perf_default_core_config(const char *var __maybe_unused, - const char *value __maybe_unused) +static int perf_default_core_config(const char *var, const char *value) { if (!strcmp(var, "core.proc-map-timeout")) proc_map_timeout = strtoul(value, NULL, 10); + if (!strcmp(var, "core.addr2line-timeout")) + addr2line_timeout_ms = strtoul(value, NULL, 10); + /* Add other config variables here. */ return 0; } @@ -543,6 +546,7 @@ static char *home_perfconfig(void) const char *home = NULL; char *config; struct stat st; + char path[PATH_MAX]; home = getenv("HOME"); @@ -554,7 +558,7 @@ static char *home_perfconfig(void) if (!home || !*home || !perf_config_global()) return NULL; - config = strdup(mkpath("%s/.perfconfig", home)); + config = strdup(mkpath(path, sizeof(path), "%s/.perfconfig", home)); if (config == NULL) { pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.\n", home); return NULL; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 75d9c73e0184..0e090e8bc334 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -222,6 +222,10 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) return a->socket - b->socket; else if (a->die != b->die) return a->die - b->die; + else if (a->cache_lvl != b->cache_lvl) + return a->cache_lvl - b->cache_lvl; + else if (a->cache != b->cache) + return a->cache - b->cache; else if (a->core != b->core) return a->core - b->core; else @@ -663,9 +667,9 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) return ptr - buf; } -const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ +struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ { - static const struct perf_cpu_map *online = NULL; + static struct perf_cpu_map *online; if (!online) online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ @@ -679,6 +683,8 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b a->node == b->node && a->socket == b->socket && a->die == b->die && + a->cache_lvl == b->cache_lvl && + a->cache == b->cache && a->core == b->core && a->cpu.cpu == b->cpu.cpu; } @@ -689,6 +695,8 @@ bool 
aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) a->node == -1 && a->socket == -1 && a->die == -1 && + a->cache_lvl == -1 && + a->cache == -1 && a->core == -1 && a->cpu.cpu == -1; } @@ -700,6 +708,8 @@ struct aggr_cpu_id aggr_cpu_id__empty(void) .node = -1, .socket = -1, .die = -1, + .cache_lvl = -1, + .cache = -1, .core = -1, .cpu = (struct perf_cpu){ .cpu = -1 }, }; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index e3426541e0aa..9df2aeb34d3d 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -20,6 +20,13 @@ struct aggr_cpu_id { int socket; /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */ int die; + /** The cache level as read from /sys/devices/system/cpu/cpuX/cache/indexY/level */ + int cache_lvl; + /** + * The cache instance ID, which is the first CPU in the + * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list + */ + int cache; /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */ int core; /** CPU aggregation, note there is one CPU for each SMT thread. */ @@ -48,7 +55,7 @@ struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *da size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); -const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ +struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ int cpu__setup_cpunode_map(void); @@ -59,7 +66,7 @@ struct perf_cpu cpu__max_present_cpu(void); /** * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1. */ -static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus) +static inline bool cpu_map__is_dummy(const struct perf_cpu_map *cpus) { return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1; } diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index ca1d833a0c26..81cfc85f4668 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -12,7 +12,8 @@ #include "cpumap.h" #include "debug.h" #include "env.h" -#include "pmu-hybrid.h" +#include "pmu.h" +#include "pmus.h" #define PACKAGE_CPUS_FMT \ "%s/devices/system/cpu/cpu%d/topology/package_cpus_list" @@ -469,12 +470,11 @@ err: struct hybrid_topology *hybrid_topology__new(void) { - struct perf_pmu *pmu; + struct perf_pmu *pmu = NULL; struct hybrid_topology *tp = NULL; - u32 nr, i = 0; + int nr = perf_pmus__num_core_pmus(), i = 0; - nr = perf_pmu__hybrid_pmu_num(); - if (nr == 0) + if (nr <= 1) return NULL; tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0]) * nr); @@ -482,7 +482,7 @@ struct hybrid_topology *hybrid_topology__new(void) return NULL; tp->nr = nr; - perf_pmu__for_each_hybrid_pmu(pmu) { + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { if (load_hybrid_node(&tp->nodes[i], pmu)) { hybrid_topology__delete(tp); return NULL; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 82a27ab90c8b..e917985bbbe6 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -52,15 +52,15 @@ struct cs_etm_decoder { static u32 cs_etm_decoder__mem_access(const void *context, const ocsd_vaddr_t address, - const ocsd_mem_space_acc_t mem_space __maybe_unused, + const ocsd_mem_space_acc_t mem_space, const u8 trace_chan_id, const u32 req_size, u8 *buffer) { struct 
cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; - return decoder->mem_access(decoder->data, trace_chan_id, - address, req_size, buffer); + return decoder->mem_access(decoder->data, trace_chan_id, address, + req_size, buffer, mem_space); } int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, @@ -541,44 +541,33 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, const uint8_t trace_chan_id) { pid_t tid = -1; - static u64 pid_fmt; - int ret; - - /* - * As all the ETMs run at the same exception level, the system should - * have the same PID format crossing CPUs. So cache the PID format - * and reuse it for sequential decoding. - */ - if (!pid_fmt) { - ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt); - if (ret) - return OCSD_RESP_FATAL_SYS_ERR; - } /* * Process the PE_CONTEXT packets if we have a valid contextID or VMID. * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. */ - switch (pid_fmt) { - case BIT(ETM_OPT_CTXTID): + switch (cs_etm__get_pid_fmt(etmq)) { + case CS_ETM_PIDFMT_CTXTID: if (elem->context.ctxt_id_valid) tid = elem->context.context_id; break; - case BIT(ETM_OPT_CTXTID2): + case CS_ETM_PIDFMT_CTXTID2: if (elem->context.vmid_valid) tid = elem->context.vmid; break; + case CS_ETM_PIDFMT_NONE: default: break; } + if (cs_etm__etmq_set_tid_el(etmq, tid, trace_chan_id, + elem->context.exception_level)) + return OCSD_RESP_FATAL_SYS_ERR; + if (tid == -1) return OCSD_RESP_CONT; - if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) - return OCSD_RESP_FATAL_SYS_ERR; - /* * A timestamp is generated after a PE_CONTEXT element so make sure * to rely on that coming one. diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 21d403f55d96..272c2efe78ee 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -11,6 +11,7 @@ #define INCLUDE__CS_ETM_DECODER_H__ #include <linux/types.h> +#include <opencsd/ocsd_if_types.h> #include <stdio.h> struct cs_etm_decoder; @@ -19,7 +20,8 @@ struct cs_etm_packet_queue; struct cs_etm_queue; -typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *); +typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *, + const ocsd_mem_space_acc_t); struct cs_etmv3_trace_params { u32 reg_ctrl; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 91299cc56bf7..1419b40dfbe8 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -14,7 +14,6 @@ #include <linux/types.h> #include <linux/zalloc.h> -#include <opencsd/ocsd_if_types.h> #include <stdlib.h> #include "auxtrace.h" @@ -46,8 +45,6 @@ struct cs_etm_auxtrace { struct auxtrace_heap heap; struct itrace_synth_opts synth_opts; struct perf_session *session; - struct machine *machine; - struct thread *unknown_thread; struct perf_tsc_conversion tc; /* @@ -80,15 +77,18 @@ struct cs_etm_auxtrace { u64 instructions_id; u64 **metadata; unsigned int pmu_type; + enum cs_etm_pid_fmt pid_fmt; }; struct cs_etm_traceid_queue { u8 trace_chan_id; - pid_t pid, tid; u64 period_instructions; size_t last_branch_pos; union perf_event *event_buf; struct thread *thread; + struct thread *prev_packet_thread; + ocsd_ex_level prev_packet_el; + ocsd_ex_level el; struct branch_stack *last_branch; struct branch_stack *last_branch_rb; struct cs_etm_packet *prev_packet; @@ -172,44 +172,46 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) } /* - * The 
returned PID format is presented by two bits: + * The returned PID format is presented as an enum: * - * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced; - * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced. + * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced. + * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. + * CS_ETM_PIDFMT_NONE: No context IDs * * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 * are enabled at the same time when the session runs on an EL2 kernel. * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be * recorded in the trace data, the tool will selectively use * CONTEXTIDR_EL2 as PID. + * + * The result is cached in etm->pid_fmt so this function only needs to be called + * when processing the aux info. */ -int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) +static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) { - struct int_node *inode; - u64 *metadata, val; - - inode = intlist__find(traceid_list, trace_chan_id); - if (!inode) - return -EINVAL; - - metadata = inode->priv; + u64 val; if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { val = metadata[CS_ETM_ETMCR]; /* CONTEXTIDR is traced */ if (val & BIT(ETM_OPT_CTXTID)) - *pid_fmt = BIT(ETM_OPT_CTXTID); + return CS_ETM_PIDFMT_CTXTID; } else { val = metadata[CS_ETMV4_TRCCONFIGR]; /* CONTEXTIDR_EL2 is traced */ if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) - *pid_fmt = BIT(ETM_OPT_CTXTID2); + return CS_ETM_PIDFMT_CTXTID2; /* CONTEXTIDR_EL1 is traced */ else if (val & BIT(ETM4_CFG_BIT_CTXTID)) - *pid_fmt = BIT(ETM_OPT_CTXTID); + return CS_ETM_PIDFMT_CTXTID; } - return 0; + return CS_ETM_PIDFMT_NONE; +} + +enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq) +{ + return etmq->etm->pid_fmt; } static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata) @@ -480,9 +482,11 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, cs_etm__clear_packet_queue(&tidq->packet_queue); queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; - tidq->tid = queue->tid; - tidq->pid = -1; tidq->trace_chan_id = trace_chan_id; + tidq->el = tidq->prev_packet_el = ocsd_EL_unknown; + tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1, + queue->tid); + tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host); tidq->packet = zalloc(sizeof(struct cs_etm_packet)); if (!tidq->packet) @@ -615,10 +619,21 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, /* * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. + * + * Threads and exception levels are also tracked for both the + * previous and current packets. This is because the previous + * packet is used for the 'from' IP for branch samples, so the + * thread at that time must also be assigned to that sample. + * Across discontinuity packets the thread can change, so by + * tracking the thread for the previous packet the branch sample + * will have the correct info. 
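/*
 * Consequence of the swap below, as used later when synthesising
 * samples: a branch sample's 'from' address comes from prev_packet,
 * so its pid/tid are taken from prev_packet_thread and its cpumode
 * from prev_packet_el, while instruction samples keep using
 * tidq->thread and tidq->el for the current packet.
 */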
*/ tmp = tidq->packet; tidq->packet = tidq->prev_packet; tidq->prev_packet = tmp; + tidq->prev_packet_el = tidq->el; + thread__put(tidq->prev_packet_thread); + tidq->prev_packet_thread = thread__get(tidq->thread); } } @@ -794,6 +809,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) /* Free this traceid_queue from the array */ tidq = etmq->traceid_queues[idx]; thread__zput(tidq->thread); + thread__zput(tidq->prev_packet_thread); zfree(&tidq->event_buf); zfree(&tidq->last_branch); zfree(&tidq->last_branch_rb); @@ -863,7 +879,6 @@ static void cs_etm__free(struct perf_session *session) for (i = 0; i < aux->num_cpu; i++) zfree(&aux->metadata[i]); - thread__zput(aux->unknown_thread); zfree(&aux->metadata); zfree(&aux); } @@ -878,11 +893,43 @@ static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, return evsel->core.attr.type == aux->pmu_type; } -static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) +static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq, + ocsd_ex_level el) { - struct machine *machine; + enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq); + + /* + * For any virtualisation based on nVHE (e.g. pKVM), or host kernels + * running at EL1 assume everything is the host. + */ + if (pid_fmt == CS_ETM_PIDFMT_CTXTID) + return &etmq->etm->session->machines.host; + + /* + * Not perfect, but otherwise assume anything in EL1 is the default + * guest, and everything else is the host. Distinguishing between guest + * and host userspaces isn't currently supported either. Neither is + * multiple guest support. All this does is reduce the likeliness of + * decode errors where we look into the host kernel maps when it should + * have been the guest maps. + */ + switch (el) { + case ocsd_EL1: + return machines__find_guest(&etmq->etm->session->machines, + DEFAULT_GUEST_KERNEL_ID); + case ocsd_EL3: + case ocsd_EL2: + case ocsd_EL0: + case ocsd_EL_unknown: + default: + return &etmq->etm->session->machines.host; + } +} - machine = etmq->etm->machine; +static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address, + ocsd_ex_level el) +{ + struct machine *machine = cs_etm__get_machine(etmq, el); if (address >= machine__kernel_start(machine)) { if (machine__is_host(machine)) @@ -892,57 +939,74 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) } else { if (machine__is_host(machine)) return PERF_RECORD_MISC_USER; - else if (perf_guest) + else { + /* + * Can't really happen at the moment because + * cs_etm__get_machine() will always return + * machines.host for any non EL1 trace. 
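/*
 * Summary of the mapping implemented by cs_etm__get_machine() above:
 *   CS_ETM_PIDFMT_CTXTID trace -> host machine, whatever the EL
 *   ocsd_EL1 (otherwise)       -> default guest machine
 *   EL0/EL2/EL3/unknown        -> host machine
 */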
+ */ return PERF_RECORD_MISC_GUEST_USER; - else - return PERF_RECORD_MISC_HYPERVISOR; + } } } static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, - u64 address, size_t size, u8 *buffer) + u64 address, size_t size, u8 *buffer, + const ocsd_mem_space_acc_t mem_space) { u8 cpumode; u64 offset; int len; - struct thread *thread; - struct machine *machine; struct addr_location al; struct dso *dso; struct cs_etm_traceid_queue *tidq; + int ret = 0; if (!etmq) return 0; - machine = etmq->etm->machine; - cpumode = cs_etm__cpu_mode(etmq, address); + addr_location__init(&al); tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); if (!tidq) - return 0; + goto out; - thread = tidq->thread; - if (!thread) { - if (cpumode != PERF_RECORD_MISC_KERNEL) - return 0; - thread = etmq->etm->unknown_thread; + /* + * We've already tracked EL along side the PID in cs_etm__set_thread() + * so double check that it matches what OpenCSD thinks as well. It + * doesn't distinguish between EL0 and EL1 for this mem access callback + * so we had to do the extra tracking. Skip validation if it's any of + * the 'any' values. + */ + if (!(mem_space == OCSD_MEM_SPACE_ANY || + mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) { + if (mem_space & OCSD_MEM_SPACE_EL1N) { + /* Includes both non secure EL1 and EL0 */ + assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0); + } else if (mem_space & OCSD_MEM_SPACE_EL2) + assert(tidq->el == ocsd_EL2); + else if (mem_space & OCSD_MEM_SPACE_EL3) + assert(tidq->el == ocsd_EL3); } - if (!thread__find_map(thread, cpumode, address, &al)) - return 0; + cpumode = cs_etm__cpu_mode(etmq, address, tidq->el); + + if (!thread__find_map(tidq->thread, cpumode, address, &al)) + goto out; dso = map__dso(al.map); if (!dso) - return 0; + goto out; if (dso->data.status == DSO_DATA_STATUS_ERROR && dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) - return 0; + goto out; offset = map__map_ip(al.map, address); map__load(al.map); - len = dso__data_read_offset(dso, machine, offset, buffer, size); + len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)), + offset, buffer, size); if (len <= 0) { ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" @@ -953,10 +1017,12 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, dso->long_name ? 
dso->long_name : "Unknown"); dso->auxtrace_warned = true; } - return 0; + goto out; } - - return len; + ret = len; +out: + addr_location__exit(&al); + return ret; } static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, @@ -1172,8 +1238,8 @@ static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, { u8 instrBytes[2]; - cs_etm__mem_access(etmq, trace_chan_id, addr, - ARRAY_SIZE(instrBytes), instrBytes); + cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes), + instrBytes, 0); /* * T32 instruction size is indicated by bits[15:11] of the first * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 @@ -1303,39 +1369,34 @@ cs_etm__get_trace(struct cs_etm_queue *etmq) return etmq->buf_len; } -static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, - struct cs_etm_traceid_queue *tidq) +static void cs_etm__set_thread(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq, pid_t tid, + ocsd_ex_level el) { - if ((!tidq->thread) && (tidq->tid != -1)) - tidq->thread = machine__find_thread(etm->machine, -1, - tidq->tid); + struct machine *machine = cs_etm__get_machine(etmq, el); - if (tidq->thread) - tidq->pid = tidq->thread->pid_; + if (tid != -1) { + thread__zput(tidq->thread); + tidq->thread = machine__find_thread(machine, -1, tid); + } + + /* Couldn't find a known thread */ + if (!tidq->thread) + tidq->thread = machine__idle_thread(machine); + + tidq->el = el; } -int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, - pid_t tid, u8 trace_chan_id) +int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid, + u8 trace_chan_id, ocsd_ex_level el) { - int cpu, err = -EINVAL; - struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_traceid_queue *tidq; tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); if (!tidq) - return err; - - if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) - return err; - - err = machine__set_current_tid(etm->machine, cpu, tid, tid); - if (err) - return err; - - tidq->tid = tid; - thread__zput(tidq->thread); + return -EINVAL; - cs_etm__set_pid_tid_cpu(etm, tidq); + cs_etm__set_thread(etmq, tidq, tid, el); return 0; } @@ -1369,8 +1430,8 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq, else sample->insn_len = 4; - cs_etm__mem_access(etmq, trace_chan_id, sample->ip, - sample->insn_len, (void *)sample->insn); + cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len, + (void *)sample->insn, 0); } u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) @@ -1405,15 +1466,15 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct perf_sample sample = {.ip = 0,}; event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); + event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el); event->sample.header.size = sizeof(struct perf_event_header); /* Set time field based on etm auxtrace config. 
*/ sample.time = cs_etm__resolve_sample_time(etmq, tidq); sample.ip = addr; - sample.pid = tidq->pid; - sample.tid = tidq->tid; + sample.pid = thread__pid(tidq->thread); + sample.tid = thread__tid(tidq->thread); sample.id = etmq->etm->instructions_id; sample.stream_id = etmq->etm->instructions_id; sample.period = period; @@ -1464,15 +1525,16 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, ip = cs_etm__last_executed_instr(tidq->prev_packet); event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); + event->sample.header.misc = cs_etm__cpu_mode(etmq, ip, + tidq->prev_packet_el); event->sample.header.size = sizeof(struct perf_event_header); /* Set time field based on etm auxtrace config. */ sample.time = cs_etm__resolve_sample_time(etmq, tidq); sample.ip = ip; - sample.pid = tidq->pid; - sample.tid = tidq->tid; + sample.pid = thread__pid(tidq->prev_packet_thread); + sample.tid = thread__tid(tidq->prev_packet_thread); sample.addr = cs_etm__first_executed_instr(tidq->packet); sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; @@ -1922,8 +1984,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * so below only read 2 bytes as instruction size for T32. */ addr = end_addr - 2; - cs_etm__mem_access(etmq, trace_chan_id, addr, - sizeof(instr16), (u8 *)&instr16); + cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16), + (u8 *)&instr16, 0); if ((instr16 & 0xFF00) == 0xDF00) return true; @@ -1938,8 +2000,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * +---------+---------+-------------------------+ */ addr = end_addr - 4; - cs_etm__mem_access(etmq, trace_chan_id, addr, - sizeof(instr32), (u8 *)&instr32); + cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), + (u8 *)&instr32, 0); if ((instr32 & 0x0F000000) == 0x0F000000 && (instr32 & 0xF0000000) != 0xF0000000) return true; @@ -1955,8 +2017,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * +-----------------------+---------+-----------+ */ addr = end_addr - 4; - cs_etm__mem_access(etmq, trace_chan_id, addr, - sizeof(instr32), (u8 *)&instr32); + cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), + (u8 *)&instr32, 0); if ((instr32 & 0xFFE0001F) == 0xd4000001) return true; @@ -2466,11 +2528,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, if (!etmq) continue; - /* - * Per-cpu mode has contextIDs in the trace and the decoder - * calls cs_etm__set_pid_tid_cpu() automatically so no need - * to do this here - */ if (etm->per_thread_decoding) { tidq = cs_etm__etmq_get_traceid_queue( etmq, CS_ETM_PER_THREAD_TRACEID); @@ -2478,10 +2535,8 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, if (!tidq) continue; - if ((tid == -1) || (tidq->tid == tid)) { - cs_etm__set_pid_tid_cpu(etm, tidq); + if (tid == -1 || thread__tid(tidq->thread) == tid) cs_etm__run_per_thread_timeless_decoder(etmq); - } } else cs_etm__run_per_cpu_timeless_decoder(etmq); } @@ -2611,10 +2666,12 @@ static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, return 0; /* - * Add the tid/pid to the log so that we can get a match when - * we get a contextID from the decoder. + * Add the tid/pid to the log so that we can get a match when we get a + * contextID from the decoder. Only track for the host: only kernel + * trace is supported for guests which wouldn't need pids so this should + * be fine. 
*/ - th = machine__findnew_thread(etm->machine, + th = machine__findnew_thread(&etm->session->machines.host, event->itrace_start.pid, event->itrace_start.tid); if (!th) @@ -2647,10 +2704,12 @@ static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, return 0; /* - * Add the tid/pid to the log so that we can get a match when - * we get a contextID from the decoder. + * Add the tid/pid to the log so that we can get a match when we get a + * contextID from the decoder. Only track for the host: only kernel + * trace is supported for guests which wouldn't need pids so this should + * be fine. */ - th = machine__findnew_thread(etm->machine, + th = machine__findnew_thread(&etm->session->machines.host, event->context_switch.next_prev_pid, event->context_switch.next_prev_tid); if (!th) @@ -3246,6 +3305,13 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, goto err_free_metadata; } + /* + * As all the ETMs run at the same exception level, the system should + * have the same PID format crossing CPUs. So cache the PID format + * and reuse it for sequential decoding. + */ + etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]); + err = auxtrace_queues__init(&etm->queues); if (err) goto err_free_etm; @@ -3259,7 +3325,6 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, } etm->session = session; - etm->machine = &session->machines.host; etm->num_cpu = num_cpu; etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff); @@ -3286,27 +3351,6 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, if (err) return err; - etm->unknown_thread = thread__new(999999999, 999999999); - if (!etm->unknown_thread) { - err = -ENOMEM; - goto err_free_queues; - } - - /* - * Initialize list node so that at thread__zput() we can avoid - * segmentation fault at list_del_init(). - */ - INIT_LIST_HEAD(&etm->unknown_thread->node); - - err = thread__set_comm(etm->unknown_thread, "unknown", 0); - if (err) - goto err_delete_thread; - - if (thread__init_maps(etm->unknown_thread, etm->machine)) { - err = -ENOMEM; - goto err_delete_thread; - } - etm->tc.time_shift = tc->time_shift; etm->tc.time_mult = tc->time_mult; etm->tc.time_zero = tc->time_zero; @@ -3318,7 +3362,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, } err = cs_etm__synth_events(etm, session); if (err) - goto err_delete_thread; + goto err_free_queues; /* * Map Trace ID values to CPU metadata. 
@@ -3348,7 +3392,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, session->header.data_size, cs_etm__process_aux_hw_id_cb, &aux_hw_id_found); if (err) - goto err_delete_thread; + goto err_free_queues; /* if HW ID found then clear any unused metadata ID values */ if (aux_hw_id_found) @@ -3358,17 +3402,15 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, err = cs_etm__map_trace_ids_metadata(num_cpu, metadata); if (err) - goto err_delete_thread; + goto err_free_queues; err = cs_etm__queue_aux_records(session); if (err) - goto err_delete_thread; + goto err_free_queues; etm->data_queued = etm->queues.populated; return 0; -err_delete_thread: - thread__zput(etm->unknown_thread); err_free_queues: auxtrace_queues__free(&etm->queues); session->auxtrace = NULL; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index ecca40787ac9..7cca37887917 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -244,11 +244,18 @@ int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu); +enum cs_etm_pid_fmt { + CS_ETM_PIDFMT_NONE, + CS_ETM_PIDFMT_CTXTID, + CS_ETM_PIDFMT_CTXTID2 +}; + #ifdef HAVE_CSTRACE_SUPPORT +#include <opencsd/ocsd_if_types.h> int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); -int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt); -int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, - pid_t tid, u8 trace_chan_id); +enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq); +int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid, + u8 trace_chan_id, ocsd_ex_level el); bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id); diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 653709ab867a..5bb3c2ba95ca 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -154,12 +154,14 @@ static int process_sample_event(struct perf_tool *tool, { struct convert_json *c = container_of(tool, struct convert_json, tool); FILE *out = c->out; - struct addr_location al, tal; + struct addr_location al; u64 sample_type = __evlist__combined_sample_type(evsel->evlist); u8 cpumode = PERF_RECORD_MISC_USER; + addr_location__init(&al); if (machine__resolve(machine, &al, sample) < 0) { pr_err("Sample resolution failed!\n"); + addr_location__exit(&al); return -1; } @@ -172,13 +174,13 @@ static int process_sample_event(struct perf_tool *tool, output_json_format(out, false, 2, "{"); output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time); - output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_); - output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid); + output_json_key_format(out, true, 3, "pid", "%i", thread__pid(al.thread)); + output_json_key_format(out, true, 3, "tid", "%i", thread__tid(al.thread)); if ((sample_type & PERF_SAMPLE_CPU)) output_json_key_format(out, true, 3, "cpu", "%i", sample->cpu); - else if (al.thread->cpu >= 0) - output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu); + else if (thread__cpu(al.thread) >= 0) + output_json_key_format(out, true, 3, "cpu", "%i", thread__cpu(al.thread)); output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread)); @@ -190,6 +192,7 @@ static int process_sample_event(struct perf_tool *tool, for (i = 0; i < sample->callchain->nr; ++i) { u64 ip = 
sample->callchain->ips[i]; + struct addr_location tal; if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -215,8 +218,10 @@ static int process_sample_event(struct perf_tool *tool, else fputc(',', out); + addr_location__init(&tal); ok = thread__find_symbol(al.thread, cpumode, ip, &tal); output_sample_callchain_entry(tool, ip, ok ? &tal : NULL); + addr_location__exit(&tal); } } else { output_sample_callchain_entry(tool, sample->ip, &al); @@ -245,6 +250,7 @@ static int process_sample_event(struct perf_tool *tool, } #endif output_json_format(out, false, 2, "}"); + addr_location__exit(&al); return 0; } diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 84c970c11794..b9fb71ab7a73 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -64,13 +64,13 @@ int db_export__thread(struct db_export *dbe, struct thread *thread, { u64 main_thread_db_id = 0; - if (thread->db_id) + if (thread__db_id(thread)) return 0; - thread->db_id = ++dbe->thread_last_db_id; + thread__set_db_id(thread, ++dbe->thread_last_db_id); if (main_thread) - main_thread_db_id = main_thread->db_id; + main_thread_db_id = thread__db_id(main_thread); if (dbe->export_thread) return dbe->export_thread(dbe, thread, main_thread_db_id, @@ -215,6 +215,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, u64 kernel_start = machine__kernel_start(machine); struct call_path *current = &dbe->cpr->call_path; enum chain_order saved_order = callchain_param.order; + struct callchain_cursor *cursor; int err; if (!symbol_conf.use_callchain || !sample->callchain) @@ -226,32 +227,34 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, * the callchain starting with the root node and ending with the leaf. */ callchain_param.order = ORDER_CALLER; - err = thread__resolve_callchain(thread, &callchain_cursor, evsel, + cursor = get_tls_callchain_cursor(); + err = thread__resolve_callchain(thread, cursor, evsel, sample, NULL, NULL, PERF_MAX_STACK_DEPTH); if (err) { callchain_param.order = saved_order; return NULL; } - callchain_cursor_commit(&callchain_cursor); + callchain_cursor_commit(cursor); while (1) { struct callchain_cursor_node *node; struct addr_location al; u64 dso_db_id = 0, sym_db_id = 0, offset = 0; - memset(&al, 0, sizeof(al)); - node = callchain_cursor_current(&callchain_cursor); + node = callchain_cursor_current(cursor); if (!node) break; + /* * Handle export of symbol and dso for this node by * constructing an addr_location struct and then passing it to * db_ids_from_al() to perform the export. */ + addr_location__init(&al); al.sym = node->ms.sym; al.map = node->ms.map; - al.maps = thread->maps; + al.maps = thread__maps(thread); al.addr = node->ip; if (al.map && !al.sym) @@ -264,7 +267,8 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, al.sym, node->ip, kernel_start); - callchain_cursor_advance(&callchain_cursor); + callchain_cursor_advance(cursor); + addr_location__exit(&al); } /* Reset the callchain order to its prior value. */ @@ -321,7 +325,7 @@ static int db_export__threads(struct db_export *dbe, struct thread *thread, * For a non-main thread, db_export__comm_thread() must be * called only if thread has not previously been exported. 
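/*
 * Sketch of the accessor conversion applied throughout this file
 * (struct thread's definition is outside this section): direct field
 * access such as thread->db_id, thread->pid_ and thread->tid is
 * replaced by thread__db_id(), thread__set_db_id(), thread__pid()
 * and thread__tid(), keeping struct thread opaque so the
 * reference-count checking (RC_CHK) machinery can wrap it.
 */
static void sketch_assign_db_id(struct db_export *dbe, struct thread *thread)
{
        /* Replaces the old direct write "thread->db_id = ++...". */
        if (!thread__db_id(thread))
                thread__set_db_id(thread, ++dbe->thread_last_db_id);
}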
*/ - bool export_comm_thread = comm && !thread->db_id; + bool export_comm_thread = comm && !thread__db_id(thread); err = db_export__thread(dbe, thread, machine, main_thread); if (err) @@ -529,16 +533,16 @@ static int db_export__pid_tid(struct db_export *dbe, struct machine *machine, struct thread *main_thread; int err = 0; - if (!thread || !thread->comm_set) + if (!thread || !thread__comm_set(thread)) goto out_put; - *is_idle = !thread->pid_ && !thread->tid; + *is_idle = !thread__pid(thread) && !thread__tid(thread); main_thread = thread__main_thread(machine, thread); err = db_export__threads(dbe, thread, main_thread, machine, comm_ptr); - *db_id = thread->db_id; + *db_id = thread__db_id(thread); thread__put(main_thread); out_put: diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index 16238f823a5e..46f74b2344db 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -197,8 +197,8 @@ static const __u8 *dlfilter__insn(void *ctx, __u32 *len) if (!al->thread && machine__resolve(d->machine, al, d->sample) < 0) return NULL; - if (al->thread->maps) { - struct machine *machine = maps__machine(al->thread->maps); + if (thread__maps(al->thread)) { + struct machine *machine = maps__machine(thread__maps(al->thread)); if (machine) script_fetch_insn(d->sample, al->thread, machine); @@ -258,6 +258,7 @@ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) struct addr_location a; struct map *map; u64 offset; + __s32 ret; if (!d->ctx_valid) return -1; @@ -272,16 +273,22 @@ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip)) goto have_map; + addr_location__init(&a); thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a); - if (!a.map) - return -1; + if (!a.map) { + ret = -1; + goto out; + } map = a.map; have_map: offset = map__map_ip(map, ip); if (ip + len >= map__end(map)) len = map__end(map) - ip; - return dso__data_read_offset(map__dso(map), d->machine, offset, buf, len); + ret = dso__data_read_offset(map__dso(map), d->machine, offset, buf, len); +out: + addr_location__exit(&a); + return ret; } static const struct perf_dlfilter_fns perf_dlfilter_fns = { diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index a86614599269..046fbfcfdaab 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -67,6 +67,39 @@ char dso__symtab_origin(const struct dso *dso) return origin[dso->symtab_type]; } +bool dso__is_object_file(const struct dso *dso) +{ + switch (dso->binary_type) { + case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__JAVA_JIT: + case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: + case DSO_BINARY_TYPE__OOL: + return false; + case DSO_BINARY_TYPE__VMLINUX: + case DSO_BINARY_TYPE__GUEST_VMLINUX: + case DSO_BINARY_TYPE__DEBUGLINK: + case DSO_BINARY_TYPE__BUILD_ID_CACHE: + case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: + case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: + case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + case DSO_BINARY_TYPE__GUEST_KMODULE: + case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: + case DSO_BINARY_TYPE__KCORE: + case DSO_BINARY_TYPE__GUEST_KCORE: + case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: + case 
DSO_BINARY_TYPE__NOT_FOUND: + default: + return true; + } +} + int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 0b7c7633b9f6..b23a157c914d 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -379,23 +379,25 @@ void dso__reset_find_symbol_cache(struct dso *dso); size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp); size_t dso__fprintf(struct dso *dso, FILE *fp); -static inline bool dso__is_vmlinux(struct dso *dso) +static inline bool dso__is_vmlinux(const struct dso *dso) { return dso->binary_type == DSO_BINARY_TYPE__VMLINUX || dso->binary_type == DSO_BINARY_TYPE__GUEST_VMLINUX; } -static inline bool dso__is_kcore(struct dso *dso) +static inline bool dso__is_kcore(const struct dso *dso) { return dso->binary_type == DSO_BINARY_TYPE__KCORE || dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE; } -static inline bool dso__is_kallsyms(struct dso *dso) +static inline bool dso__is_kallsyms(const struct dso *dso) { return dso->kernel && dso->long_name[0] != '/'; } +bool dso__is_object_file(const struct dso *dso); + void dso__free_a2l(struct dso *dso); enum dso_type dso__type(struct dso *dso, struct machine *machine); diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index b07414409771..45e018c0ebf5 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1074,16 +1074,18 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) /* Function pointer */ return strbuf_add(buf, "(function_type)", 15); } else { - if (!dwarf_diename(&type)) - return -ENOENT; + const char *name = dwarf_diename(&type); + if (tag == DW_TAG_union_type) tmp = "union "; else if (tag == DW_TAG_structure_type) tmp = "struct "; else if (tag == DW_TAG_enumeration_type) tmp = "enum "; + else if (name == NULL) + return -ENOENT; /* Write a base name */ - return strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type)); + return strbuf_addf(buf, "%s%s", tmp, name ?: ""); } ret = die_get_typename(&type, buf); return ret ? ret : strbuf_addstr(buf, tmp); @@ -1103,7 +1105,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) ret = die_get_typename(vr_die, buf); if (ret < 0) { pr_debug("Failed to get type, make it unknown.\n"); - ret = strbuf_add(buf, " (unknown_type)", 14); + ret = strbuf_add(buf, "(unknown_type)", 14); } return ret < 0 ? 
ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 4a4fdad820d6..9eabf3ec56e9 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -10,6 +10,7 @@ #include <sys/utsname.h> #include <stdlib.h> #include <string.h> +#include "pmus.h" #include "strbuf.h" struct perf_env perf_env; @@ -323,7 +324,7 @@ int perf_env__read_pmu_mappings(struct perf_env *env) u32 pmu_num = 0; struct strbuf sb; - while ((pmu = perf_pmu__scan(pmu))) { + while ((pmu = perf_pmus__scan(pmu))) { if (!pmu->name) continue; pmu_num++; @@ -337,7 +338,7 @@ int perf_env__read_pmu_mappings(struct perf_env *env) if (strbuf_init(&sb, 128 * pmu_num) < 0) return -ENOMEM; - while ((pmu = perf_pmu__scan(pmu))) { + while ((pmu = perf_pmus__scan(pmu))) { if (!pmu->name) continue; if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 8ae742e32e3c..3860b0c74829 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -135,9 +135,10 @@ void perf_event__read_stat_config(struct perf_stat_config *config, config->__val = event->data[i].val; \ break; - CASE(AGGR_MODE, aggr_mode) - CASE(SCALE, scale) - CASE(INTERVAL, interval) + CASE(AGGR_MODE, aggr_mode) + CASE(SCALE, scale) + CASE(INTERVAL, interval) + CASE(AGGR_LEVEL, aggr_level) #undef CASE default: pr_warning("unknown stat config term %" PRI_lu64 "\n", @@ -485,6 +486,7 @@ size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *ma if (machine) { struct addr_location al; + addr_location__init(&al); al.map = map__get(maps__find(machine__kernel_maps(machine), tp->addr)); if (al.map && map__load(al.map) >= 0) { al.addr = map__map_ip(al.map, tp->addr); @@ -492,7 +494,7 @@ size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *ma if (al.sym) ret += symbol__fprintf_symname_offs(al.sym, &al, fp); } - map__put(al.map); + addr_location__exit(&al); } ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len); old = true; @@ -572,12 +574,14 @@ int perf_event__process(struct perf_tool *tool __maybe_unused, struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { - struct maps *maps = thread->maps; + struct maps *maps = thread__maps(thread); struct machine *machine = maps__machine(maps); bool load_map = false; - al->maps = maps; - al->thread = thread; + maps__zput(al->maps); + map__zput(al->map); + thread__zput(al->thread); + al->addr = addr; al->cpumode = cpumode; al->filtered = 0; @@ -589,13 +593,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; - al->maps = maps = machine__kernel_maps(machine); + maps = machine__kernel_maps(machine); load_map = true; } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { al->level = 'g'; - al->maps = maps = machine__kernel_maps(machine); + maps = machine__kernel_maps(machine); load_map = true; } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) { al->level = 'u'; @@ -614,7 +618,8 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return NULL; } - + al->maps = maps__get(maps); + al->thread = thread__get(thread); al->map = map__get(maps__find(maps, al->addr)); if (al->map != NULL) { /* @@ -638,7 +643,7 @@ struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 
addr, struct addr_location *al) { struct map *map = thread__find_map(thread, cpumode, addr, al); - struct machine *machine = maps__machine(thread->maps); + struct machine *machine = maps__machine(thread__maps(thread)); u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr); if (map || addr_cpumode == cpumode) @@ -695,7 +700,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, if (thread == NULL) return -1; - dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); + dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread__tid(thread)); thread__find_map(thread, sample->cpumode, sample->ip, al); dso = al->map ? map__dso(al->map) : NULL; dump_printf(" ...... dso: %s\n", @@ -706,6 +711,9 @@ int machine__resolve(struct machine *machine, struct addr_location *al, if (thread__is_filtered(thread)) al->filtered |= (1 << HIST_FILTER__THREAD); + thread__put(thread); + thread = NULL; + al->sym = NULL; al->cpu = sample->cpu; al->socket = -1; @@ -766,18 +774,6 @@ int machine__resolve(struct machine *machine, struct addr_location *al, return 0; } -/* - * The preprocess_sample method will return with reference counts for the - * in it, when done using (and perhaps getting ref counts if needing to - * keep a pointer to one of those entries) it must be paired with - * addr_location__put(), so that the refcounts can be decremented. - */ -void addr_location__put(struct addr_location *al) -{ - map__zput(al->map); - thread__zput(al->thread); -} - bool is_bts_event(struct perf_event_attr *attr) { return attr->type == PERF_TYPE_HARDWARE && diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c deleted file mode 100644 index 57f02beef023..000000000000 --- a/tools/perf/util/evlist-hybrid.c +++ /dev/null @@ -1,162 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include <errno.h> -#include <inttypes.h> -#include "cpumap.h" -#include "evlist.h" -#include "evsel.h" -#include "../perf.h" -#include "util/pmu-hybrid.h" -#include "util/evlist-hybrid.h" -#include "debug.h" -#include <unistd.h> -#include <stdlib.h> -#include <linux/err.h> -#include <linux/string.h> -#include <perf/evlist.h> -#include <perf/evsel.h> -#include <perf/cpumap.h> - -int evlist__add_default_hybrid(struct evlist *evlist, bool precise) -{ - struct evsel *evsel; - struct perf_pmu *pmu; - __u64 config; - struct perf_cpu_map *cpus; - - perf_pmu__for_each_hybrid_pmu(pmu) { - config = PERF_COUNT_HW_CPU_CYCLES | - ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT); - evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE, - config); - if (!evsel) - return -ENOMEM; - - cpus = perf_cpu_map__get(pmu->cpus); - evsel->core.cpus = cpus; - evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->pmu_name = strdup(pmu->name); - evlist__add(evlist, evsel); - } - - return 0; -} - -static bool group_hybrid_conflict(struct evsel *leader) -{ - struct evsel *pos, *prev = NULL; - - for_each_group_evsel(pos, leader) { - if (!evsel__is_hybrid(pos)) - continue; - - if (prev && strcmp(prev->pmu_name, pos->pmu_name)) - return true; - - prev = pos; - } - - return false; -} - -void evlist__warn_hybrid_group(struct evlist *evlist) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - if (evsel__is_group_leader(evsel) && - evsel->core.nr_members > 1 && - group_hybrid_conflict(evsel)) { - pr_warning("WARNING: events in group from " - "different hybrid PMUs!\n"); - return; - } - } -} - -bool evlist__has_hybrid(struct evlist *evlist) -{ - struct evsel *evsel; - - 
evlist__for_each_entry(evlist, evsel) { - if (evsel->pmu_name && - perf_pmu__is_hybrid(evsel->pmu_name)) { - return true; - } - } - - return false; -} - -int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list) -{ - struct perf_cpu_map *cpus; - struct evsel *evsel, *tmp; - struct perf_pmu *pmu; - int ret, unmatched_count = 0, events_nr = 0; - - if (!perf_pmu__has_hybrid() || !cpu_list) - return 0; - - cpus = perf_cpu_map__new(cpu_list); - if (!cpus) - return -1; - - /* - * The evsels are created with hybrid pmu's cpus. But now we - * need to check and adjust the cpus of evsel by cpu_list because - * cpu_list may cause conflicts with cpus of evsel. For example, - * cpus of evsel is cpu0-7, but the cpu_list is cpu6-8, we need - * to adjust the cpus of evsel to cpu6-7. And then propatate maps - * in evlist__create_maps(). - */ - evlist__for_each_entry_safe(evlist, tmp, evsel) { - struct perf_cpu_map *matched_cpus, *unmatched_cpus; - char buf1[128], buf2[128]; - - pmu = perf_pmu__find_hybrid_pmu(evsel->pmu_name); - if (!pmu) - continue; - - ret = perf_pmu__cpus_match(pmu, cpus, &matched_cpus, - &unmatched_cpus); - if (ret) - goto out; - - events_nr++; - - if (perf_cpu_map__nr(matched_cpus) > 0 && - (perf_cpu_map__nr(unmatched_cpus) > 0 || - perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) || - perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) { - perf_cpu_map__put(evsel->core.cpus); - perf_cpu_map__put(evsel->core.own_cpus); - evsel->core.cpus = perf_cpu_map__get(matched_cpus); - evsel->core.own_cpus = perf_cpu_map__get(matched_cpus); - - if (perf_cpu_map__nr(unmatched_cpus) > 0) { - cpu_map__snprint(matched_cpus, buf1, sizeof(buf1)); - pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n", - buf1, pmu->name, evsel->name); - } - } - - if (perf_cpu_map__nr(matched_cpus) == 0) { - evlist__remove(evlist, evsel); - evsel__delete(evsel); - - cpu_map__snprint(cpus, buf1, sizeof(buf1)); - cpu_map__snprint(pmu->cpus, buf2, sizeof(buf2)); - pr_warning("WARNING: %s isn't a '%s', please use a CPU list in the '%s' range (%s)\n", - buf1, pmu->name, pmu->name, buf2); - unmatched_count++; - } - - perf_cpu_map__put(matched_cpus); - perf_cpu_map__put(unmatched_cpus); - } - if (events_nr) - ret = (unmatched_count == events_nr) ? 
-1 : 0; -out: - perf_cpu_map__put(cpus); - return ret; -} diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h deleted file mode 100644 index aacdb1b0f948..000000000000 --- a/tools/perf/util/evlist-hybrid.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERF_EVLIST_HYBRID_H -#define __PERF_EVLIST_HYBRID_H - -#include <linux/compiler.h> -#include <linux/kernel.h> -#include "evlist.h" -#include <unistd.h> - -int evlist__add_default_hybrid(struct evlist *evlist, bool precise); -void evlist__warn_hybrid_group(struct evlist *evlist); -bool evlist__has_hybrid(struct evlist *evlist); -int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list); - -#endif /* __PERF_EVLIST_HYBRID_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a0504316b06f..7ef43f72098e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -28,10 +28,10 @@ #include "util/string2.h" #include "util/perf_api_probe.h" #include "util/evsel_fprintf.h" -#include "util/evlist-hybrid.h" #include "util/pmu.h" #include "util/sample.h" #include "util/bpf-filter.h" +#include "util/stat.h" #include "util/util.h" #include <signal.h> #include <unistd.h> @@ -93,8 +93,15 @@ struct evlist *evlist__new(void) struct evlist *evlist__new_default(void) { struct evlist *evlist = evlist__new(); + bool can_profile_kernel; + int err; + + if (!evlist) + return NULL; - if (evlist && evlist__add_default(evlist)) { + can_profile_kernel = perf_event_paranoid_check(1); + err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); + if (err) { evlist__delete(evlist); evlist = NULL; } @@ -165,6 +172,7 @@ void evlist__delete(struct evlist *evlist) if (evlist == NULL) return; + evlist__free_stats(evlist); evlist__munmap(evlist); evlist__close(evlist); evlist__purge(evlist); @@ -237,19 +245,6 @@ static void evlist__set_leader(struct evlist *evlist) perf_evlist__set_leader(&evlist->core); } -int __evlist__add_default(struct evlist *evlist, bool precise) -{ - struct evsel *evsel; - - evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE, - PERF_COUNT_HW_CPU_CYCLES); - if (evsel == NULL) - return -ENOMEM; - - evlist__add(evlist, evsel); - return 0; -} - static struct evsel *evlist__dummy_event(struct evlist *evlist) { struct perf_event_attr attr = { @@ -1067,7 +1062,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target) if (!cpus) goto out_delete_threads; - evlist->core.has_user_cpus = !!target->cpu_list && !target->hybrid; + evlist->core.has_user_cpus = !!target->cpu_list; perf_evlist__set_maps(&evlist->core, cpus, threads); @@ -2465,3 +2460,42 @@ void evlist__check_mem_load_aux(struct evlist *evlist) } } } + +/** + * evlist__warn_user_requested_cpus() - Check each evsel against requested CPUs + * and warn if the user CPU list is inapplicable for the event's PMU's + * CPUs. Not core PMUs list a CPU in sysfs, but this may be overwritten by a + * user requested CPU and so any online CPU is applicable. Core PMUs handle + * events on the CPUs in their list and otherwise the event isn't supported. + * @evlist: The list of events being checked. + * @cpu_list: The user provided list of CPUs. 
+ */ +void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list) +{ + struct perf_cpu_map *user_requested_cpus; + struct evsel *pos; + + if (!cpu_list) + return; + + user_requested_cpus = perf_cpu_map__new(cpu_list); + if (!user_requested_cpus) + return; + + evlist__for_each_entry(evlist, pos) { + struct perf_cpu_map *intersect, *to_test; + const struct perf_pmu *pmu = evsel__find_pmu(pos); + + to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online(); + intersect = perf_cpu_map__intersect(to_test, user_requested_cpus); + if (!perf_cpu_map__equal(intersect, user_requested_cpus)) { + char buf[128]; + + cpu_map__snprint(to_test, buf, sizeof(buf)); + pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n", + cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos)); + } + perf_cpu_map__put(intersect); + } + perf_cpu_map__put(user_requested_cpus); +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 46cf402add93..664c6bf7b3e0 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -67,7 +67,6 @@ struct evlist { struct evsel *selected; struct events_stats stats; struct perf_env *env; - const char *hybrid_pmu_name; void (*trace_event_sample_raw)(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); @@ -101,13 +100,6 @@ void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); -int __evlist__add_default(struct evlist *evlist, bool precise); - -static inline int evlist__add_default(struct evlist *evlist) -{ - return __evlist__add_default(evlist, true); -} - int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); int __evlist__add_default_attrs(struct evlist *evlist, @@ -448,4 +440,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); void evlist__check_mem_load_aux(struct evlist *evlist); +void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c2dbb5647e75..f607b5bddc76 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -46,8 +46,9 @@ #include "memswap.h" #include "util.h" #include "util/hashmap.h" -#include "pmu-hybrid.h" #include "off_cpu.h" +#include "pmu.h" +#include "pmus.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" #include "util/bpf-filter.h" @@ -291,6 +292,7 @@ void evsel__init(struct evsel *evsel, evsel->per_pkg_mask = NULL; evsel->collect_stat = false; evsel->pmu_name = NULL; + evsel->group_pmu_name = NULL; evsel->skippable = false; } @@ -317,48 +319,6 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) return evsel; } -static bool perf_event_can_profile_kernel(void) -{ - return perf_event_paranoid_check(1); -} - -struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config) -{ - struct perf_event_attr attr = { - .type = type, - .config = config, - .exclude_kernel = !perf_event_can_profile_kernel(), - }; - struct evsel *evsel; - - event_attr_init(&attr); - - /* - * Now let the usual logic to set up the perf_event_attr defaults - * to kick in when we return and before perf_evsel__open() is called. 
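[Editor's aside, not part of the patch: a reduced sketch of the check evlist__warn_user_requested_cpus() above performs, for a single PMU. All calls are the perf-internal APIs used in that hunk; the helper name cpu_list_applicable is hypothetical.]

    static bool cpu_list_applicable(const struct perf_pmu *pmu, const char *cpu_list)
    {
            struct perf_cpu_map *requested = perf_cpu_map__new(cpu_list);
            struct perf_cpu_map *to_test, *intersect;
            bool ok;

            if (!requested)
                    return true;    /* nothing to check */

            /*
             * Core PMUs only support the CPUs they list in sysfs; for any
             * other PMU every online CPU is applicable.
             */
            to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online();
            intersect = perf_cpu_map__intersect(to_test, requested);
            ok = perf_cpu_map__equal(intersect, requested);
            perf_cpu_map__put(intersect);
            perf_cpu_map__put(requested);
            return ok;
    }

The requested list is applicable exactly when intersecting it with the supported set gives the requested list back unchanged.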
- */ - evsel = evsel__new(&attr); - if (evsel == NULL) - goto out; - - arch_evsel__fixup_new_cycles(&evsel->core.attr); - - evsel->precise_max = true; - - /* use asprintf() because free(evsel) assumes name is allocated */ - if (asprintf(&evsel->name, "cycles%s%s%.*s", - (attr.precise_ip || attr.exclude_kernel) ? ":" : "", - attr.exclude_kernel ? "u" : "", - attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0) - goto error_free; -out: - return evsel; -error_free: - evsel__delete(evsel); - evsel = NULL; - goto out; -} - int copy_config_terms(struct list_head *dst, struct list_head *src) { struct evsel_config_term *pos, *tmp; @@ -416,6 +376,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->core.nr_members = orig->core.nr_members; evsel->core.system_wide = orig->core.system_wide; evsel->core.requires_cpu = orig->core.requires_cpu; + evsel->core.is_pmu_core = orig->core.is_pmu_core; if (orig->name) { evsel->name = strdup(orig->name); @@ -432,6 +393,11 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->pmu_name == NULL) goto out_err; } + if (orig->group_pmu_name) { + evsel->group_pmu_name = strdup(orig->group_pmu_name); + if (evsel->group_pmu_name == NULL) + goto out_err; + } if (orig->filter) { evsel->filter = strdup(orig->filter); if (evsel->filter == NULL) @@ -828,30 +794,6 @@ bool evsel__name_is(struct evsel *evsel, const char *name) return !strcmp(evsel__name(evsel), name); } -const char *evsel__group_pmu_name(const struct evsel *evsel) -{ - struct evsel *leader = evsel__leader(evsel); - struct evsel *pos; - - /* - * Software events may be in a group with other uncore PMU events. Use - * the pmu_name of the first non-software event to avoid breaking the - * software event out of the group. - * - * Aux event leaders, like intel_pt, expect a group with events from - * other PMUs, so substitute the AUX event's PMU in this case. - */ - if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) { - /* Starting with the leader, find the first event with a named PMU. */ - for_each_group_evsel(pos, leader) { - if (pos->pmu_name) - return pos->pmu_name; - } - } - - return evsel->pmu_name ?: "cpu"; -} - const char *evsel__metric_id(const struct evsel *evsel) { if (evsel->metric_id) @@ -1131,10 +1073,6 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel) evsel__set_sample_bit(evsel, WEIGHT); } -void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused) -{ -} - void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused, struct perf_event_attr *attr __maybe_unused) { @@ -1537,6 +1475,7 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); + zfree(&evsel->group_pmu_name); zfree(&evsel->unit); zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); @@ -2100,6 +2039,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, fallback_missing_features: evsel__disable_missing_features(evsel); + pr_debug3("Opening: %s\n", evsel__name(evsel)); display_attr(&evsel->core.attr); for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { @@ -3178,9 +3118,17 @@ void evsel__zero_per_pkg(struct evsel *evsel) } } +/** + * evsel__is_hybrid - does the evsel have a known PMU that is hybrid. Note, this + * will be false on hybrid systems for hardware and legacy + * cache events. 
+ */ bool evsel__is_hybrid(const struct evsel *evsel) { - return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name); + if (perf_pmus__num_core_pmus() == 1) + return false; + + return evsel->core.is_pmu_core; } struct evsel *evsel__leader(const struct evsel *evsel) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0f54f28a69c2..9f06d6cd5379 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -10,6 +10,7 @@ #include <internal/evsel.h> #include <perf/evsel.h> #include "symbol_conf.h" +#include "pmus.h" struct bpf_object; struct cgroup; @@ -72,6 +73,7 @@ struct evsel { char *name; char *group_name; const char *pmu_name; + const char *group_pmu_name; #ifdef HAVE_LIBTRACEEVENT struct tep_event *tp_format; #endif @@ -129,6 +131,7 @@ struct evsel { bool reset_group; bool errored; bool needs_auxtrace_mmap; + bool default_metricgroup; /* A member of the Default metricgroup */ struct hashmap *per_pkg_mask; int err; struct { @@ -241,8 +244,6 @@ static inline struct evsel *evsel__newtp(const char *sys, const char *name) } #endif -struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config); - #ifdef HAVE_LIBTRACEEVENT struct tep_event *event_format__new(const char *sys, const char *name); #endif @@ -287,7 +288,6 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size); int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *evsel__name(struct evsel *evsel); bool evsel__name_is(struct evsel *evsel, const char *name); -const char *evsel__group_pmu_name(const struct evsel *evsel); const char *evsel__metric_id(const struct evsel *evsel); static inline bool evsel__is_tool(const struct evsel *evsel) @@ -310,7 +310,6 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); void arch_evsel__set_sample_weight(struct evsel *evsel); -void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); @@ -353,9 +352,19 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam struct tep_format_field *evsel__field(struct evsel *evsel, const char *name); -#define evsel__match(evsel, t, c) \ - (evsel->core.attr.type == PERF_TYPE_##t && \ - evsel->core.attr.config == PERF_COUNT_##c) +static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) +{ + if (evsel->core.attr.type != type) + return false; + + if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && + perf_pmus__supports_extended_type()) + return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config; + + return evsel->core.attr.config == config; +} + +#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c) static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) { @@ -460,16 +469,24 @@ static inline int evsel__group_idx(struct evsel *evsel) } /* Iterates group WITHOUT the leader. 
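[Editor's aside, not part of the patch: a standalone illustration of the extended-type encoding that the new __evsel__match() above has to mask off. On hybrid systems the high 32 bits of attr.config for PERF_TYPE_HARDWARE/PERF_TYPE_HW_CACHE events carry the PMU type; PERF_PMU_TYPE_SHIFT and PERF_HW_EVENT_MASK are the uapi linux/perf_event.h names, redefined here only so the demo compiles on its own, and the pmu_type value is made up.]

    #include <stdio.h>
    #include <stdint.h>

    #define PERF_PMU_TYPE_SHIFT       32
    #define PERF_HW_EVENT_MASK        0xffffffffULL
    #define PERF_COUNT_HW_CPU_CYCLES  0

    int main(void)
    {
            uint32_t pmu_type = 8;  /* e.g. one core PMU's dynamic type id */
            uint64_t config = ((uint64_t)pmu_type << PERF_PMU_TYPE_SHIFT) |
                              PERF_COUNT_HW_CPU_CYCLES;

            /* What evsel__match(evsel, HARDWARE, HW_CPU_CYCLES) now compares: */
            printf("raw config  = %#llx\n", (unsigned long long)config);
            printf("hw event id = %#llx\n",
                   (unsigned long long)(config & PERF_HW_EVENT_MASK));
            return 0;
    }

Without the mask, "cycles" opened on a second core PMU would no longer compare equal to PERF_COUNT_HW_CPU_CYCLES.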
*/ -#define for_each_group_member(_evsel, _leader) \ -for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \ - (_evsel) && (_evsel)->core.leader == (&_leader->core); \ - (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node)) +#define for_each_group_member_head(_evsel, _leader, _head) \ +for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \ + (_evsel) && &(_evsel)->core.node != (_head) && \ + (_evsel)->core.leader == &(_leader)->core; \ + (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node)) + +#define for_each_group_member(_evsel, _leader) \ + for_each_group_member_head(_evsel, _leader, &(_leader)->evlist->core.entries) /* Iterates group WITH the leader. */ -#define for_each_group_evsel(_evsel, _leader) \ -for ((_evsel) = _leader; \ - (_evsel) && (_evsel)->core.leader == (&_leader->core); \ - (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node)) +#define for_each_group_evsel_head(_evsel, _leader, _head) \ +for ((_evsel) = _leader; \ + (_evsel) && &(_evsel)->core.node != (_head) && \ + (_evsel)->core.leader == &(_leader)->core; \ + (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node)) + +#define for_each_group_evsel(_evsel, _leader) \ + for_each_group_evsel_head(_evsel, _leader, &(_leader)->evlist->core.entries) static inline bool evsel__has_branch_callstack(const struct evsel *evsel) { diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index cc80ec554c0a..8719b3cb5646 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -2,6 +2,7 @@ #include <inttypes.h> #include <stdio.h> #include <stdbool.h> +#include "util/evlist.h" #include "evsel.h" #include "util/evsel_fprintf.h" #include "util/event.h" @@ -116,6 +117,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, int print_ip = print_opts & EVSEL__PRINT_IP; int print_sym = print_opts & EVSEL__PRINT_SYM; int print_dso = print_opts & EVSEL__PRINT_DSO; + int print_dsoff = print_opts & EVSEL__PRINT_DSOFF; int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET; int print_oneline = print_opts & EVSEL__PRINT_ONELINE; int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; @@ -125,9 +127,10 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, char s = print_oneline ? ' ' : '\t'; bool first = true; - if (sample->callchain) { - struct addr_location node_al; + if (cursor == NULL) + return fprintf(fp, "<not enough memory for the callchain cursor>%s", print_oneline ? 
"" : "\n"); + if (sample->callchain) { callchain_cursor_commit(cursor); while (1) { @@ -157,9 +160,12 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); if (print_sym) { + struct addr_location node_al; + + addr_location__init(&node_al); printed += fprintf(fp, " "); node_al.addr = addr; - node_al.map = map; + node_al.map = map__get(map); if (print_symoffset) { printed += __symbol__fprintf_symname_offs(sym, &node_al, @@ -169,13 +175,11 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, printed += __symbol__fprintf_symname(sym, &node_al, print_unknown_as_addr, fp); } + addr_location__exit(&node_al); } - if (print_dso && (!sym || !sym->inlined)) { - printed += fprintf(fp, " ("); - printed += map__fprintf_dsoname(map, fp); - printed += fprintf(fp, ")"); - } + if (print_dso && (!sym || !sym->inlined)) + printed += map__fprintf_dsoname_dsoff(map, print_dsoff, addr, fp); if (print_srcline) printed += map__fprintf_srcline(map, addr, "\n ", fp); @@ -209,6 +213,7 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, int print_ip = print_opts & EVSEL__PRINT_IP; int print_sym = print_opts & EVSEL__PRINT_SYM; int print_dso = print_opts & EVSEL__PRINT_DSO; + int print_dsoff = print_opts & EVSEL__PRINT_DSOFF; int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET; int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; @@ -234,11 +239,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, } } - if (print_dso) { - printed += fprintf(fp, " ("); - printed += map__fprintf_dsoname(al->map, fp); - printed += fprintf(fp, ")"); - } + if (print_dso) + printed += map__fprintf_dsoname_dsoff(al->map, print_dsoff, al->addr, fp); if (print_srcline) printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h index 3093d096c29f..c8a9fac2f2dd 100644 --- a/tools/perf/util/evsel_fprintf.h +++ b/tools/perf/util/evsel_fprintf.h @@ -26,6 +26,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE #define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) #define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) #define EVSEL__PRINT_SKIP_IGNORED (1<<8) +#define EVSEL__PRINT_DSOFF (1<<9) struct addr_location; struct perf_event_attr; diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 4ce931cccb63..f04963eb6be0 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -123,20 +123,6 @@ static struct ids handle_id(struct expr_parse_ctx *ctx, char *id, * constant value using OP. Its invariant that there are no ids. If computing * ids for non-constants union the set of IDs that must be computed. 
*/ -#define BINARY_LONG_OP(RESULT, OP, LHS, RHS) \ - if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ - assert(LHS.ids == NULL); \ - assert(RHS.ids == NULL); \ - if (isnan(LHS.val) || isnan(RHS.val)) { \ - RESULT.val = NAN; \ - } else { \ - RESULT.val = (long)LHS.val OP (long)RHS.val; \ - } \ - RESULT.ids = NULL; \ - } else { \ - RESULT = union_expr(LHS, RHS); \ - } - #define BINARY_OP(RESULT, OP, LHS, RHS) \ if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ assert(LHS.ids == NULL); \ @@ -213,9 +199,75 @@ expr: NUMBER } | ID { $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); } | SOURCE_COUNT '(' ID ')' { $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); } -| expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); } -| expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); } -| expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); } +| expr '|' expr +{ + if (is_const($1.val) && is_const($3.val)) { + assert($1.ids == NULL); + assert($3.ids == NULL); + $$.ids = NULL; + $$.val = (fpclassify($1.val) == FP_ZERO && fpclassify($3.val) == FP_ZERO) ? 0 : 1; + } else if (is_const($1.val)) { + assert($1.ids == NULL); + if (fpclassify($1.val) == FP_ZERO) { + $$ = $3; + } else { + $$.val = 1; + $$.ids = NULL; + ids__free($3.ids); + } + } else if (is_const($3.val)) { + assert($3.ids == NULL); + if (fpclassify($3.val) == FP_ZERO) { + $$ = $1; + } else { + $$.val = 1; + $$.ids = NULL; + ids__free($1.ids); + } + } else { + $$ = union_expr($1, $3); + } +} +| expr '&' expr +{ + if (is_const($1.val) && is_const($3.val)) { + assert($1.ids == NULL); + assert($3.ids == NULL); + $$.val = (fpclassify($1.val) != FP_ZERO && fpclassify($3.val) != FP_ZERO) ? 1 : 0; + $$.ids = NULL; + } else if (is_const($1.val)) { + assert($1.ids == NULL); + if (fpclassify($1.val) != FP_ZERO) { + $$ = $3; + } else { + $$.val = 0; + $$.ids = NULL; + ids__free($3.ids); + } + } else if (is_const($3.val)) { + assert($3.ids == NULL); + if (fpclassify($3.val) != FP_ZERO) { + $$ = $1; + } else { + $$.val = 0; + $$.ids = NULL; + ids__free($1.ids); + } + } else { + $$ = union_expr($1, $3); + } +} +| expr '^' expr +{ + if (is_const($1.val) && is_const($3.val)) { + assert($1.ids == NULL); + assert($3.ids == NULL); + $$.val = (fpclassify($1.val) == FP_ZERO) != (fpclassify($3.val) == FP_ZERO) ? 
1 : 0; + $$.ids = NULL; + } else { + $$ = union_expr($1, $3); + } +} | expr '<' expr { BINARY_OP($$, <, $1, $3); } | expr '>' expr { BINARY_OP($$, >, $1, $3); } | expr '+' expr { BINARY_OP($$, +, $1, $3); } diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c index aa5dcc56b2ac..8588b3e35e00 100644 --- a/tools/perf/util/genelf_debug.c +++ b/tools/perf/util/genelf_debug.c @@ -337,6 +337,9 @@ static void emit_lineno_info(struct buffer_ext *be, { size_t i; + /* as described in the jitdump format */ + const char repeated_name_marker[] = {'\xff', '\0'}; + /* * Machine state at start of a statement program * address = 0 @@ -363,7 +366,8 @@ static void emit_lineno_info(struct buffer_ext *be, /* * check if filename changed, if so add it */ - if (!cur_filename || strcmp(cur_filename, ent->name)) { + if ((!cur_filename || strcmp(cur_filename, ent->name)) && + strcmp(repeated_name_marker, ent->name)) { emit_lne_define_filename(be, ent->name); cur_filename = ent->name; emit_set_file(be, ++cur_file_idx); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 276870221ce0..52fbf526fe74 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -24,6 +24,7 @@ #include <bpf/libbpf.h> #endif #include <perf/cpumap.h> +#include <tools/libc_compat.h> // reallocarray #include "dso.h" #include "evlist.h" @@ -37,6 +38,7 @@ #include "debug.h" #include "cpumap.h" #include "pmu.h" +#include "pmus.h" #include "vdso.h" #include "strbuf.h" #include "build-id.h" @@ -51,7 +53,6 @@ #include "bpf-event.h" #include "bpf-utils.h" #include "clockid.h" -#include "pmu-hybrid.h" #include <linux/ctype.h> #include <internal/lib.h> @@ -745,7 +746,7 @@ static int write_pmu_mappings(struct feat_fd *ff, * Do a first pass to count number of pmu to avoid lseek so this * works in pipe mode as well. */ - while ((pmu = perf_pmu__scan(pmu))) { + while ((pmu = perf_pmus__scan(pmu))) { if (!pmu->name) continue; pmu_num++; @@ -755,7 +756,7 @@ static int write_pmu_mappings(struct feat_fd *ff, if (ret < 0) return ret; - while ((pmu = perf_pmu__scan(pmu))) { + while ((pmu = perf_pmus__scan(pmu))) { if (!pmu->name) continue; @@ -1213,38 +1214,54 @@ static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c) fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map); } -#define MAX_CACHE_LVL 4 - -static int build_caches(struct cpu_cache_level caches[], u32 *cntp) +/* + * Build caches levels for a particular CPU from the data in + * /sys/devices/system/cpu/cpu<cpu>/cache/ + * The cache level data is stored in caches[] from index at + * *cntp. 
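[Editor's aside, not part of the patch: a standalone sketch of the sysfs layout that build_caches_for_cpu() above walks via cpu_cache_level__read(). The index<M> directories and level/type/size attributes follow the Linux sysfs cpu cache ABI; error handling is reduced to keep the example short.]

    #include <stdio.h>
    #include <string.h>

    /* Read one attribute of /sys/devices/system/cpu/cpu<cpu>/cache/index<idx>/. */
    static int read_cache_attr(char *buf, size_t len, unsigned int cpu,
                               unsigned int idx, const char *attr)
    {
            char path[256];
            FILE *f;

            snprintf(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/cache/index%u/%s",
                     cpu, idx, attr);
            f = fopen(path, "r");
            if (!f)
                    return -1;
            if (!fgets(buf, (int)len, f))
                    buf[0] = '\0';
            fclose(f);
            buf[strcspn(buf, "\n")] = '\0';   /* strip trailing newline */
            return 0;
    }

    int main(void)
    {
            for (unsigned int idx = 0; ; idx++) {
                    char level[32], type[32], size[32];

                    /* A missing index directory ends the enumeration. */
                    if (read_cache_attr(level, sizeof(level), 0, idx, "level"))
                            break;
                    read_cache_attr(type, sizeof(type), 0, idx, "type");
                    read_cache_attr(size, sizeof(size), 0, idx, "size");
                    printf("L%s %-12s %s\n", level, type, size);
            }
            return 0;
    }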
+ */ +int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp) { - u32 i, cnt = 0; - u32 nr, cpu; u16 level; - nr = cpu__max_cpu().cpu; + for (level = 0; level < MAX_CACHE_LVL; level++) { + struct cpu_cache_level c; + int err; + u32 i; - for (cpu = 0; cpu < nr; cpu++) { - for (level = 0; level < MAX_CACHE_LVL; level++) { - struct cpu_cache_level c; - int err; + err = cpu_cache_level__read(&c, cpu, level); + if (err < 0) + return err; - err = cpu_cache_level__read(&c, cpu, level); - if (err < 0) - return err; + if (err == 1) + break; - if (err == 1) + for (i = 0; i < *cntp; i++) { + if (cpu_cache_level__cmp(&c, &caches[i])) break; + } - for (i = 0; i < cnt; i++) { - if (cpu_cache_level__cmp(&c, &caches[i])) - break; - } + if (i == *cntp) { + caches[*cntp] = c; + *cntp = *cntp + 1; + } else + cpu_cache_level__free(&c); + } - if (i == cnt) - caches[cnt++] = c; - else - cpu_cache_level__free(&c); - } + return 0; +} + +static int build_caches(struct cpu_cache_level caches[], u32 *cntp) +{ + u32 nr, cpu, cnt = 0; + + nr = cpu__max_cpu().cpu; + + for (cpu = 0; cpu < nr; cpu++) { + int ret = build_caches_for_cpu(cpu, caches, &cnt); + + if (ret) + return ret; } *cntp = cnt; return 0; @@ -1372,6 +1389,14 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) return 0; } +static void memory_node__delete_nodes(struct memory_node *nodesp, u64 cnt) +{ + for (u64 i = 0; i < cnt; i++) + bitmap_free(nodesp[i].set); + + free(nodesp); +} + static int memory_node__sort(const void *a, const void *b) { const struct memory_node *na = a; @@ -1380,13 +1405,14 @@ static int memory_node__sort(const void *a, const void *b) return na->node - nb->node; } -static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) +static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) { char path[PATH_MAX]; struct dirent *ent; DIR *dir; - u64 cnt = 0; int ret = 0; + size_t cnt = 0, size = 0; + struct memory_node *nodes = NULL; scnprintf(path, PATH_MAX, "%s/devices/system/node/", sysfs__mountpoint()); @@ -1410,26 +1436,32 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) if (r != 1) continue; - if (WARN_ONCE(cnt >= size, - "failed to write MEM_TOPOLOGY, way too many nodes\n")) { - closedir(dir); - return -1; - } + if (cnt >= size) { + struct memory_node *new_nodes = + reallocarray(nodes, cnt + 4, sizeof(*nodes)); + if (!new_nodes) { + pr_err("Failed to write MEM_TOPOLOGY, size %zd nodes\n", size); + ret = -ENOMEM; + goto out; + } + nodes = new_nodes; + size += 4; + } ret = memory_node__read(&nodes[cnt++], idx); } - - *cntp = cnt; +out: closedir(dir); - - if (!ret) + if (!ret) { + *cntp = cnt; + *nodesp = nodes; qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort); + } else + memory_node__delete_nodes(nodes, cnt); return ret; } -#define MAX_MEMORY_NODES 2000 - /* * The MEM_TOPOLOGY holds physical memory map for every * node in system. 
The format of data is as follows: @@ -1448,8 +1480,8 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) static int write_mem_topology(struct feat_fd *ff __maybe_unused, struct evlist *evlist __maybe_unused) { - static struct memory_node nodes[MAX_MEMORY_NODES]; - u64 bsize, version = 1, i, nr; + struct memory_node *nodes = NULL; + u64 bsize, version = 1, i, nr = 0; int ret; ret = sysfs__read_xll("devices/system/memory/block_size_bytes", @@ -1457,7 +1489,7 @@ static int write_mem_topology(struct feat_fd *ff __maybe_unused, if (ret) return ret; - ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr); + ret = build_mem_topology(&nodes, &nr); if (ret) return ret; @@ -1492,6 +1524,7 @@ static int write_mem_topology(struct feat_fd *ff __maybe_unused, } out: + memory_node__delete_nodes(nodes, nr); return ret; } @@ -1551,7 +1584,7 @@ static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu, static int write_cpu_pmu_caps(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - struct perf_pmu *cpu_pmu = perf_pmu__find("cpu"); + struct perf_pmu *cpu_pmu = perf_pmus__find("cpu"); int ret; if (!cpu_pmu) @@ -1571,7 +1604,7 @@ static int write_pmu_caps(struct feat_fd *ff, int nr_pmu = 0; int ret; - while ((pmu = perf_pmu__scan(pmu))) { + while ((pmu = perf_pmus__scan(pmu))) { if (!pmu->name || !strcmp(pmu->name, "cpu") || perf_pmu__caps_parse(pmu) <= 0) continue; @@ -1589,17 +1622,18 @@ static int write_pmu_caps(struct feat_fd *ff, * Write hybrid pmu caps first to maintain compatibility with * older perf tool. */ - pmu = NULL; - perf_pmu__for_each_hybrid_pmu(pmu) { - ret = __write_pmu_caps(ff, pmu, true); - if (ret < 0) - return ret; + if (perf_pmus__num_core_pmus() > 1) { + pmu = NULL; + while ((pmu = perf_pmus__scan_core(pmu))) { + ret = __write_pmu_caps(ff, pmu, true); + if (ret < 0) + return ret; + } } pmu = NULL; - while ((pmu = perf_pmu__scan(pmu))) { - if (!pmu->name || !strcmp(pmu->name, "cpu") || - !pmu->nr_caps || perf_pmu__is_hybrid(pmu->name)) + while ((pmu = perf_pmus__scan(pmu))) { + if (pmu->is_core || !pmu->nr_caps) continue; ret = __write_pmu_caps(ff, pmu, true); @@ -2810,7 +2844,7 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) i = nr = 0; evlist__for_each_entry(session->evlist, evsel) { - if (evsel->core.idx == (int) desc[i].leader_idx) { + if (i < nr_groups && evsel->core.idx == (int) desc[i].leader_idx) { evsel__set_leader(evsel, evsel); /* {anon_group} is a dummy name */ if (strcmp(desc[i].name, "{anon_group}")) { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 59eeb4a32ac5..7c16a250e738 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -179,7 +179,11 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size); int write_padded(struct feat_fd *fd, const void *bf, size_t count, size_t count_aligned); +#define MAX_CACHE_LVL 4 + int is_cpu_online(unsigned int cpu); +int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp); + /* * arch specific callback */ diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index ab9e16123626..eab99ea6ac01 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -92,6 +92,7 @@ const char *help_unknown_cmd(const char *cmd) main_cmds.names[0] = NULL; clean_cmdnames(&main_cmds); + clean_cmdnames(&other_cmds); fprintf(stderr, "WARNING: You called a perf program named '%s', " "which does not exist.\n" "Continuing under the assumption 
that you meant '%s'\n", @@ -114,5 +115,7 @@ const char *help_unknown_cmd(const char *cmd) fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); } end: + clean_cmdnames(&main_cmds); + clean_cmdnames(&other_cmds); exit(1); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 3c9301a26dfc..3dc8a4968beb 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -450,6 +450,7 @@ static int hist_entry__init(struct hist_entry *he, memset(&he->stat, 0, sizeof(he->stat)); } + he->ms.maps = maps__get(he->ms.maps); he->ms.map = map__get(he->ms.map); if (he->branch_info) { @@ -483,7 +484,7 @@ static int hist_entry__init(struct hist_entry *he, goto err_infos; } - if (he->srcline) { + if (he->srcline && he->srcline != SRCLINE_UNKNOWN) { he->srcline = strdup(he->srcline); if (he->srcline == NULL) goto err_rawdata; @@ -497,7 +498,7 @@ static int hist_entry__init(struct hist_entry *he, } INIT_LIST_HEAD(&he->pairs.node); - thread__get(he->thread); + he->thread = thread__get(he->thread); he->hroot_in = RB_ROOT_CACHED; he->hroot_out = RB_ROOT_CACHED; @@ -523,6 +524,7 @@ err_infos: map__put(he->mem_info->daddr.ms.map); } err: + maps__zput(he->ms.maps); map__zput(he->ms.map); zfree(&he->stat_acc); return -ENOMEM; @@ -588,7 +590,7 @@ static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *entry, - struct addr_location *al, + const struct addr_location *al, bool sample_self) { struct rb_node **p; @@ -611,7 +613,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, * keys were used. */ cmp = hist_entry__cmp(he, entry); - if (!cmp) { if (sample_self) { he_stat__add_period(&he->stat, period); @@ -927,8 +928,10 @@ iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) if (iter->curr >= iter->total) return 0; - al->maps = bi[i].to.ms.maps; - al->map = bi[i].to.ms.map; + maps__put(al->maps); + al->maps = maps__get(bi[i].to.ms.maps); + map__put(al->map); + al->map = map__get(bi[i].to.ms.map); al->sym = bi[i].to.ms.sym; al->addr = bi[i].to.addr; return 1; @@ -1026,15 +1029,19 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { struct hist_entry **he_cache; + struct callchain_cursor *cursor = get_tls_callchain_cursor(); + + if (cursor == NULL) + return -ENOMEM; - callchain_cursor_commit(&callchain_cursor); + callchain_cursor_commit(cursor); /* * This is for detecting cycles or recursions so that they're * cumulated only one time to prevent entries more than 100% * overhead. */ - he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1)); + he_cache = malloc(sizeof(*he_cache) * (cursor->nr + 1)); if (he_cache == NULL) return -ENOMEM; @@ -1069,7 +1076,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, * We need to re-initialize the cursor since callchain_append() * advanced the cursor to the end. 
*/ - callchain_cursor_commit(&callchain_cursor); + callchain_cursor_commit(get_tls_callchain_cursor()); hists__inc_nr_samples(hists, he->filtered); @@ -1082,7 +1089,7 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter, { struct callchain_cursor_node *node; - node = callchain_cursor_current(&callchain_cursor); + node = callchain_cursor_current(get_tls_callchain_cursor()); if (node == NULL) return 0; @@ -1128,12 +1135,15 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, .raw_size = sample->raw_size, }; int i; - struct callchain_cursor cursor; + struct callchain_cursor cursor, *tls_cursor = get_tls_callchain_cursor(); bool fast = hists__has(he_tmp.hists, sym); - callchain_cursor_snapshot(&cursor, &callchain_cursor); + if (tls_cursor == NULL) + return -ENOMEM; + + callchain_cursor_snapshot(&cursor, tls_cursor); - callchain_cursor_advance(&callchain_cursor); + callchain_cursor_advance(tls_cursor); /* * Check if there's duplicate entries in the callchain. @@ -1219,7 +1229,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (al) alm = map__get(al->map); - err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, + err = sample__resolve_callchain(iter->sample, get_tls_callchain_cursor(), &iter->parent, iter->evsel, al, max_stack_depth); if (err) { map__put(alm); @@ -1307,13 +1317,14 @@ void hist_entry__delete(struct hist_entry *he) struct hist_entry_ops *ops = he->ops; thread__zput(he->thread); + maps__zput(he->ms.maps); map__zput(he->ms.map); if (he->branch_info) { map__zput(he->branch_info->from.ms.map); map__zput(he->branch_info->to.ms.map); - free_srcline(he->branch_info->srcline_from); - free_srcline(he->branch_info->srcline_to); + zfree_srcline(&he->branch_info->srcline_from); + zfree_srcline(&he->branch_info->srcline_to); zfree(&he->branch_info); } @@ -1331,7 +1342,7 @@ void hist_entry__delete(struct hist_entry *he) zfree(&he->res_samples); zfree(&he->stat_acc); - free_srcline(he->srcline); + zfree_srcline(&he->srcline); if (he->srcfile && he->srcfile[0]) zfree(&he->srcfile); free_callchain(he->callchain); @@ -1564,8 +1575,13 @@ static int hists__hierarchy_insert_entry(struct hists *hists, if (hist_entry__has_callchains(new_he) && symbol_conf.use_callchain) { - callchain_cursor_reset(&callchain_cursor); - if (callchain_merge(&callchain_cursor, + struct callchain_cursor *cursor = get_tls_callchain_cursor(); + + if (cursor == NULL) + return -1; + + callchain_cursor_reset(cursor); + if (callchain_merge(cursor, new_he->callchain, he->callchain) < 0) ret = -1; @@ -1606,11 +1622,15 @@ static int hists__collapse_insert_entry(struct hists *hists, he_stat__add_stat(iter->stat_acc, he->stat_acc); if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { - callchain_cursor_reset(&callchain_cursor); - if (callchain_merge(&callchain_cursor, - iter->callchain, - he->callchain) < 0) - ret = -1; + struct callchain_cursor *cursor = get_tls_callchain_cursor(); + + if (cursor != NULL) { + callchain_cursor_reset(cursor); + if (callchain_merge(cursor, iter->callchain, he->callchain) < 0) + ret = -1; + } else { + ret = 0; + } } hist_entry__delete(he); return ret; @@ -2122,7 +2142,7 @@ static bool hists__filter_entry_by_thread(struct hists *hists, struct hist_entry *he) { if (hists->thread_filter != NULL && - he->thread != hists->thread_filter) { + RC_CHK_ACCESS(he->thread) != RC_CHK_ACCESS(hists->thread_filter)) { he->filtered |= (1 << HIST_FILTER__THREAD); return true; } @@ -2778,12 +2798,12 @@ int 
__hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh if (hists__has(hists, thread)) { printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", - (thread->comm_set ? thread__comm_str(thread) : ""), - thread->tid); + (thread__comm_set(thread) ? thread__comm_str(thread) : ""), + thread__tid(thread)); } else { printed += scnprintf(bf + printed, size - printed, ", Thread: %s", - (thread->comm_set ? thread__comm_str(thread) : "")); + (thread__comm_set(thread) ? thread__comm_str(thread) : "")); } } if (dso) diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 2c8147a62203..ec1b3bd9f530 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -456,7 +456,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp) thread = machine__find_thread(btsq->bts->machine, -1, btsq->tid); if (thread) - btsq->pid = thread->pid_; + btsq->pid = thread__pid(thread); } else { thread = machine__findnew_thread(btsq->bts->machine, btsq->pid, btsq->tid); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index fe893c9bab3f..dbf0bc71a63b 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -754,13 +754,15 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, struct addr_location al; unsigned char buf[INTEL_PT_INSN_BUF_SZ]; ssize_t len; - int x86_64; + int x86_64, ret = 0; u8 cpumode; u64 offset, start_offset, start_ip; u64 insn_cnt = 0; bool one_map = true; bool nr; + + addr_location__init(&al); intel_pt_insn->length = 0; if (to_ip && *ip == to_ip) @@ -773,19 +775,22 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (ptq->pt->have_guest_sideband) { if (!ptq->guest_machine || ptq->guest_machine_pid != ptq->pid) { intel_pt_log("ERROR: guest sideband but no guest machine\n"); - return -EINVAL; + ret = -EINVAL; + goto out_ret; } } else if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) || intel_pt_get_guest(ptq)) { intel_pt_log("ERROR: no guest machine\n"); - return -EINVAL; + ret = -EINVAL; + goto out_ret; } machine = ptq->guest_machine; thread = ptq->guest_thread; if (!thread) { if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) { intel_pt_log("ERROR: no guest thread\n"); - return -EINVAL; + ret = -EINVAL; + goto out_ret; } thread = ptq->unknown_guest_thread; } @@ -794,7 +799,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (!thread) { if (cpumode != PERF_RECORD_MISC_KERNEL) { intel_pt_log("ERROR: no thread\n"); - return -EINVAL; + ret = -EINVAL; + goto out_ret; } thread = ptq->pt->unknown_thread; } @@ -808,13 +814,17 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, intel_pt_log("ERROR: thread has no dso for %#" PRIx64 "\n", *ip); else intel_pt_log("ERROR: thread has no map for %#" PRIx64 "\n", *ip); - return -EINVAL; + addr_location__exit(&al); + ret = -EINVAL; + goto out_ret; } dso = map__dso(al.map); if (dso->data.status == DSO_DATA_STATUS_ERROR && - dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) - return -ENOENT; + dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) { + ret = -ENOENT; + goto out_ret; + } offset = map__map_ip(al.map, *ip); @@ -833,7 +843,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, intel_pt_insn->rel = e->rel; memcpy(intel_pt_insn->buf, e->insn, INTEL_PT_INSN_BUF_SZ); intel_pt_log_insn_no_data(intel_pt_insn, *ip); - return 0; + ret = 0; + goto out_ret; } } @@ -854,11 +865,14 @@ static 
int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, offset); if (intel_pt_enable_logging) dso__fprintf(dso, intel_pt_log_fp()); - return -EINVAL; + ret = -EINVAL; + goto out_ret; } - if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) - return -EINVAL; + if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) { + ret = -EINVAL; + goto out_ret; + } intel_pt_log_insn(intel_pt_insn, *ip); @@ -909,17 +923,20 @@ out: e = intel_pt_cache_lookup(map__dso(al.map), machine, start_offset); if (e) - return 0; + goto out_ret; } /* Ignore cache errors */ intel_pt_cache_add(map__dso(al.map), machine, start_offset, insn_cnt, *ip - start_ip, intel_pt_insn); - return 0; +out_ret: + addr_location__exit(&al); + return ret; out_no_cache: *insn_cnt_ptr = insn_cnt; + addr_location__exit(&al); return 0; } @@ -968,6 +985,7 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) struct addr_location al; u8 cpumode; u64 offset; + int res; if (ptq->state->to_nr) { if (intel_pt_guest_kernel_ip(ip)) @@ -984,12 +1002,15 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) if (!thread) return -EINVAL; + addr_location__init(&al); if (!thread__find_map(thread, cpumode, ip, &al) || !map__dso(al.map)) return -EINVAL; offset = map__map_ip(al.map, ip); - return intel_pt_match_pgd_ip(ptq->pt, ip, offset, map__dso(al.map)->long_name); + res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, map__dso(al.map)->long_name); + addr_location__exit(&al); + return res; } static bool intel_pt_pgd_ip(uint64_t ip, void *data) @@ -1259,6 +1280,7 @@ static void intel_pt_add_br_stack(struct intel_pt *pt, pt->kernel_start); sample->branch_stack = pt->br_stack; + thread__put(thread); } /* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ @@ -1428,13 +1450,13 @@ static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq) ptq->guest_machine = machine; } - vcpu = ptq->thread ? ptq->thread->guest_cpu : -1; + vcpu = ptq->thread ? 
thread__guest_cpu(ptq->thread) : -1; if (vcpu < 0) return -1; tid = machine__get_current_tid(machine, vcpu); - if (ptq->guest_thread && ptq->guest_thread->tid != tid) + if (ptq->guest_thread && thread__tid(ptq->guest_thread) != tid) thread__zput(ptq->guest_thread); if (!ptq->guest_thread) { @@ -1444,7 +1466,7 @@ static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq) } ptq->guest_machine_pid = machine_pid; - ptq->guest_pid = ptq->guest_thread->pid_; + ptq->guest_pid = thread__pid(ptq->guest_thread); ptq->guest_tid = tid; ptq->vcpu = vcpu; @@ -1467,9 +1489,9 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); if (ptq->thread) { - ptq->pid = ptq->thread->pid_; + ptq->pid = thread__pid(ptq->thread); if (queue->cpu == -1) - ptq->cpu = ptq->thread->cpu; + ptq->cpu = thread__cpu(ptq->thread); } if (pt->have_guest_sideband && intel_pt_get_guest_from_sideband(ptq)) { @@ -3074,7 +3096,7 @@ static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq, if (ptq->pid == -1) { ptq->thread = machine__find_thread(m, -1, ptq->tid); if (ptq->thread) - ptq->pid = ptq->thread->pid_; + ptq->pid = thread__pid(ptq->thread); return; } @@ -3372,20 +3394,22 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) /* Assume text poke begins in a basic block no more than 4096 bytes */ int cnt = 4096 + event->text_poke.new_len; struct thread *thread = pt->unknown_thread; - struct addr_location al = { .map = NULL }; + struct addr_location al; struct machine *machine = pt->machine; struct intel_pt_cache_entry *e; u64 offset; + int ret = 0; + addr_location__init(&al); if (!event->text_poke.new_len) - return 0; + goto out; for (; cnt; cnt--, addr--) { struct dso *dso; if (intel_pt_find_map(thread, cpumode, addr, &al)) { if (addr < event->text_poke.addr) - return 0; + goto out; continue; } @@ -3406,15 +3430,16 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) * branch instruction before the text poke address. */ if (e->branch != INTEL_PT_BR_NO_BRANCH) - return 0; + goto out; } else { intel_pt_cache_invalidate(dso, machine, offset); intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", dso->long_name, addr); } } - - return 0; +out: + addr_location__exit(&al); + return ret; } static int intel_pt_process_event(struct perf_session *session, @@ -3556,6 +3581,7 @@ static void intel_pt_free(struct perf_session *session) zfree(&pt->chain); zfree(&pt->filter); zfree(&pt->time_ranges); + zfree(&pt->br_stack); free(pt); } @@ -4311,14 +4337,6 @@ int intel_pt_process_auxtrace_info(union perf_event *event, goto err_free_queues; } - /* - * Since this thread will not be kept in any rbtree not in a - * list, initialize its list node so that at thread__put() the - * current thread lifetime assumption is kept and we don't segfault - * at list_del_init(). 
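[Editor's aside, not part of the patch: a toy, standalone illustration of the discipline the hunks above and below convert to — accessors such as thread__pid()/thread__tid()/thread__set_priv() instead of direct field access, and every lookup paired with a put. The xthread type is invented for the demo; it is not perf's struct thread, whose real accessors can additionally wrap the object for reference-count checking.]

    #include <stdio.h>
    #include <stdlib.h>

    struct xthread {                /* toy stand-in, not perf's struct thread */
            int refcnt;
            int pid, tid;
            void *priv;
    };

    static struct xthread *xthread__new(int pid, int tid)
    {
            struct xthread *t = calloc(1, sizeof(*t));

            if (t) {
                    t->refcnt = 1;
                    t->pid = pid;
                    t->tid = tid;
            }
            return t;
    }

    static struct xthread *xthread__get(struct xthread *t)
    {
            if (t)
                    t->refcnt++;
            return t;
    }

    static void xthread__put(struct xthread *t)
    {
            if (t && --t->refcnt == 0)
                    free(t);
    }

    static int xthread__pid(const struct xthread *t) { return t->pid; }
    static int xthread__tid(const struct xthread *t) { return t->tid; }
    static void xthread__set_priv(struct xthread *t, void *p) { t->priv = p; }

    int main(void)
    {
            struct xthread *t = xthread__new(42, 42);
            struct xthread *found = xthread__get(t); /* as a lookup would return */

            xthread__set_priv(found, (void *)1);     /* cf. jit_add_pid() below */
            printf("%d:%d\n", xthread__pid(found), xthread__tid(found));
            xthread__put(found);                     /* pair every lookup with a put */
            xthread__put(t);
            return 0;
    }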
- */ - INIT_LIST_HEAD(&pt->unknown_thread->node); - err = thread__set_comm(pt->unknown_thread, "unknown", 0); if (err) goto err_delete_thread; diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 28e49502db5e..6b2b96c16ccd 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -799,17 +799,21 @@ static void jit_add_pid(struct machine *machine, pid_t pid) return; } - thread->priv = (void *)1; + thread__set_priv(thread, (void *)true); + thread__put(thread); } static bool jit_has_pid(struct machine *machine, pid_t pid) { struct thread *thread = machine__find_thread(machine, pid, pid); + void *priv; if (!thread) - return 0; + return false; - return (bool)thread->priv; + priv = thread__priv(thread); + thread__put(thread); + return (bool)priv; } int @@ -833,7 +837,7 @@ jit_process(struct perf_session *session, return 0; } - nsi = nsinfo__get(thread->nsinfo); + nsi = nsinfo__get(thread__nsinfo(thread)); thread__put(thread); /* diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 9e02e19c1b7a..4e62843d51b7 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -43,7 +43,8 @@ #include <linux/string.h> #include <linux/zalloc.h> -static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); +static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd, + struct thread *th, bool lock); static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms, u64 ip); static struct dso *machine__kernel_dso(struct machine *machine) @@ -72,6 +73,22 @@ static void machine__threads_init(struct machine *machine) } } +static int thread_rb_node__cmp_tid(const void *key, const struct rb_node *nd) +{ + int to_find = (int) *((pid_t *)key); + + return to_find - (int)thread__tid(rb_entry(nd, struct thread_rb_node, rb_node)->thread); +} + +static struct thread_rb_node *thread_rb_node__find(const struct thread *th, + struct rb_root *tree) +{ + pid_t to_find = thread__tid(th); + struct rb_node *nd = rb_find(&to_find, tree, thread_rb_node__cmp_tid); + + return rb_entry(nd, struct thread_rb_node, rb_node); +} + static int machine__set_mmap_name(struct machine *machine) { if (machine__is_host(machine)) @@ -214,10 +231,10 @@ void machine__delete_threads(struct machine *machine) down_write(&threads->lock); nd = rb_first_cached(&threads->entries); while (nd) { - struct thread *t = rb_entry(nd, struct thread, rb_node); + struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node); nd = rb_next(nd); - __machine__remove_thread(machine, t, false); + __machine__remove_thread(machine, trb, trb->thread, false); } up_write(&threads->lock); } @@ -231,7 +248,7 @@ void machine__exit(struct machine *machine) return; machine__destroy_kernel_maps(machine); - maps__delete(machine->kmaps); + maps__zput(machine->kmaps); dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); @@ -239,19 +256,9 @@ void machine__exit(struct machine *machine) zfree(&machine->current_tid); zfree(&machine->kallsyms_filename); + machine__delete_threads(machine); for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - struct thread *thread, *n; - /* - * Forget about the dead, at this point whatever threads were - * left in the dead lists better have a reference count taken - * by who is using them, and then, when they drop those references - * and it finally hits zero, thread__put() will check and see that - * its not in the dead threads list 
and will not try to remove it - * from there, just calling thread__delete() straight away. - */ - list_for_each_entry_safe(thread, n, &threads->dead, node) - list_del_init(&thread->node); exit_rwsem(&threads->lock); } @@ -435,7 +442,7 @@ static struct thread *findnew_guest_code(struct machine *machine, return NULL; /* Assume maps are set up if there are any */ - if (maps__nr_maps(thread->maps)) + if (maps__nr_maps(thread__maps(thread))) return thread; host_thread = machine__find_thread(host_machine, -1, pid); @@ -448,7 +455,7 @@ static struct thread *findnew_guest_code(struct machine *machine, * Guest code can be found in hypervisor process at the same address * so copy host maps. */ - err = maps__clone(thread, host_thread->maps); + err = maps__clone(thread, thread__maps(host_thread)); thread__put(host_thread); if (err) goto out_err; @@ -513,45 +520,45 @@ static void machine__update_thread_pid(struct machine *machine, { struct thread *leader; - if (pid == th->pid_ || pid == -1 || th->pid_ != -1) + if (pid == thread__pid(th) || pid == -1 || thread__pid(th) != -1) return; - th->pid_ = pid; + thread__set_pid(th, pid); - if (th->pid_ == th->tid) + if (thread__pid(th) == thread__tid(th)) return; - leader = __machine__findnew_thread(machine, th->pid_, th->pid_); + leader = __machine__findnew_thread(machine, thread__pid(th), thread__pid(th)); if (!leader) goto out_err; - if (!leader->maps) - leader->maps = maps__new(machine); + if (!thread__maps(leader)) + thread__set_maps(leader, maps__new(machine)); - if (!leader->maps) + if (!thread__maps(leader)) goto out_err; - if (th->maps == leader->maps) - return; + if (thread__maps(th) == thread__maps(leader)) + goto out_put; - if (th->maps) { + if (thread__maps(th)) { /* * Maps are created from MMAP events which provide the pid and * tid. Consequently there never should be any maps on a thread * with an unknown pid. Just print an error if there are. 
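The machine.c hunks here and below mechanically replace direct field access (th->pid_, th->tid, th->maps) with thread__pid(), thread__tid() and thread__maps() accessors. Funnelling every access through a function is what later allows the underlying struct to be wrapped, for example by reference-count checking, without touching any caller. A hedged sketch of the shape (thread_repr is a made-up stand-in, not the real struct thread):

#include <stdio.h>
#include <sys/types.h>

struct thread_repr {	/* representation private to one translation unit */
	pid_t pid_;	/* -1 when not (yet) known */
	pid_t tid;
};

static pid_t thread_repr__pid(const struct thread_repr *t)
{
	return t->pid_;
}

static void thread_repr__set_pid(struct thread_repr *t, pid_t pid)
{
	t->pid_ = pid;
}

int main(void)
{
	struct thread_repr t = { .pid_ = -1, .tid = 1234 };

	if (thread_repr__pid(&t) == -1)
		thread_repr__set_pid(&t, t.tid);
	printf("%d/%d\n", thread_repr__pid(&t), t.tid);
	return 0;
}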
*/ - if (!maps__empty(th->maps)) + if (!maps__empty(thread__maps(th))) pr_err("Discarding thread maps for %d:%d\n", - th->pid_, th->tid); - maps__put(th->maps); + thread__pid(th), thread__tid(th)); + maps__put(thread__maps(th)); } - th->maps = maps__get(leader->maps); + thread__set_maps(th, maps__get(thread__maps(leader))); out_put: thread__put(leader); return; out_err: - pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid); + pr_err("Failed to join map groups for %d:%d\n", thread__pid(th), thread__tid(th)); goto out_put; } @@ -568,11 +575,11 @@ __threads__get_last_match(struct threads *threads, struct machine *machine, th = threads->last_match; if (th != NULL) { - if (th->tid == tid) { + if (thread__tid(th) == tid) { machine__update_thread_pid(machine, th, pid); return thread__get(th); } - + thread__put(threads->last_match); threads->last_match = NULL; } @@ -594,7 +601,8 @@ threads__get_last_match(struct threads *threads, struct machine *machine, static void __threads__set_last_match(struct threads *threads, struct thread *th) { - threads->last_match = th; + thread__put(threads->last_match); + threads->last_match = thread__get(th); } static void @@ -616,6 +624,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, struct rb_node **p = &threads->entries.rb_root.rb_node; struct rb_node *parent = NULL; struct thread *th; + struct thread_rb_node *nd; bool leftmost = true; th = threads__get_last_match(threads, machine, pid, tid); @@ -624,15 +633,15 @@ static struct thread *____machine__findnew_thread(struct machine *machine, while (*p != NULL) { parent = *p; - th = rb_entry(parent, struct thread, rb_node); + th = rb_entry(parent, struct thread_rb_node, rb_node)->thread; - if (th->tid == tid) { + if (thread__tid(th) == tid) { threads__set_last_match(threads, th); machine__update_thread_pid(machine, th, pid); return thread__get(th); } - if (tid < th->tid) + if (tid < thread__tid(th)) p = &(*p)->rb_left; else { p = &(*p)->rb_right; @@ -644,32 +653,40 @@ static struct thread *____machine__findnew_thread(struct machine *machine, return NULL; th = thread__new(pid, tid); - if (th != NULL) { - rb_link_node(&th->rb_node, parent, p); - rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost); + if (th == NULL) + return NULL; - /* - * We have to initialize maps separately after rb tree is updated. - * - * The reason is that we call machine__findnew_thread - * within thread__init_maps to find the thread - * leader and that would screwed the rb tree. - */ - if (thread__init_maps(th, machine)) { - rb_erase_cached(&th->rb_node, &threads->entries); - RB_CLEAR_NODE(&th->rb_node); - thread__put(th); - return NULL; - } - /* - * It is now in the rbtree, get a ref - */ - thread__get(th); - threads__set_last_match(threads, th); - ++threads->nr; + nd = malloc(sizeof(*nd)); + if (nd == NULL) { + thread__put(th); + return NULL; } + nd->thread = th; - return th; + rb_link_node(&nd->rb_node, parent, p); + rb_insert_color_cached(&nd->rb_node, &threads->entries, leftmost); + /* + * We have to initialize maps separately after rb tree is updated. + * + * The reason is that we call machine__findnew_thread within + * thread__init_maps to find the thread leader and that would screw up + * the rb tree.
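The findnew rework above stops embedding the rb_node in struct thread and instead allocates a small struct thread_rb_node that owns exactly one reference, while the caller is handed its own reference via thread__get(). A simplified, self-contained model of that ownership split (plain pointers stand in for the kernel rb-tree helpers):

#include <stdlib.h>

struct object {
	int refcnt;
};

static struct object *object__get(struct object *o)
{
	if (o)
		o->refcnt++;
	return o;
}

static void object__put(struct object *o)
{
	if (o && --o->refcnt == 0)
		free(o);
}

struct tree_node {	/* separate node, in the spirit of thread_rb_node */
	struct tree_node *left, *right;
	struct object *obj;	/* the tree owns exactly one reference */
};

static struct tree_node *tree_node__new(struct object *obj)
{
	struct tree_node *nd = malloc(sizeof(*nd));

	if (nd) {
		nd->left = nd->right = NULL;
		nd->obj = object__get(obj);	/* the tree's reference */
	}
	return nd;
}

static void tree_node__remove(struct tree_node *nd)
{
	object__put(nd->obj);	/* drop the tree's reference first ... */
	free(nd);		/* ... then free only the wrapper */
}

int main(void)
{
	struct object *o = calloc(1, sizeof(*o));
	struct tree_node *nd;

	o->refcnt = 1;		/* creator's reference */
	nd = tree_node__new(o);
	object__put(o);		/* creator done; the tree keeps it alive */
	tree_node__remove(nd);	/* final put frees the object */
	return 0;
}

Removal then mirrors __machine__remove_thread() above: put the node's reference, erase the node, free only the wrapper.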
+ */ + if (thread__init_maps(th, machine)) { + pr_err("Thread init failed thread %d\n", pid); + rb_erase_cached(&nd->rb_node, &threads->entries); + RB_CLEAR_NODE(&nd->rb_node); + free(nd); + thread__put(th); + return NULL; + } + /* + * It is now in the rbtree, get a ref + */ + threads__set_last_match(threads, th); + ++threads->nr; + + return thread__get(th); } struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) @@ -1120,7 +1137,7 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) for (nd = rb_first_cached(&threads->entries); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); + struct thread *pos = rb_entry(nd, struct thread_rb_node, rb_node)->thread; ret += thread__fprintf(pos, fp); } @@ -1780,7 +1797,6 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct extra_kernel_map *xm, struct build_id *bid) { - struct map *map; enum dso_space_type dso_space; bool is_kernel_mmap; const char *mmap_name = machine->mmap_name; @@ -1806,8 +1822,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, } if (xm->name[0] == '/' || (!is_kernel_mmap && xm->name[0] == '[')) { - map = machine__addnew_module_map(machine, xm->start, - xm->name); + struct map *map = machine__addnew_module_map(machine, xm->start, xm->name); + if (map == NULL) goto out_problem; @@ -1816,6 +1832,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (build_id__is_defined(bid)) dso__set_build_id(map__dso(map), bid); + map__put(map); } else if (is_kernel_mmap) { const char *symbol_name = xm->name + strlen(mmap_name); /* @@ -1851,7 +1868,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, continue; - kernel = dso; + kernel = dso__get(dso); break; } @@ -1896,6 +1913,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, */ dso__load(kernel, machine__kernel_map(machine)); } + dso__put(kernel); } else if (perf_event__is_extra_kernel_mmap(machine, xm)) { return machine__process_extra_kernel_map(machine, xm); } @@ -2031,34 +2049,28 @@ out_problem: return 0; } -static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) +static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd, + struct thread *th, bool lock) { - struct threads *threads = machine__threads(machine, th->tid); + struct threads *threads = machine__threads(machine, thread__tid(th)); + + if (!nd) + nd = thread_rb_node__find(th, &threads->entries.rb_root); - if (threads->last_match == th) + if (threads->last_match && RC_CHK_ACCESS(threads->last_match) == RC_CHK_ACCESS(th)) threads__set_last_match(threads, NULL); if (lock) down_write(&threads->lock); - BUG_ON(refcount_read(&th->refcnt) == 0); + BUG_ON(refcount_read(thread__refcnt(th)) == 0); - rb_erase_cached(&th->rb_node, &threads->entries); - RB_CLEAR_NODE(&th->rb_node); + thread__put(nd->thread); + rb_erase_cached(&nd->rb_node, &threads->entries); + RB_CLEAR_NODE(&nd->rb_node); --threads->nr; - /* - * Move it first to the dead_threads list, then drop the reference, - * if this is the last reference, then the thread__delete destructor - * will be called and we will remove it from the dead_threads list. - */ - list_add_tail(&th->node, &threads->dead); - /* - * We need to do the put here because if this is the last refcount, - * then we will be touching the threads->dead head when removing the - * thread. 
- */ - thread__put(th); + free(nd); if (lock) up_write(&threads->lock); @@ -2066,7 +2078,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, void machine__remove_thread(struct machine *machine, struct thread *th) { - return __machine__remove_thread(machine, th, true); + return __machine__remove_thread(machine, NULL, th, true); } int machine__process_fork_event(struct machine *machine, union perf_event *event, @@ -2090,9 +2102,9 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event * (fork) event that would have removed the thread was lost. Assume the * latter case and continue on as best we can. */ - if (parent->pid_ != (pid_t)event->fork.ppid) { + if (thread__pid(parent) != (pid_t)event->fork.ppid) { dump_printf("removing erroneous parent thread %d/%d\n", - parent->pid_, parent->tid); + thread__pid(parent), thread__tid(parent)); machine__remove_thread(machine, parent); thread__put(parent); parent = machine__findnew_thread(machine, event->fork.ppid, @@ -2145,10 +2157,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_task(event, stdout); - if (thread != NULL) { - thread__exited(thread); + if (thread != NULL) thread__put(thread); - } return 0; } @@ -2213,7 +2223,7 @@ static void ip__resolve_ams(struct thread *thread, { struct addr_location al; - memset(&al, 0, sizeof(al)); + addr_location__init(&al); /* * We cannot use the header.misc hint to determine whether a * branch stack address is user, kernel, guest, hypervisor. @@ -2226,11 +2236,12 @@ static void ip__resolve_ams(struct thread *thread, ams->addr = ip; ams->al_addr = al.addr; ams->al_level = al.level; - ams->ms.maps = al.maps; + ams->ms.maps = maps__get(al.maps); ams->ms.sym = al.sym; - ams->ms.map = al.map; + ams->ms.map = map__get(al.map); ams->phys_addr = 0; ams->data_page_size = 0; + addr_location__exit(&al); } static void ip__resolve_data(struct thread *thread, @@ -2239,18 +2250,19 @@ static void ip__resolve_data(struct thread *thread, { struct addr_location al; - memset(&al, 0, sizeof(al)); + addr_location__init(&al); thread__find_symbol(thread, m, addr, &al); ams->addr = addr; ams->al_addr = al.addr; ams->al_level = al.level; - ams->ms.maps = al.maps; + ams->ms.maps = maps__get(al.maps); ams->ms.sym = al.sym; - ams->ms.map = al.map; + ams->ms.map = map__get(al.map); ams->phys_addr = phys_addr; ams->data_page_size = daddr_page_size; + addr_location__exit(&al); } struct mem_info *sample__resolve_mem(struct perf_sample *sample, @@ -2309,12 +2321,13 @@ static int add_callchain_ip(struct thread *thread, struct iterations *iter, u64 branch_from) { - struct map_symbol ms; + struct map_symbol ms = {}; struct addr_location al; - int nr_loop_iter = 0, err; + int nr_loop_iter = 0, err = 0; u64 iter_cycles = 0; const char *srcline = NULL; + addr_location__init(&al); al.filtered = 0; al.sym = NULL; al.srcline = NULL; @@ -2340,9 +2353,10 @@ static int add_callchain_ip(struct thread *thread, * Discard all. */ callchain_cursor_reset(cursor); - return 1; + err = 1; + goto out; } - return 0; + goto out; } thread__find_symbol(thread, *cpumode, ip, &al); } @@ -2355,31 +2369,34 @@ static int add_callchain_ip(struct thread *thread, symbol__match_regex(al.sym, &ignore_callees_regex)) { /* Treat this symbol as the root, forgetting its callees. 
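ip__resolve_ams() and ip__resolve_data() above now take maps__get()/map__get() when copying pointers out of the scoped addr_location, because the destination struct addr_map_symbol outlives addr_location__exit(). The rule, as a runnable toy (the types and names are illustrative):

#include <stdlib.h>

struct map {
	int refcnt;
};

static struct map *map__get(struct map *m)
{
	if (m)
		m->refcnt++;
	return m;
}

static void map__put(struct map *m)
{
	if (m && --m->refcnt == 0)
		free(m);
}

struct lookup {		/* scoped: torn down before the function returns */
	struct map *map;
};

struct stored {		/* lives on in a sample/hist entry */
	struct map *map;
};

static void store(struct stored *dst, const struct lookup *src)
{
	/* own what we keep: src's teardown must not invalidate dst->map */
	dst->map = map__get(src->map);
}

int main(void)
{
	struct map *m = calloc(1, sizeof(*m));
	struct lookup al = { .map = m };
	struct stored ams;

	m->refcnt = 1;		/* the lookup's reference */
	store(&ams, &al);	/* refcnt == 2 */
	map__put(al.map);	/* lookup scope ends */
	map__put(ams.map);	/* last reference frees the map */
	return 0;
}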
*/ - *root_al = al; + addr_location__copy(root_al, &al); callchain_cursor_reset(cursor); } } if (symbol_conf.hide_unresolved && al.sym == NULL) - return 0; + goto out; if (iter) { nr_loop_iter = iter->nr_loop_iter; iter_cycles = iter->cycles; } - ms.maps = al.maps; - ms.map = al.map; + ms.maps = maps__get(al.maps); + ms.map = map__get(al.map); ms.sym = al.sym; if (!branch && append_inlines(cursor, &ms, ip) == 0) - return 0; + goto out; srcline = callchain_srcline(&ms, al.addr); err = callchain_cursor_append(cursor, ip, &ms, branch, flags, nr_loop_iter, iter_cycles, branch_from, srcline); - map__put(al.map); +out: + addr_location__exit(&al); + maps__put(ms.maps); + map__put(ms.map); return err; } @@ -2504,7 +2521,7 @@ static void save_lbr_cursor_node(struct thread *thread, struct callchain_cursor *cursor, int idx) { - struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread); if (!lbr_stitch) return; @@ -2546,7 +2563,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, * in callchain_cursor_commit() when the writing session is closed. * Using curr and pos to track the current cursor node. */ - if (thread->lbr_stitch) { + if (thread__lbr_stitch(thread)) { cursor->curr = NULL; cursor->pos = cursor->nr; if (cursor->nr) { @@ -2574,7 +2591,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, * But does not need to save current cursor node for entry 0. * It's impossible to stitch the whole LBRs of previous sample. */ - if (thread->lbr_stitch && (cursor->pos != cursor->nr)) { + if (thread__lbr_stitch(thread) && (cursor->pos != cursor->nr)) { if (!cursor->curr) cursor->curr = cursor->first; else @@ -2627,7 +2644,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread, struct callchain_cursor *cursor) { - struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread); struct callchain_cursor_node *cnode; struct stitch_list *stitch_node; int err; @@ -2651,7 +2668,7 @@ static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread, static struct stitch_list *get_stitch_node(struct thread *thread) { - struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread); struct stitch_list *stitch_node; if (!list_empty(&lbr_stitch->free_lists)) { @@ -2675,7 +2692,7 @@ static bool has_stitched_lbr(struct thread *thread, struct branch_entry *cur_entries = perf_sample__branch_entries(cur); struct branch_stack *prev_stack = prev->branch_stack; struct branch_entry *prev_entries = perf_sample__branch_entries(prev); - struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread); int i, j, nr_identical_branches = 0; struct stitch_list *stitch_node; u64 cur_base, distance; @@ -2739,27 +2756,29 @@ static bool has_stitched_lbr(struct thread *thread, static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr) { - if (thread->lbr_stitch) + if (thread__lbr_stitch(thread)) return true; - thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch)); - if (!thread->lbr_stitch) + thread__set_lbr_stitch(thread, zalloc(sizeof(struct lbr_stitch))); + if (!thread__lbr_stitch(thread)) goto err; - thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node)); - if (!thread->lbr_stitch->prev_lbr_cursor) + thread__lbr_stitch(thread)->prev_lbr_cursor = + calloc(max_lbr + 1, sizeof(struct 
callchain_cursor_node)); + if (!thread__lbr_stitch(thread)->prev_lbr_cursor) goto free_lbr_stitch; - INIT_LIST_HEAD(&thread->lbr_stitch->lists); - INIT_LIST_HEAD(&thread->lbr_stitch->free_lists); + INIT_LIST_HEAD(&thread__lbr_stitch(thread)->lists); + INIT_LIST_HEAD(&thread__lbr_stitch(thread)->free_lists); return true; free_lbr_stitch: - zfree(&thread->lbr_stitch); + free(thread__lbr_stitch(thread)); + thread__set_lbr_stitch(thread, NULL); err: pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n"); - thread->lbr_stitch_enable = false; + thread__set_lbr_stitch_enable(thread, false); return false; } @@ -2795,9 +2814,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread, if (i == chain_nr) return 0; - if (thread->lbr_stitch_enable && !sample->no_hw_idx && + if (thread__lbr_stitch_enable(thread) && !sample->no_hw_idx && (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) { - lbr_stitch = thread->lbr_stitch; + lbr_stitch = thread__lbr_stitch(thread); stitched_lbr = has_stitched_lbr(thread, sample, &lbr_stitch->prev_sample, @@ -2877,7 +2896,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, static u64 get_leaf_frame_caller(struct perf_sample *sample, struct thread *thread, int usr_idx) { - if (machine__normalized_is(maps__machine(thread->maps), "arm64")) + if (machine__normalized_is(maps__machine(thread__maps(thread)), "arm64")) return get_leaf_frame_caller_aarch64(sample, thread, usr_idx); else return 0; @@ -3072,6 +3091,7 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms struct dso *dso; u64 addr; int ret = 1; + struct map_symbol ilist_ms; if (!symbol_conf.inline_name || !map || !sym) return ret; @@ -3088,18 +3108,20 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms inlines__tree_insert(&dso->inlined_nodes, inline_node); } + ilist_ms = (struct map_symbol) { + .maps = maps__get(ms->maps), + .map = map__get(map), + }; list_for_each_entry(ilist, &inline_node->val, list) { - struct map_symbol ilist_ms = { - .maps = ms->maps, - .map = map, - .sym = ilist->symbol, - }; + ilist_ms.sym = ilist->symbol; ret = callchain_cursor_append(cursor, ip, &ilist_ms, false, NULL, 0, 0, 0, ilist->srcline); if (ret != 0) return ret; } + map__put(ilist_ms.map); + maps__put(ilist_ms.maps); return ret; } @@ -3158,6 +3180,9 @@ int thread__resolve_callchain(struct thread *thread, { int ret = 0; + if (cursor == NULL) + return -ENOMEM; + callchain_cursor_reset(cursor); if (callchain_param.order == ORDER_CALLEE) { @@ -3191,7 +3216,6 @@ int machine__for_each_thread(struct machine *machine, { struct threads *threads; struct rb_node *nd; - struct thread *thread; int rc = 0; int i; @@ -3199,14 +3223,9 @@ int machine__for_each_thread(struct machine *machine, threads = &machine->threads[i]; for (nd = rb_first_cached(&threads->entries); nd; nd = rb_next(nd)) { - thread = rb_entry(nd, struct thread, rb_node); - rc = fn(thread, priv); - if (rc != 0) - return rc; - } + struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node); - list_for_each_entry(thread, &threads->dead, node) { - rc = fn(thread, priv); + rc = fn(trb->thread, priv); if (rc != 0) return rc; } @@ -3264,7 +3283,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, if (!thread) return -ENOMEM; - thread->cpu = cpu; + thread__set_cpu(thread, cpu); thread__put(thread); return 0; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index b7f890950909..f30d34903aa4 100644 --- 
a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -137,7 +137,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, no_dso = is_no_dso_memory(filename); map->prot = prot; map->flags = flags; - nsi = nsinfo__get(thread->nsinfo); + nsi = nsinfo__get(thread__nsinfo(thread)); if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) { snprintf(newfilename, sizeof(newfilename), @@ -431,14 +431,21 @@ size_t map__fprintf(struct map *map, FILE *fp) map__start(map), map__end(map), map__pgoff(map), dso->name); } -size_t map__fprintf_dsoname(struct map *map, FILE *fp) +static bool prefer_dso_long_name(const struct dso *dso, bool print_off) +{ + return dso->long_name && + (symbol_conf.show_kernel_path || + (print_off && (dso->name[0] == '[' || dso__is_kcore(dso)))); +} + +static size_t __map__fprintf_dsoname(struct map *map, bool print_off, FILE *fp) { char buf[symbol_conf.pad_output_len_dso + 1]; const char *dsoname = "[unknown]"; const struct dso *dso = map ? map__dso(map) : NULL; if (dso) { - if (symbol_conf.show_kernel_path && dso->long_name) + if (prefer_dso_long_name(dso, print_off)) dsoname = dso->long_name; else dsoname = dso->name; @@ -452,6 +459,27 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp) return fprintf(fp, "%s", dsoname); } +size_t map__fprintf_dsoname(struct map *map, FILE *fp) +{ + return __map__fprintf_dsoname(map, false, fp); +} + +size_t map__fprintf_dsoname_dsoff(struct map *map, bool print_off, u64 addr, FILE *fp) +{ + const struct dso *dso = map ? map__dso(map) : NULL; + int printed = 0; + + if (print_off && (!dso || !dso__is_object_file(dso))) + print_off = false; + printed += fprintf(fp, " ("); + printed += __map__fprintf_dsoname(map, print_off, fp); + if (print_off) + printed += fprintf(fp, "+0x%" PRIx64, addr); + printed += fprintf(fp, ")"); + + return printed; +} + char *map__srcline(struct map *map, u64 addr, struct symbol *sym) { if (map == NULL) @@ -468,9 +496,9 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (dso) { char *srcline = map__srcline(map, addr, NULL); - if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) + if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); - free_srcline(srcline); + zfree_srcline(&srcline); } return ret; } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 823ab7fc0acf..66a87b3d9965 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -194,6 +194,7 @@ static inline void __map__zput(struct map **map) size_t map__fprintf(struct map *map, FILE *fp); size_t map__fprintf_dsoname(struct map *map, FILE *fp); +size_t map__fprintf_dsoname_dsoff(struct map *map, bool print_off, u64 addr, FILE *fp); char *map__srcline(struct map *map, u64 addr, struct symbol *sym); int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp); diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 1aeb1db58fe5..233438c95b53 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -171,7 +171,7 @@ struct maps *maps__new(struct machine *machine) return result; } -void maps__delete(struct maps *maps) +static void maps__delete(struct maps *maps) { maps__exit(maps); unwind__finish_access(maps); @@ -374,6 +374,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) } put_map: map__put(pos->map); + free(pos); } up_write(maps__lock(maps)); return err; @@ -384,7 +385,7 @@ put_map: */ int maps__clone(struct thread *thread, struct maps *parent) { - struct maps *maps = thread->maps; + struct 
maps *maps = thread__maps(thread); int err; struct map_rb_node *rb_node; diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index d2963456cfbe..83144e0645ed 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -57,13 +57,20 @@ struct kmap { }; struct maps *maps__new(struct machine *machine); -void maps__delete(struct maps *maps); bool maps__empty(struct maps *maps); int maps__clone(struct thread *thread, struct maps *parent); struct maps *maps__get(struct maps *maps); void maps__put(struct maps *maps); +static inline void __maps__zput(struct maps **map) +{ + maps__put(*map); + *map = NULL; +} + +#define maps__zput(map) __maps__zput(&map) + static inline struct rb_root *maps__entries(struct maps *maps) { return &RC_CHK_ACCESS(maps)->entries; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ed1ee4b05356..c07fe3a90722 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -13,7 +13,7 @@ #include "debug.h" #include "symbol.h" #include "pmu.h" -#include "pmu-hybrid.h" +#include "pmus.h" unsigned int perf_mem_events__loads_ldlat = 30; @@ -120,8 +120,8 @@ int perf_mem_events__init(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = perf_mem_events__ptr(j); - struct perf_pmu *pmu; char sysfs_name[100]; + struct perf_pmu *pmu = NULL; /* * If the event entry isn't valid, skip initialization @@ -130,16 +130,14 @@ int perf_mem_events__init(void) if (!e->tag) continue; - if (!perf_pmu__has_hybrid()) { - scnprintf(sysfs_name, sizeof(sysfs_name), - e->sysfs_name, "cpu"); - e->supported = perf_mem_event__supported(mnt, sysfs_name); - } else { - perf_pmu__for_each_hybrid_pmu(pmu) { - scnprintf(sysfs_name, sizeof(sysfs_name), - e->sysfs_name, pmu->name); - e->supported |= perf_mem_event__supported(mnt, sysfs_name); - } + /* + * Scan all PMUs not just core ones, since perf mem/c2c on + * platforms like AMD uses IBS OP PMU which is independent + * of core PMU. 
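The mem-events conversion just above replaces the hybrid-only walker with the NULL-seeded perf_pmus__scan() cursor: pass NULL to start, pass the previous element to continue, and NULL marks the end. A toy version of that iterator shape (the PMU list here is made up):

#include <stdio.h>

struct pmu {
	const char *name;
};

static struct pmu pmus[] = {
	{ "cpu_core" }, { "cpu_atom" }, { "ibs_op" },	/* illustrative */
};

static struct pmu *pmu__scan(struct pmu *prev)
{
	size_t next = prev ? (size_t)(prev - pmus) + 1 : 0;

	return next < sizeof(pmus) / sizeof(pmus[0]) ? &pmus[next] : NULL;
}

int main(void)
{
	struct pmu *pmu = NULL;

	while ((pmu = pmu__scan(pmu)) != NULL)
		printf("probing sysfs name for %s\n", pmu->name);
	return 0;
}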
+ */ + while ((pmu = perf_pmus__scan(pmu)) != NULL) { + scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); + e->supported |= perf_mem_event__supported(mnt, sysfs_name); } if (e->supported) @@ -170,9 +168,9 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, { const char *mnt = sysfs__mount(); char sysfs_name[100]; - struct perf_pmu *pmu; + struct perf_pmu *pmu = NULL; - perf_pmu__for_each_hybrid_pmu(pmu) { + while ((pmu = perf_pmus__scan(pmu)) != NULL) { scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); if (!perf_mem_event__supported(mnt, sysfs_name)) { @@ -195,7 +193,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, if (!e->record) continue; - if (!perf_pmu__has_hybrid()) { + if (perf_pmus__num_mem_pmus() == 1) { if (!e->supported) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(j, NULL)); @@ -210,7 +208,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, return -1; } - perf_pmu__for_each_hybrid_pmu(pmu) { + while ((pmu = perf_pmus__scan(pmu)) != NULL) { rec_argv[i++] = "-e"; s = perf_mem_events__name(j, pmu->name); if (s) { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 5e9c657dd3f7..a6a5ed44a679 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -11,7 +11,7 @@ #include "evsel.h" #include "strbuf.h" #include "pmu.h" -#include "pmu-hybrid.h" +#include "pmus.h" #include "print-events.h" #include "smt.h" #include "expr.h" @@ -79,6 +79,7 @@ static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused, return NULL; memcpy(me, entry, sizeof(struct metric_event)); me->evsel = ((struct metric_event *)entry)->evsel; + me->is_default = false; INIT_LIST_HEAD(&me->head); return &me->nd; } @@ -123,6 +124,7 @@ struct metric { * within the expression. */ struct expr_parse_ctx *pctx; + const char *pmu; /** The name of the metric such as "IPC". */ const char *metric_name; /** Modifier on the metric such as "u" or NULL for none. */ @@ -136,6 +138,11 @@ struct metric { * output. */ const char *metric_unit; + /** + * Optional name of the metric group reported + * if the Default metric group is being processed. + */ + const char *default_metricgroup_name; /** Optional null terminated array of referenced metrics. */ struct metric_ref *metric_refs; /** @@ -216,7 +223,9 @@ static struct metric *metric__new(const struct pmu_metric *pm, if (!m->pctx) goto out_err; + m->pmu = pm->pmu ?: "cpu"; m->metric_name = pm->metric_name; + m->default_metricgroup_name = pm->default_metricgroup_name; m->modifier = NULL; if (modifier) { m->modifier = strdup(modifier); @@ -259,11 +268,12 @@ static bool contains_metric_id(struct evsel **metric_events, int num_events, /** * setup_metric_events - Find a group of events in metric_evlist that correspond * to the IDs from a parsed metric expression. + * @pmu: The PMU for the IDs. * @ids: the metric IDs to match. * @metric_evlist: the list of perf events. * @out_metric_events: holds the created metric events array. 
*/ -static int setup_metric_events(struct hashmap *ids, +static int setup_metric_events(const char *pmu, struct hashmap *ids, struct evlist *metric_evlist, struct evsel ***out_metric_events) { @@ -271,6 +281,7 @@ static int setup_metric_events(struct hashmap *ids, const char *metric_id; struct evsel *ev; size_t ids_size, matched_events, i; + bool all_pmus = !strcmp(pmu, "all") || perf_pmus__num_core_pmus() == 1 || !is_pmu_core(pmu); *out_metric_events = NULL; ids_size = hashmap__size(ids); @@ -283,6 +294,10 @@ static int setup_metric_events(struct hashmap *ids, evlist__for_each_entry(metric_evlist, ev) { struct expr_id_data *val_ptr; + /* Don't match events for the wrong hybrid PMU. */ + if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) && + strcmp(ev->pmu_name, pmu)) + continue; /* * Check for duplicate events with the same name. For * example, uncore_imc/cas_count_read/ will turn into 6 @@ -298,6 +313,7 @@ static int setup_metric_events(struct hashmap *ids, * about this event. */ if (hashmap__find(ids, metric_id, &val_ptr)) { + pr_debug("Matched metric-id %s to %s\n", metric_id, evsel__name(ev)); metric_events[matched_events++] = ev; if (matched_events >= ids_size) @@ -355,8 +371,13 @@ static bool match_metric(const char *n, const char *list) return false; } -static bool match_pm_metric(const struct pmu_metric *pm, const char *metric) +static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric) { + const char *pm_pmu = pm->pmu ?: "cpu"; + + if (strcmp(pmu, "all") && strcmp(pm_pmu, pmu)) + return false; + return match_metric(pm->metric_group, metric) || match_metric(pm->metric_name, metric); } @@ -475,7 +496,7 @@ static int metricgroup__sys_event_iter(const struct pmu_metric *pm, if (!pm->metric_expr || !pm->compat) return 0; - while ((pmu = perf_pmu__scan(pmu))) { + while ((pmu = perf_pmus__scan(pmu))) { if (!pmu->id || strcmp(pmu->id, pm->compat)) continue; @@ -766,6 +787,7 @@ struct visited_metric { struct metricgroup_add_iter_data { struct list_head *metric_list; + const char *pmu; const char *metric_name; const char *modifier; int *ret; @@ -779,7 +801,8 @@ struct metricgroup_add_iter_data { const struct pmu_metrics_table *table; }; -static bool metricgroup__find_metric(const char *metric, +static bool metricgroup__find_metric(const char *pmu, + const char *metric, const struct pmu_metrics_table *table, struct pmu_metric *pm); @@ -798,6 +821,7 @@ static int add_metric(struct list_head *metric_list, * resolve_metric - Locate metrics within the root metric and recursively add * references to them. * @metric_list: The list the metric is added to. + * @pmu: The PMU name to resolve metrics on, or "all" for all PMUs. * @modifier: if non-null event modifiers like "u". * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the @@ -813,6 +837,7 @@ static int add_metric(struct list_head *metric_list, * architecture perf is running upon. 
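match_pm_metric() above gained a PMU argument: a metric whose pmu field is unset defaults to "cpu", and callers pass "all" to disable filtering. The check in isolation (a sketch, not the in-tree function):

#include <stdio.h>
#include <string.h>

static int pmu_filter_matches(const char *pm_pmu, const char *wanted)
{
	if (!pm_pmu)
		pm_pmu = "cpu";	/* metrics without a PMU default to "cpu" */
	return !strcmp(wanted, "all") || !strcmp(pm_pmu, wanted);
}

int main(void)
{
	printf("%d\n", pmu_filter_matches(NULL, "cpu"));	/* 1 */
	printf("%d\n", pmu_filter_matches("cpu_atom", "all"));	/* 1 */
	printf("%d\n", pmu_filter_matches("cpu_atom", "cpu"));	/* 0 */
	return 0;
}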
*/ static int resolve_metric(struct list_head *metric_list, + const char *pmu, const char *modifier, bool metric_no_group, bool metric_no_threshold, @@ -842,7 +867,7 @@ static int resolve_metric(struct list_head *metric_list, hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { struct pmu_metric pm; - if (metricgroup__find_metric(cur->pkey, table, &pm)) { + if (metricgroup__find_metric(pmu, cur->pkey, table, &pm)) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) @@ -993,9 +1018,12 @@ static int __add_metric(struct list_head *metric_list, } if (!ret) { /* Resolve referenced metrics. */ - ret = resolve_metric(metric_list, modifier, metric_no_group, + const char *pmu = pm->pmu ?: "cpu"; + + ret = resolve_metric(metric_list, pmu, modifier, metric_no_group, metric_no_threshold, user_requested_cpu_list, - system_wide, root_metric, &visited_node, table); + system_wide, root_metric, &visited_node, + table); } if (ret) { if (is_root) @@ -1008,6 +1036,7 @@ static int __add_metric(struct list_head *metric_list, } struct metricgroup__find_metric_data { + const char *pmu; const char *metric; struct pmu_metric *pm; }; @@ -1017,6 +1046,10 @@ static int metricgroup__find_metric_callback(const struct pmu_metric *pm, void *vdata) { struct metricgroup__find_metric_data *data = vdata; + const char *pm_pmu = pm->pmu ?: "cpu"; + + if (strcmp(data->pmu, "all") && strcmp(pm_pmu, data->pmu)) + return 0; if (!match_metric(pm->metric_name, data->metric)) return 0; @@ -1025,11 +1058,13 @@ static int metricgroup__find_metric_callback(const struct pmu_metric *pm, return 1; } -static bool metricgroup__find_metric(const char *metric, +static bool metricgroup__find_metric(const char *pmu, + const char *metric, const struct pmu_metrics_table *table, struct pmu_metric *pm) { struct metricgroup__find_metric_data data = { + .pmu = pmu, .metric = metric, .pm = pm, }; @@ -1083,7 +1118,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, struct metricgroup_add_iter_data *d = data; int ret; - if (!match_pm_metric(pm, d->metric_name)) + if (!match_pm_metric(pm, d->pmu, d->metric_name)) return 0; ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group, @@ -1126,8 +1161,28 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, return right_count - left_count; } +/** + * default_metricgroup_cmp - Implements complex key for the Default metricgroup + * that first sorts by default_metricgroup_name, then + * metric_name. 
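default_metricgroup_cmp(), documented above and defined just below, orders first by default_metricgroup_name and then by metric_name. A standalone analogue using qsort() instead of list_sort() (ascending for simplicity; the in-tree comparator passes right before left):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct metric {
	const char *group, *name;
};

static int metric_cmp(const void *a, const void *b)
{
	const struct metric *l = a, *r = b;
	int diff = strcmp(l->group, r->group);	/* primary key */

	return diff ? diff : strcmp(l->name, r->name);	/* tie-breaker */
}

int main(void)
{
	struct metric m[] = {
		{ "TopdownL1", "tma_retiring" },
		{ "Default",   "tma_frontend_bound" },
		{ "Default",   "tma_backend_bound" },
	};

	qsort(m, 3, sizeof(m[0]), metric_cmp);
	for (int i = 0; i < 3; i++)
		printf("%s/%s\n", m[i].group, m[i].name);
	return 0;
}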
+ */ +static int default_metricgroup_cmp(void *priv __maybe_unused, + const struct list_head *l, + const struct list_head *r) +{ + const struct metric *left = container_of(l, struct metric, nd); + const struct metric *right = container_of(r, struct metric, nd); + int diff = strcmp(right->default_metricgroup_name, left->default_metricgroup_name); + + if (diff) + return diff; + + return strcmp(right->metric_name, left->metric_name); +} + struct metricgroup__add_metric_data { struct list_head *list; + const char *pmu; const char *metric_name; const char *modifier; const char *user_requested_cpu_list; @@ -1144,9 +1199,9 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, struct metricgroup__add_metric_data *data = vdata; int ret = 0; - if (pm->metric_expr && match_pm_metric(pm, data->metric_name)) { + if (pm->metric_expr && match_pm_metric(pm, data->pmu, data->metric_name)) { bool metric_no_group = data->metric_no_group || - match_metric(data->metric_name, pm->metricgroup_no_group); + match_metric(pm->metricgroup_no_group, data->metric_name); data->has_match = true; ret = add_metric(data->list, pm, data->modifier, metric_no_group, @@ -1159,6 +1214,7 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, /** * metricgroup__add_metric - Find and add a metric, or a metric group. + * @pmu: The PMU name to search for metrics on, or "all" for all PMUs. * @metric_name: The name of the metric or metric group. For example, "IPC" * could be the name of a metric and "TopDownL1" the name of a * metric group. @@ -1172,7 +1228,7 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, * @table: The table that is searched for metrics, most commonly the table for the * architecture perf is running upon. */ -static int metricgroup__add_metric(const char *metric_name, const char *modifier, +static int metricgroup__add_metric(const char *pmu, const char *metric_name, const char *modifier, bool metric_no_group, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, @@ -1186,6 +1242,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier { struct metricgroup__add_metric_data data = { .list = &list, + .pmu = pmu, .metric_name = metric_name, .modifier = modifier, .metric_no_group = metric_no_group, @@ -1210,6 +1267,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier .fn = metricgroup__add_metric_sys_event_iter, .data = (void *) &(struct metricgroup_add_iter_data) { .metric_list = &list, + .pmu = pmu, .metric_name = metric_name, .modifier = modifier, .metric_no_group = metric_no_group, @@ -1239,6 +1297,7 @@ out: /** * metricgroup__add_metric_list - Find and add metrics, or metric groups, * specified in a list. + * @pmu: A pmu to restrict the metrics to, or "all" for all PMUS. * @list: the list of metrics or metric groups. For example, "IPC,CPI,TopDownL1" * would match the IPC and CPI metrics, and TopDownL1 would match all * the metrics in the TopDownL1 group. @@ -1251,7 +1310,8 @@ out: * @table: The table that is searched for metrics, most commonly the table for the * architecture perf is running upon. 
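metricgroup__add_metric_list(), whose documentation ends above, carves the user string into comma-separated metric names, each with an optional ':' modifier suffix, as the *modifier++ = '\0' in the next hunk shows. A self-contained sketch of that tokenizing:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char list[] = "IPC,CPI:u,TopDownL1";	/* example input */
	char *saveptr = NULL;

	for (char *name = strtok_r(list, ",", &saveptr); name;
	     name = strtok_r(NULL, ",", &saveptr)) {
		char *modifier = strchr(name, ':');

		if (modifier)
			*modifier++ = '\0';	/* split "CPI:u" in place */
		printf("metric '%s' modifier '%s'\n", name,
		       modifier ? modifier : "<none>");
	}
	return 0;
}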
*/ -static int metricgroup__add_metric_list(const char *list, bool metric_no_group, +static int metricgroup__add_metric_list(const char *pmu, const char *list, + bool metric_no_group, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, struct list_head *metric_list, @@ -1270,7 +1330,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, if (modifier) *modifier++ = '\0'; - ret = metricgroup__add_metric(metric_name, modifier, + ret = metricgroup__add_metric(pmu, metric_name, modifier, metric_no_group, metric_no_threshold, user_requested_cpu_list, system_wide, metric_list, table); @@ -1441,8 +1501,8 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, } pr_debug("Parsing metric events '%s'\n", events.buf); parse_events_error__init(&parse_error); - ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu, - /*warn_if_reordered=*/false); + ret = __parse_events(parsed_evlist, events.buf, /*pmu_filter=*/NULL, + &parse_error, fake_pmu, /*warn_if_reordered=*/false); if (ret) { parse_events_error__print(&parse_error, events.buf); goto err_out; @@ -1460,7 +1520,8 @@ err_out: return ret; } -static int parse_groups(struct evlist *perf_evlist, const char *str, +static int parse_groups(struct evlist *perf_evlist, + const char *pmu, const char *str, bool metric_no_group, bool metric_no_merge, bool metric_no_threshold, @@ -1474,11 +1535,12 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, LIST_HEAD(metric_list); struct metric *m; bool tool_events[PERF_TOOL_MAX] = {false}; + bool is_default = !strcmp(str, "Default"); int ret; if (metric_events_list->nr_entries == 0) metricgroup__rblist_init(metric_events_list); - ret = metricgroup__add_metric_list(str, metric_no_group, metric_no_threshold, + ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold, user_requested_cpu_list, system_wide, &metric_list, table); if (ret) @@ -1508,6 +1570,9 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, goto out; } + if (is_default) + list_sort(NULL, &metric_list, default_metricgroup_cmp); + list_for_each_entry(m, &metric_list, nd) { struct metric_event *me; struct evsel **metric_events; @@ -1535,6 +1600,11 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, strcmp(m->modifier, n->modifier))) continue; + if ((!m->pmu && n->pmu) || + (m->pmu && !n->pmu) || + (m->pmu && n->pmu && strcmp(m->pmu, n->pmu))) + continue; + if (expr__subset_of_ids(n->pctx, m->pctx)) { pr_debug("Events in '%s' fully contained within '%s'\n", m->metric_name, n->metric_name); @@ -1552,9 +1622,10 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, metric_evlist = m->evlist; } - ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events); + ret = setup_metric_events(fake_pmu ? 
"all" : m->pmu, m->pctx->ids, + metric_evlist, &metric_events); if (ret) { - pr_debug("Cannot resolve IDs for %s: %s\n", + pr_err("Cannot resolve IDs for %s: %s\n", m->metric_name, m->metric_expr); goto out; } @@ -1590,6 +1661,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; expr->runtime = m->pctx->sctx.runtime; + expr->default_metricgroup_name = m->default_metricgroup_name; + me->is_default = is_default; list_add(&expr->nd, &me->head); } @@ -1610,6 +1683,7 @@ out: } int metricgroup__parse_groups(struct evlist *perf_evlist, + const char *pmu, const char *str, bool metric_no_group, bool metric_no_merge, @@ -1623,7 +1697,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, if (!table) return -EINVAL; - return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, + return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge, metric_no_threshold, user_requested_cpu_list, system_wide, /*fake_pmu=*/NULL, metric_events, table); } @@ -1633,7 +1707,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, const char *str, struct rblist *metric_events) { - return parse_groups(evlist, str, + return parse_groups(evlist, "all", str, /*metric_no_group=*/false, /*metric_no_merge=*/false, /*metric_no_threshold=*/false, @@ -1642,28 +1716,32 @@ int metricgroup__parse_groups_test(struct evlist *evlist, &perf_pmu__fake, metric_events, table); } +struct metricgroup__has_metric_data { + const char *pmu; + const char *metric; +}; static int metricgroup__has_metric_callback(const struct pmu_metric *pm, const struct pmu_metrics_table *table __maybe_unused, void *vdata) { - const char *metric = vdata; + struct metricgroup__has_metric_data *data = vdata; - if (match_metric(pm->metric_name, metric) || - match_metric(pm->metric_group, metric)) - return 1; - - return 0; + return match_pm_metric(pm, data->pmu, data->metric) ? 1 : 0; } -bool metricgroup__has_metric(const char *metric) +bool metricgroup__has_metric(const char *pmu, const char *metric) { const struct pmu_metrics_table *table = pmu_metrics_table__find(); + struct metricgroup__has_metric_data data = { + .pmu = pmu, + .metric = metric, + }; if (!table) return false; - return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback, - (void *)metric) ? true : false; + return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback, &data) + ? true : false; } static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm, diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 77472e35705e..d5325c6ec8e1 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -22,6 +22,7 @@ struct cgroup; struct metric_event { struct rb_node nd; struct evsel *evsel; + bool is_default; /* the metric evsel from the Default metricgroup */ struct list_head head; /* list of metric_expr */ }; @@ -55,6 +56,8 @@ struct metric_expr { * more human intelligible) and then add "MiB" afterward when displayed. */ const char *metric_unit; + /** Displayed metricgroup name of the Default metricgroup */ + const char *default_metricgroup_name; /** Null terminated array of events used by the metric. */ struct evsel **metric_events; /** Null terminated array of referenced metrics. 
*/ @@ -67,6 +70,7 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, bool create); int metricgroup__parse_groups(struct evlist *perf_evlist, + const char *pmu, const char *str, bool metric_no_group, bool metric_no_merge, @@ -80,7 +84,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, struct rblist *metric_events); void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); -bool metricgroup__has_metric(const char *metric); +bool metricgroup__has_metric(const char *pmu, const char *metric); unsigned int metricgroups__topdown_max_level(void); int arch_get_runtimeparam(const struct pmu_metric *pm); void metricgroup__rblist_exit(struct rblist *metric_events); diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c deleted file mode 100644 index 7c9f9150bad5..000000000000 --- a/tools/perf/util/parse-events-hybrid.c +++ /dev/null @@ -1,214 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/err.h> -#include <linux/zalloc.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/param.h> -#include "evlist.h" -#include "evsel.h" -#include "parse-events.h" -#include "parse-events-hybrid.h" -#include "debug.h" -#include "pmu.h" -#include "pmu-hybrid.h" -#include "perf.h" - -static void config_hybrid_attr(struct perf_event_attr *attr, - int type, int pmu_type) -{ - /* - * attr.config layout for type PERF_TYPE_HARDWARE and - * PERF_TYPE_HW_CACHE - * - * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA - * AA: hardware event ID - * EEEEEEEE: PMU type ID - * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB - * BB: hardware cache ID - * CC: hardware cache op ID - * DD: hardware cache op result ID - * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. - */ - attr->type = type; - attr->config = (attr->config & PERF_HW_EVENT_MASK) | - ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT); -} - -static int create_event_hybrid(__u32 config_type, int *idx, - struct list_head *list, - struct perf_event_attr *attr, const char *name, - const char *metric_id, - struct list_head *config_terms, - struct perf_pmu *pmu) -{ - struct evsel *evsel; - __u32 type = attr->type; - __u64 config = attr->config; - - config_hybrid_attr(attr, config_type, pmu->type); - - /* - * Some hybrid hardware cache events are only available on one CPU - * PMU. For example, the 'L1-dcache-load-misses' is only available - * on cpu_core, while the 'L1-icache-loads' is only available on - * cpu_atom. We need to remove "not supported" hybrid cache events. 
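The attr.config layout documented in the deleted config_hybrid_attr() above survives in the new code paths: the low 32 bits keep the generic event id and the high 32 bits carry the PMU type (PERF_HW_EVENT_MASK and PERF_PMU_TYPE_SHIFT in linux/perf_event.h). A runnable encoder; the PMU type id 8 is made up for the example:

#include <stdio.h>
#include <linux/perf_event.h>

#ifndef PERF_PMU_TYPE_SHIFT
#define PERF_PMU_TYPE_SHIFT 32
#endif
#ifndef PERF_HW_EVENT_MASK
#define PERF_HW_EVENT_MASK 0xffffffffULL
#endif

static unsigned long long encode_extended(unsigned long long config,
					  unsigned int pmu_type)
{
	/* low 32 bits: event id; high 32 bits: PMU type */
	return (config & PERF_HW_EVENT_MASK) |
	       ((unsigned long long)pmu_type << PERF_PMU_TYPE_SHIFT);
}

int main(void)
{
	printf("config=%#llx\n",
	       encode_extended(PERF_COUNT_HW_INSTRUCTIONS, 8));
	return 0;
}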
- */ - if (attr->type == PERF_TYPE_HW_CACHE - && !is_event_supported(attr->type, attr->config)) - return 0; - - evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, - pmu, config_terms); - if (evsel) { - evsel->pmu_name = strdup(pmu->name); - if (!evsel->pmu_name) - return -ENOMEM; - } else - return -ENOMEM; - attr->type = type; - attr->config = config; - return 0; -} - -static int pmu_cmp(struct parse_events_state *parse_state, - struct perf_pmu *pmu) -{ - if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name) - return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name); - - if (parse_state->hybrid_pmu_name) - return strcmp(parse_state->hybrid_pmu_name, pmu->name); - - return 0; -} - -static int add_hw_hybrid(struct parse_events_state *parse_state, - struct list_head *list, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms) -{ - struct perf_pmu *pmu; - int ret; - - perf_pmu__for_each_hybrid_pmu(pmu) { - LIST_HEAD(terms); - - if (pmu_cmp(parse_state, pmu)) - continue; - - copy_config_terms(&terms, config_terms); - ret = create_event_hybrid(PERF_TYPE_HARDWARE, - &parse_state->idx, list, attr, name, - metric_id, &terms, pmu); - free_config_terms(&terms); - if (ret) - return ret; - } - - return 0; -} - -static int create_raw_event_hybrid(int *idx, struct list_head *list, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct list_head *config_terms, - struct perf_pmu *pmu) -{ - struct evsel *evsel; - - attr->type = pmu->type; - evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, - pmu, config_terms); - if (evsel) - evsel->pmu_name = strdup(pmu->name); - else - return -ENOMEM; - - return 0; -} - -static int add_raw_hybrid(struct parse_events_state *parse_state, - struct list_head *list, struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms) -{ - struct perf_pmu *pmu; - int ret; - - perf_pmu__for_each_hybrid_pmu(pmu) { - LIST_HEAD(terms); - - if (pmu_cmp(parse_state, pmu)) - continue; - - copy_config_terms(&terms, config_terms); - ret = create_raw_event_hybrid(&parse_state->idx, list, attr, - name, metric_id, &terms, pmu); - free_config_terms(&terms); - if (ret) - return ret; - } - - return 0; -} - -int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, - struct list_head *list, - struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms, - bool *hybrid) -{ - *hybrid = false; - if (attr->type == PERF_TYPE_SOFTWARE) - return 0; - - if (!perf_pmu__has_hybrid()) - return 0; - - *hybrid = true; - if (attr->type != PERF_TYPE_RAW) { - return add_hw_hybrid(parse_state, list, attr, name, metric_id, - config_terms); - } - - return add_raw_hybrid(parse_state, list, attr, name, metric_id, - config_terms); -} - -int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct list_head *config_terms, - bool *hybrid, - struct parse_events_state *parse_state) -{ - struct perf_pmu *pmu; - int ret; - - *hybrid = false; - if (!perf_pmu__has_hybrid()) - return 0; - - *hybrid = true; - perf_pmu__for_each_hybrid_pmu(pmu) { - LIST_HEAD(terms); - - if (pmu_cmp(parse_state, pmu)) - continue; - - copy_config_terms(&terms, config_terms); - ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, - attr, name, metric_id, &terms, pmu); - free_config_terms(&terms); - if 
(ret) - return ret; - } - - return 0; -} diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h deleted file mode 100644 index cbc05fec02a2..000000000000 --- a/tools/perf/util/parse-events-hybrid.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERF_PARSE_EVENTS_HYBRID_H -#define __PERF_PARSE_EVENTS_HYBRID_H - -#include <linux/list.h> -#include <stdbool.h> -#include <linux/types.h> -#include <linux/perf_event.h> -#include <string.h> - -int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, - struct list_head *list, - struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms, - bool *hybrid); - -int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, const char *metric_id, - struct list_head *config_terms, - bool *hybrid, - struct parse_events_state *parse_state); - -#endif /* __PERF_PARSE_EVENTS_HYBRID_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 34ba840ae19a..5dcfbf316bf6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -21,42 +21,23 @@ #include "parse-events-bison.h" #include "parse-events-flex.h" #include "pmu.h" +#include "pmus.h" #include "asm/bug.h" #include "util/parse-branch-options.h" #include "util/evsel_config.h" #include "util/event.h" -#include "util/parse-events-hybrid.h" -#include "util/pmu-hybrid.h" #include "util/bpf-filter.h" #include "util/util.h" #include "tracepoint.h" -#include "thread_map.h" #define MAX_NAME_LEN 100 -struct perf_pmu_event_symbol { - char *symbol; - enum perf_pmu_event_symbol_type type; -}; - #ifdef PARSER_DEBUG extern int parse_events_debug; #endif int parse_events_parse(void *parse_state, void *scanner); static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused); -static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state, - const char *str, char *pmu_name, - struct list_head *list); - -static struct perf_pmu_event_symbol *perf_pmu_events_list; -/* - * The variable indicates the number of supported pmu event symbols. - * 0 means not initialized and ready to init - * -1 means failed to init, don't try anymore - * >0 is the number of supported pmu event symbols - */ -static int perf_pmu_events_list_num; struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { @@ -152,44 +133,6 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { }, }; -bool is_event_supported(u8 type, u64 config) -{ - bool ret = true; - int open_return; - struct evsel *evsel; - struct perf_event_attr attr = { - .type = type, - .config = config, - .disabled = 1, - }; - struct perf_thread_map *tmap = thread_map__new_by_tid(0); - - if (tmap == NULL) - return false; - - evsel = evsel__new(&attr); - if (evsel) { - open_return = evsel__open(evsel, NULL, tmap); - ret = open_return >= 0; - - if (open_return == -EACCES) { - /* - * This happens if the paranoid value - * /proc/sys/kernel/perf_event_paranoid is set to 2 - * Re-run with exclude_kernel set; we don't do that - * by default as some ARM machines do not support it. 
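The deleted is_event_supported() ends above; outside perf the same probe can be done directly with the perf_event_open() syscall, including the EACCES fallback that retries with exclude_kernel set when perf_event_paranoid is 2. A standalone approximation (not the perf-internal helper):

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int event_is_supported(__u32 type, __u64 config)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = config;
	attr.disabled = 1;

	/* current thread, any cpu, no group, no flags */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0 && errno == EACCES) {
		/* perf_event_paranoid == 2: retry without kernel counting */
		attr.exclude_kernel = 1;
		fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	}
	if (fd < 0)
		return 0;
	close(fd);
	return 1;
}

int main(void)
{
	printf("cycles supported: %d\n",
	       event_is_supported(PERF_TYPE_HARDWARE,
				  PERF_COUNT_HW_CPU_CYCLES));
	return 0;
}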
- * - */ - evsel->core.attr.exclude_kernel = 1; - ret = evsel__open(evsel, NULL, tmap) >= 0; - } - evsel__delete(evsel); - } - - perf_thread_map__put(tmap); - return ret; -} - const char *event_type(int type) { switch (type) { @@ -236,6 +179,57 @@ static char *get_config_name(struct list_head *head_terms) return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); } +/** + * fix_raw - For each raw term see if there is an event (aka alias) in pmu that + * matches the raw's string value. If the string value matches an + * event then change the term to be an event, if not then change it to + * be a config term. For example, "read" may be an event of the PMU or + * a raw hex encoding of 0xead. The fix-up is done late so the PMU of + * the event can be determined and we don't need to scan all PMUs + * ahead-of-time. + * @config_terms: the list of terms that may contain a raw term. + * @pmu: the PMU to scan for events from. + */ +static void fix_raw(struct list_head *config_terms, struct perf_pmu *pmu) +{ + struct parse_events_term *term; + + list_for_each_entry(term, config_terms, list) { + struct perf_pmu_alias *alias; + bool matched = false; + + if (term->type_term != PARSE_EVENTS__TERM_TYPE_RAW) + continue; + + list_for_each_entry(alias, &pmu->aliases, list) { + if (!strcmp(alias->name, term->val.str)) { + free(term->config); + term->config = term->val.str; + term->type_val = PARSE_EVENTS__TERM_TYPE_NUM; + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->val.num = 1; + term->no_value = true; + matched = true; + break; + } + } + if (!matched) { + u64 num; + + free(term->config); + term->config = strdup("config"); + errno = 0; + num = strtoull(term->val.str + 1, NULL, 16); + assert(errno == 0); + free(term->val.str); + term->type_val = PARSE_EVENTS__TERM_TYPE_NUM; + term->type_term = PARSE_EVENTS__TERM_TYPE_CONFIG; + term->val.num = num; + term->no_value = false; + } + } +} + static struct evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, @@ -251,9 +245,16 @@ __add_event(struct list_head *list, int *idx, if (pmu) perf_pmu__warn_invalid_formats(pmu); - if (pmu && attr->type == PERF_TYPE_RAW) - perf_pmu__warn_invalid_config(pmu, attr->config, name); - + if (pmu && (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX)) { + perf_pmu__warn_invalid_config(pmu, attr->config, name, + PERF_PMU_FORMAT_VALUE_CONFIG, "config"); + perf_pmu__warn_invalid_config(pmu, attr->config1, name, + PERF_PMU_FORMAT_VALUE_CONFIG1, "config1"); + perf_pmu__warn_invalid_config(pmu, attr->config2, name, + PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); + perf_pmu__warn_invalid_config(pmu, attr->config3, name, + PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + } if (init_attr) event_attr_init(attr); @@ -267,8 +268,10 @@ __add_event(struct list_head *list, int *idx, evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; + evsel->core.is_pmu_core = pmu ? pmu->is_core : false; evsel->auto_merge_stats = auto_merge_stats; evsel->pmu = pmu; + evsel->pmu_name = pmu && pmu->name ? strdup(pmu->name) : NULL; if (name) evsel->name = strdup(name); @@ -328,18 +331,27 @@ static int add_event_tool(struct list_head *list, int *idx, return 0; } -static int parse_aliases(char *str, const char *const names[][EVSEL__MAX_ALIASES], int size) +/** + * parse_aliases - search names for entries beginning or equalling str ignoring + * case. If multiple entries in names match str then the longest + * is chosen.
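The reworked parse_aliases(), whose kernel-doc starts above, reports the length of the longest case-insensitive prefix match so that parse_events__decode_legacy_cache() can step past it (and a following '-'). A flattened, single-alias-per-entry toy of the same matching rule:

#include <stdio.h>
#include <string.h>
#include <strings.h>

static int find_alias(const char *str, const char *const names[], int size,
		      int *longest)
{
	int found = -1;

	*longest = -1;
	for (int i = 0; i < size; i++) {
		int n = strlen(names[i]);

		/* keep the longest case-insensitive prefix match */
		if (n > *longest && !strncasecmp(str, names[i], n)) {
			*longest = n;
			found = i;
		}
	}
	return found;
}

int main(void)
{
	const char *const caches[] = { "L1-dcache", "L1-icache", "LLC" };
	int len, idx = find_alias("L1-dcache-load-misses", caches, 3, &len);

	printf("matched %d (len %d)\n", idx, len);	/* 0, len 9 */
	return 0;
}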
+ * @str: The needle to look for. + * @names: The haystack to search. + * @size: The size of the haystack. + * @longest: Out argument giving the length of the matching entry. + */ +static int parse_aliases(const char *str, const char *const names[][EVSEL__MAX_ALIASES], int size, + int *longest) { - int i, j; - int n, longest = -1; + *longest = -1; + for (int i = 0; i < size; i++) { + for (int j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) { + int n = strlen(names[i][j]); - for (i = 0; i < size; i++) { - for (j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) { - n = strlen(names[i][j]); - if (n > longest && !strncasecmp(str, names[i][j], n)) - longest = n; + if (n > *longest && !strncasecmp(str, names[i][j], n)) + *longest = n; } - if (longest > 0) + if (*longest > 0) return i; } @@ -357,52 +369,52 @@ static int config_attr(struct perf_event_attr *attr, struct parse_events_error *err, config_term_func_t config_term); -int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2, - struct parse_events_error *err, - struct list_head *head_config, - struct parse_events_state *parse_state) +/** + * parse_events__decode_legacy_cache - Search name for the legacy cache event + * name composed of 1, 2 or 3 hyphen + * separated sections. The first section is + * the cache type while the others are the + * optional op and optional result. To make + * life hard the names in the table also + * contain hyphens and the longest name + * should always be selected. + */ +int parse_events__decode_legacy_cache(const char *name, int extended_pmu_type, __u64 *config) { - struct perf_event_attr attr; - LIST_HEAD(config_terms); - char name[MAX_NAME_LEN]; - const char *config_name, *metric_id; - int cache_type = -1, cache_op = -1, cache_result = -1; - char *op_result[2] = { op_result1, op_result2 }; - int i, n, ret; - bool hybrid; + int len, cache_type = -1, cache_op = -1, cache_result = -1; + const char *name_end = &name[strlen(name) + 1]; + const char *str = name; - /* - * No fallback - if we cannot get a clear cache type - * then bail out: - */ - cache_type = parse_aliases(type, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX); + cache_type = parse_aliases(str, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX, &len); if (cache_type == -1) return -EINVAL; - - config_name = get_config_name(head_config); - n = snprintf(name, MAX_NAME_LEN, "%s", type); - - for (i = 0; (i < 2) && (op_result[i]); i++) { - char *str = op_result[i]; - - n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str); - - if (cache_op == -1) { + str += len + 1; + + if (str < name_end) { + cache_op = parse_aliases(str, evsel__hw_cache_op, + PERF_COUNT_HW_CACHE_OP_MAX, &len); + if (cache_op >= 0) { + if (!evsel__is_cache_op_valid(cache_type, cache_op)) + return -EINVAL; + str += len + 1; + } else { + cache_result = parse_aliases(str, evsel__hw_cache_result, + PERF_COUNT_HW_CACHE_RESULT_MAX, &len); + if (cache_result >= 0) + str += len + 1; + } + } + if (str < name_end) { + if (cache_op < 0) { cache_op = parse_aliases(str, evsel__hw_cache_op, - PERF_COUNT_HW_CACHE_OP_MAX); + PERF_COUNT_HW_CACHE_OP_MAX, &len); if (cache_op >= 0) { if (!evsel__is_cache_op_valid(cache_type, cache_op)) return -EINVAL; - continue; } - } - - if (cache_result == -1) { + } else if (cache_result < 0) { cache_result = parse_aliases(str, evsel__hw_cache_result, - PERF_COUNT_HW_CACHE_RESULT_MAX); - if (cache_result >= 0) - continue; + PERF_COUNT_HW_CACHE_RESULT_MAX, &len); } } @@ -418,33 +430,71 @@ int parse_events_add_cache(struct 
list_head *list, int *idx, if (cache_result == -1) cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS; - memset(&attr, 0, sizeof(attr)); - attr.config = cache_type | (cache_op << 8) | (cache_result << 16); - attr.type = PERF_TYPE_HW_CACHE; + *config = cache_type | (cache_op << 8) | (cache_result << 16); + if (perf_pmus__supports_extended_type()) + *config |= (__u64)extended_pmu_type << PERF_PMU_TYPE_SHIFT; + return 0; +} - if (head_config) { - if (config_attr(&attr, head_config, err, - config_term_common)) - return -EINVAL; +/** + * parse_events__filter_pmu - returns false if a wildcard PMU should be + * considered, true if it should be filtered. + */ +bool parse_events__filter_pmu(const struct parse_events_state *parse_state, + const struct perf_pmu *pmu) +{ + if (parse_state->pmu_filter == NULL) + return false; - if (get_config_terms(head_config, &config_terms)) + if (pmu->name == NULL) + return true; + + return strcmp(parse_state->pmu_filter, pmu->name) != 0; +} + +int parse_events_add_cache(struct list_head *list, int *idx, const char *name, + struct parse_events_state *parse_state, + struct list_head *head_config) +{ + struct perf_pmu *pmu = NULL; + bool found_supported = false; + const char *config_name = get_config_name(head_config); + const char *metric_id = get_config_metric_id(head_config); + + /* Legacy cache events are only supported by core PMUs. */ + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + LIST_HEAD(config_terms); + struct perf_event_attr attr; + int ret; + + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + + memset(&attr, 0, sizeof(attr)); + attr.type = PERF_TYPE_HW_CACHE; + + ret = parse_events__decode_legacy_cache(name, pmu->type, &attr.config); + if (ret) + return ret; + + found_supported = true; + + if (head_config) { + if (config_attr(&attr, head_config, parse_state->error, config_term_common)) + return -EINVAL; + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + } + + if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, + metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL) == NULL) return -ENOMEM; - } - metric_id = get_config_metric_id(head_config); - ret = parse_events__add_cache_hybrid(list, idx, &attr, - config_name ? : name, - metric_id, - &config_terms, - &hybrid, parse_state); - if (hybrid) - goto out_free_terms; - - ret = add_event(list, idx, &attr, config_name ? : name, metric_id, - &config_terms); -out_free_terms: - free_config_terms(&config_terms); - return ret; + free_config_terms(&config_terms); + } + return found_supported ? 
0 : -EINVAL; } #ifdef HAVE_LIBTRACEEVENT @@ -896,10 +946,14 @@ do { \ return 0; } -int parse_events_add_breakpoint(struct list_head *list, int *idx, - u64 addr, char *type, u64 len) +int parse_events_add_breakpoint(struct parse_events_state *parse_state, + struct list_head *list, + u64 addr, char *type, u64 len, + struct list_head *head_config __maybe_unused) { struct perf_event_attr attr; + LIST_HEAD(config_terms); + const char *name; memset(&attr, 0, sizeof(attr)); attr.bp_addr = addr; @@ -920,8 +974,19 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, attr.type = PERF_TYPE_BREAKPOINT; attr.sample_period = 1; - return add_event(list, idx, &attr, /*name=*/NULL, /*mertic_id=*/NULL, - /*config_terms=*/NULL); + if (head_config) { + if (config_attr(&attr, head_config, parse_state->error, + config_term_common)) + return -EINVAL; + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + } + + name = get_config_name(head_config); + + return add_event(list, &parse_state->idx, &attr, name, /*metric_id=*/NULL, + &config_terms); } static int check_type_val(struct parse_events_term *term, @@ -968,6 +1033,9 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", + [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", + [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", + [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", }; static bool config_term_shrinked; @@ -1089,6 +1157,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_METRIC_ID: CHECK_TYPE_VAL(STR); break; + case PARSE_EVENTS__TERM_TYPE_RAW: + CHECK_TYPE_VAL(STR); + break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; @@ -1142,15 +1213,42 @@ static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_error *err) { + if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { + const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + + if (perf_pmu__supports_legacy_cache(pmu)) { + attr->type = PERF_TYPE_HW_CACHE; + return parse_events__decode_legacy_cache(term->config, pmu->type, + &attr->config); + } else + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + } + if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { + const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + + if (!pmu) { + char *err_str; + + if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0) + parse_events_error__handle(err, term->err_term, + err_str, /*help=*/NULL); + return -EINVAL; + } + attr->type = PERF_TYPE_HARDWARE; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + return 0; + } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || - term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG) + term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG) { /* * Always succeed for sysfs terms, as we don't know * at this point what type they need to have.
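* For example, in "cpu/event=0x3c,umask=0x1/" both "event" and "umask" name
* sysfs format files; which attr config bits they set is only known once
* perf_pmu__config() has parsed the PMU's format directory.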
*/ return 0; - else - return config_term_common(attr, term, err); + } + return config_term_common(attr, term, err); } #ifdef HAVE_LIBTRACEEVENT @@ -1364,20 +1462,23 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, #endif } -int parse_events_add_numeric(struct parse_events_state *parse_state, - struct list_head *list, - u32 type, u64 config, - struct list_head *head_config) +static int __parse_events_add_numeric(struct parse_events_state *parse_state, + struct list_head *list, + struct perf_pmu *pmu, u32 type, u32 extended_type, + u64 config, struct list_head *head_config) { struct perf_event_attr attr; LIST_HEAD(config_terms); const char *name, *metric_id; - bool hybrid; int ret; memset(&attr, 0, sizeof(attr)); attr.type = type; attr.config = config; + if (extended_type && (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)) { + assert(perf_pmus__supports_extended_type()); + attr.config |= (u64)extended_type << PERF_PMU_TYPE_SHIFT; + } if (head_config) { if (config_attr(&attr, head_config, parse_state->error, @@ -1390,19 +1491,44 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); - ret = parse_events__add_numeric_hybrid(parse_state, list, &attr, - name, metric_id, - &config_terms, &hybrid); - if (hybrid) - goto out_free_terms; - - ret = add_event(list, &parse_state->idx, &attr, name, metric_id, - &config_terms); -out_free_terms: + ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, + metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL) ? 0 : -ENOMEM; free_config_terms(&config_terms); return ret; } +int parse_events_add_numeric(struct parse_events_state *parse_state, + struct list_head *list, + u32 type, u64 config, + struct list_head *head_config, + bool wildcard) +{ + struct perf_pmu *pmu = NULL; + bool found_supported = false; + + /* Wildcards on numeric values are only supported by core PMUs. */ + if (wildcard && perf_pmus__supports_extended_type()) { + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + int ret; + + found_supported = true; + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + + ret = __parse_events_add_numeric(parse_state, list, pmu, + type, pmu->type, + config, head_config); + if (ret) + return ret; + } + if (found_supported) + return 0; + } + return __parse_events_add_numeric(parse_state, list, perf_pmus__find_by_type(type), + type, /*extended_type=*/0, config, head_config); +} + int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, int tool_event) @@ -1422,33 +1548,6 @@ static bool config_term_percore(struct list_head *config_terms) return false; } -static int parse_events__inside_hybrid_pmu(struct parse_events_state *parse_state, - struct list_head *list, char *name, - struct list_head *head_config) -{ - struct parse_events_term *term; - int ret = -1; - - if (parse_state->fake_pmu || !head_config || list_empty(head_config) || - !perf_pmu__is_hybrid(name)) { - return -1; - } - - /* - * More than one term in list. 
- */ - if (head_config->next && head_config->next->next != head_config) - return -1; - - term = list_first_entry(head_config, struct parse_events_term, list); - if (term && term->config && strcmp(term->config, "event")) { - ret = parse_events__with_hybrid_pmu(parse_state, term->config, - name, list); - } - - return ret; -} - int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, @@ -1461,7 +1560,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, struct parse_events_error *err = parse_state->error; LIST_HEAD(config_terms); - pmu = parse_state->fake_pmu ?: perf_pmu__find(name); + pmu = parse_state->fake_pmu ?: perf_pmus__find(name); if (verbose > 1 && !(pmu && pmu->selectable)) { fprintf(stderr, "Attempting to add event pmu '%s' with '", @@ -1485,6 +1584,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, parse_events_error__handle(err, 0, err_str, NULL); return -EINVAL; } + if (head_config) + fix_raw(head_config, pmu); if (pmu->default_config) { memcpy(&attr, pmu->default_config, @@ -1492,20 +1593,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } else { memset(&attr, 0, sizeof(attr)); } + attr.type = pmu->type; if (!head_config) { - attr.type = pmu->type; evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, /*name=*/NULL, /*metric_id=*/NULL, pmu, /*config_terms=*/NULL, auto_merge_stats, /*cpu_list=*/NULL); - if (evsel) { - evsel->pmu_name = name ? strdup(name) : NULL; - return 0; - } else { - return -ENOMEM; - } + return evsel ? 0 : -ENOMEM; } if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info)) @@ -1541,11 +1637,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) return -ENOMEM; - if (!parse_events__inside_hybrid_pmu(parse_state, list, name, - head_config)) { - return 0; - } - if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { free_config_terms(&config_terms); return -EINVAL; @@ -1561,7 +1652,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (evsel->name) evsel->use_config_name = true; - evsel->pmu_name = name ? 
strdup(name) : NULL; evsel->percore = config_term_percore(&evsel->config_terms); if (parse_state->fake_pmu) @@ -1615,15 +1705,21 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, INIT_LIST_HEAD(list); - while ((pmu = perf_pmu__scan(pmu)) != NULL) { + while ((pmu = perf_pmus__scan(pmu)) != NULL) { struct perf_pmu_alias *alias; + bool auto_merge_stats; + + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + + auto_merge_stats = perf_pmu__auto_merge_stats(pmu); list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, str)) { parse_events_copy_term_list(head, &orig_head); if (!parse_events_add_pmu(parse_state, list, pmu->name, orig_head, - /*auto_merge_stats=*/true)) { + auto_merge_stats)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; @@ -1875,180 +1971,6 @@ int parse_events_name(struct list_head *list, const char *name) return 0; } -static int -comp_pmu(const void *p1, const void *p2) -{ - struct perf_pmu_event_symbol *pmu1 = (struct perf_pmu_event_symbol *) p1; - struct perf_pmu_event_symbol *pmu2 = (struct perf_pmu_event_symbol *) p2; - - return strcasecmp(pmu1->symbol, pmu2->symbol); -} - -static void perf_pmu__parse_cleanup(void) -{ - if (perf_pmu_events_list_num > 0) { - struct perf_pmu_event_symbol *p; - int i; - - for (i = 0; i < perf_pmu_events_list_num; i++) { - p = perf_pmu_events_list + i; - zfree(&p->symbol); - } - zfree(&perf_pmu_events_list); - perf_pmu_events_list_num = 0; - } -} - -#define SET_SYMBOL(str, stype) \ -do { \ - p->symbol = str; \ - if (!p->symbol) \ - goto err; \ - p->type = stype; \ -} while (0) - -/* - * Read the pmu events list from sysfs - * Save it into perf_pmu_events_list - */ -static void perf_pmu__parse_init(void) -{ - - struct perf_pmu *pmu = NULL; - struct perf_pmu_alias *alias; - int len = 0; - - pmu = NULL; - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - list_for_each_entry(alias, &pmu->aliases, list) { - char *tmp = strchr(alias->name, '-'); - - if (tmp) { - char *tmp2 = NULL; - - tmp2 = strchr(tmp + 1, '-'); - len++; - if (tmp2) - len++; - } - - len++; - } - } - - if (len == 0) { - perf_pmu_events_list_num = -1; - return; - } - perf_pmu_events_list = malloc(sizeof(struct perf_pmu_event_symbol) * len); - if (!perf_pmu_events_list) - return; - perf_pmu_events_list_num = len; - - len = 0; - pmu = NULL; - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - list_for_each_entry(alias, &pmu->aliases, list) { - struct perf_pmu_event_symbol *p = perf_pmu_events_list + len; - char *tmp = strchr(alias->name, '-'); - char *tmp2 = NULL; - - if (tmp) - tmp2 = strchr(tmp + 1, '-'); - if (tmp2) { - SET_SYMBOL(strndup(alias->name, tmp - alias->name), - PMU_EVENT_SYMBOL_PREFIX); - p++; - tmp++; - SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX); - p++; - SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2); - len += 3; - } else if (tmp) { - SET_SYMBOL(strndup(alias->name, tmp - alias->name), - PMU_EVENT_SYMBOL_PREFIX); - p++; - SET_SYMBOL(strdup(++tmp), PMU_EVENT_SYMBOL_SUFFIX); - len += 2; - } else { - SET_SYMBOL(strdup(alias->name), PMU_EVENT_SYMBOL); - len++; - } - } - } - qsort(perf_pmu_events_list, len, - sizeof(struct perf_pmu_event_symbol), comp_pmu); - - return; -err: - perf_pmu__parse_cleanup(); -} - -/* - * This function injects special term in - * perf_pmu_events_list so the test code - * can check on this functionality. 
- */ -int perf_pmu__test_parse_init(void) -{ - struct perf_pmu_event_symbol *list, *tmp, symbols[] = { - {(char *)"read", PMU_EVENT_SYMBOL}, - {(char *)"event", PMU_EVENT_SYMBOL_PREFIX}, - {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX}, - {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX}, - {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2}, - }; - unsigned long i, j; - - tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols)); - if (!list) - return -ENOMEM; - - for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) { - tmp->type = symbols[i].type; - tmp->symbol = strdup(symbols[i].symbol); - if (!tmp->symbol) - goto err_free; - } - - perf_pmu_events_list = list; - perf_pmu_events_list_num = ARRAY_SIZE(symbols); - - qsort(perf_pmu_events_list, ARRAY_SIZE(symbols), - sizeof(struct perf_pmu_event_symbol), comp_pmu); - return 0; - -err_free: - for (j = 0, tmp = list; j < i; j++, tmp++) - zfree(&tmp->symbol); - free(list); - return -ENOMEM; -} - -enum perf_pmu_event_symbol_type -perf_pmu__parse_check(const char *name) -{ - struct perf_pmu_event_symbol p, *r; - - /* scan kernel pmu events from sysfs if needed */ - if (perf_pmu_events_list_num == 0) - perf_pmu__parse_init(); - /* - * name "cpu" could be prefix of cpu-cycles or cpu// events. - * cpu-cycles has been handled by hardcode. - * So it must be cpu// events, not kernel pmu event. - */ - if ((perf_pmu_events_list_num <= 0) || !strcmp(name, "cpu")) - return PMU_EVENT_SYMBOL_ERR; - - p.symbol = strdup(name); - r = bsearch(&p, perf_pmu_events_list, - (size_t) perf_pmu_events_list_num, - sizeof(struct perf_pmu_event_symbol), comp_pmu); - zfree(&p.symbol); - return r ? r->type : PMU_EVENT_SYMBOL_ERR; -} - static int parse_events__scanner(const char *str, struct parse_events_state *parse_state) { @@ -2086,7 +2008,6 @@ int parse_events_terms(struct list_head *terms, const char *str) int ret; ret = parse_events__scanner(str, &parse_state); - perf_pmu__parse_cleanup(); if (!ret) { list_splice(parse_state.terms, terms); @@ -2098,31 +2019,71 @@ int parse_events_terms(struct list_head *terms, const char *str) return ret; } -static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state, - const char *str, char *pmu_name, - struct list_head *list) +static int evsel__compute_group_pmu_name(struct evsel *evsel, + const struct list_head *head) { - struct parse_events_state ps = { - .list = LIST_HEAD_INIT(ps.list), - .stoken = PE_START_EVENTS, - .hybrid_pmu_name = pmu_name, - .idx = parse_state->idx, - }; - int ret; - - ret = parse_events__scanner(str, &ps); - perf_pmu__parse_cleanup(); + struct evsel *leader = evsel__leader(evsel); + struct evsel *pos; + const char *group_pmu_name; + struct perf_pmu *pmu = evsel__find_pmu(evsel); - if (!ret) { - if (!list_empty(&ps.list)) { - list_splice(&ps.list, list); - parse_state->idx = ps.idx; - return 0; - } else - return -1; + if (!pmu) { + /* + * For PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE types the PMU + * is a core PMU, but in heterogeneous systems this is + * unknown. For now pick the first core PMU. + */ + pmu = perf_pmus__scan_core(NULL); + } + if (!pmu) { + pr_debug("No PMU found for '%s'\n", evsel__name(evsel)); + return -EINVAL; } + group_pmu_name = pmu->name; + /* + * Software events may be in a group with other uncore PMU events. Use + * the pmu_name of the first non-software event to avoid breaking the + * software event out of the group. + * + * Aux event leaders, like intel_pt, expect a group with events from + * other PMUs, so substitute the AUX event's PMU in this case. 
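+ *
+ * For example, in a hypothetical group "{intel_pt//,dummy}", the software
+ * dummy event would take intel_pt's group PMU name, keeping it grouped
+ * with its AUX leader when events are sorted.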
+ */ + if (perf_pmu__is_software(pmu) || evsel__is_aux_event(leader)) { + struct perf_pmu *leader_pmu = evsel__find_pmu(leader); - return ret; + if (!leader_pmu) { + /* As with determining pmu above. */ + leader_pmu = perf_pmus__scan_core(NULL); + } + /* + * Starting with the leader, find the first event with a named + * non-software PMU. for_each_group_(member|evsel) isn't used as + * the list isn't yet sorted to put evsels in the same group + * together. + */ + if (leader_pmu && !perf_pmu__is_software(leader_pmu)) { + group_pmu_name = leader_pmu->name; + } else if (leader->core.nr_members > 1) { + list_for_each_entry(pos, head, core.node) { + struct perf_pmu *pos_pmu; + + if (pos == leader || evsel__leader(pos) != leader) + continue; + pos_pmu = evsel__find_pmu(pos); + if (!pos_pmu) { + /* As with determining pmu above. */ + pos_pmu = perf_pmus__scan_core(NULL); + } + if (pos_pmu && !perf_pmu__is_software(pos_pmu)) { + group_pmu_name = pos_pmu->name; + break; + } + } + } + } + /* Assign the actual name taking care that the fake PMU lacks a name. */ + evsel->group_pmu_name = strdup(group_pmu_name ?: "fake"); + return evsel->group_pmu_name ? 0 : -ENOMEM; } __weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) @@ -2144,7 +2105,11 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list /* * First sort by grouping/leader. Read the leader idx only if the evsel * is part of a group; by default, ungrouped events will be sorted * relative to grouped events based on where the first ungrouped event * occurs. If neither event has a group, we want to fall through to * the arch-specific sorting, which can reorder and fix things like * Intel's topdown events. */ if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) { lhs_has_group = true; @@ -2160,8 +2125,8 @@ /* Group by PMU if there is a group. Groups can't span PMUs. */ if (lhs_has_group && rhs_has_group) { - lhs_pmu_name = evsel__group_pmu_name(lhs); - rhs_pmu_name = evsel__group_pmu_name(rhs); + lhs_pmu_name = lhs->group_pmu_name; + rhs_pmu_name = rhs->group_pmu_name; ret = strcmp(lhs_pmu_name, rhs_pmu_name); if (ret) return ret; } @@ -2171,13 +2136,14 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list return arch_evlist__cmp(lhs, rhs); } -static bool parse_events__sort_events_and_fix_groups(struct list_head *list) +static int parse_events__sort_events_and_fix_groups(struct list_head *list) { int idx = 0, unsorted_idx = -1; struct evsel *pos, *cur_leader = NULL; struct perf_evsel *cur_leaders_grp = NULL; bool idx_changed = false; int orig_num_leaders = 0, num_leaders = 0; + int ret; /* * Compute index to insert ungrouped events at.
Place them where the @@ -2186,6 +2152,10 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list) list_for_each_entry(pos, list, core.node) { const struct evsel *pos_leader = evsel__leader(pos); + ret = evsel__compute_group_pmu_name(pos, list); + if (ret) + return ret; + if (pos == pos_leader) orig_num_leaders++; @@ -2210,7 +2180,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list) idx = 0; list_for_each_entry(pos, list, core.node) { const struct evsel *pos_leader = evsel__leader(pos); - const char *pos_pmu_name = evsel__group_pmu_name(pos); + const char *pos_pmu_name = pos->group_pmu_name; const char *cur_leader_pmu_name, *pos_leader_pmu_name; bool force_grouped = arch_evsel__must_be_in_group(pos); @@ -2227,7 +2197,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list) if (!cur_leader) cur_leader = pos; - cur_leader_pmu_name = evsel__group_pmu_name(cur_leader); + cur_leader_pmu_name = cur_leader->group_pmu_name; if ((cur_leaders_grp != pos->core.leader && !force_grouped) || strcmp(cur_leader_pmu_name, pos_pmu_name)) { /* Event is for a different group/PMU than last. */ @@ -2239,7 +2209,7 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list) */ cur_leaders_grp = pos->core.leader; } - pos_leader_pmu_name = evsel__group_pmu_name(pos_leader); + pos_leader_pmu_name = pos_leader->group_pmu_name; if (strcmp(pos_leader_pmu_name, pos_pmu_name) || force_grouped) { /* * Event's PMU differs from its leader's. Groups can't @@ -2256,10 +2226,10 @@ static bool parse_events__sort_events_and_fix_groups(struct list_head *list) num_leaders++; pos_leader->core.nr_members++; } - return idx_changed || num_leaders != orig_num_leaders; + return (idx_changed || num_leaders != orig_num_leaders) ? 
1 : 0; } -int __parse_events(struct evlist *evlist, const char *str, +int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter, struct parse_events_error *err, struct perf_pmu *fake_pmu, bool warn_if_reordered) { @@ -2270,19 +2240,23 @@ int __parse_events(struct evlist *evlist, const char *str, .evlist = evlist, .stoken = PE_START_EVENTS, .fake_pmu = fake_pmu, + .pmu_filter = pmu_filter, + .match_legacy_cache_terms = true, }; - int ret; + int ret, ret2; ret = parse_events__scanner(str, &parse_state); - perf_pmu__parse_cleanup(); if (!ret && list_empty(&parse_state.list)) { WARN_ONCE(true, "WARNING: event parser found nothing\n"); return -1; } - if (parse_events__sort_events_and_fix_groups(&parse_state.list) && - warn_if_reordered && !parse_state.wild_card_pmus) + ret2 = parse_events__sort_events_and_fix_groups(&parse_state.list); + if (ret2 < 0) + return ret; + + if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) pr_warning("WARNING: events were regrouped to match PMUs\n"); /* @@ -2451,12 +2425,13 @@ void parse_events_error__print(struct parse_events_error *err, int parse_events_option(const struct option *opt, const char *str, int unset __maybe_unused) { - struct evlist *evlist = *(struct evlist **)opt->value; + struct parse_events_option_args *args = opt->value; struct parse_events_error err; int ret; parse_events_error__init(&err); - ret = parse_events(evlist, str, &err); + ret = __parse_events(*args->evlistp, str, args->pmu_filter, &err, + /*fake_pmu=*/NULL, /*warn_if_reordered=*/true); if (ret) { parse_events_error__print(&err, str); @@ -2469,22 +2444,21 @@ int parse_events_option(const struct option *opt, const char *str, int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset) { - struct evlist **evlistp = opt->value; + struct parse_events_option_args *args = opt->value; int ret; - if (*evlistp == NULL) { - *evlistp = evlist__new(); + if (*args->evlistp == NULL) { + *args->evlistp = evlist__new(); - if (*evlistp == NULL) { + if (*args->evlistp == NULL) { fprintf(stderr, "Not enough memory to create evlist\n"); return -1; } } - ret = parse_events_option(opt, str, unset); if (ret) { - evlist__delete(*evlistp); - *evlistp = NULL; + evlist__delete(*args->evlistp); + *args->evlistp = NULL; } return ret; @@ -2546,7 +2520,7 @@ static int set_filter(struct evsel *evsel, const void *arg) return 0; } - while ((pmu = perf_pmu__scan(pmu)) != NULL) + while ((pmu = perf_pmus__scan(pmu)) != NULL) if (pmu->type == evsel->core.attr.type) { found = true; break; @@ -2682,29 +2656,13 @@ int parse_events_term__str(struct parse_events_term **term, return new_term(term, &temp, str, 0); } -int parse_events_term__sym_hw(struct parse_events_term **term, - char *config, unsigned idx) +int parse_events_term__term(struct parse_events_term **term, + int term_lhs, int term_rhs, + void *loc_term, void *loc_val) { - struct event_symbol *sym; - char *str; - struct parse_events_term temp = { - .type_val = PARSE_EVENTS__TERM_TYPE_STR, - .type_term = PARSE_EVENTS__TERM_TYPE_USER, - .config = config, - }; - - if (!temp.config) { - temp.config = strdup("event"); - if (!temp.config) - return -ENOMEM; - } - BUG_ON(idx >= PERF_COUNT_HW_MAX); - sym = &event_symbols_hw[idx]; - - str = strdup(sym->symbol); - if (!str) - return -ENOMEM; - return new_term(term, &temp, str, 0); + return parse_events_term__str(term, term_lhs, NULL, + strdup(config_term_names[term_rhs]), + loc_term, loc_val); } int parse_events_term__clone(struct parse_events_term **new, @@ 
-2855,15 +2813,3 @@ char *parse_events_formats_error_string(char *additional_terms) fail: return NULL; } - -struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct perf_pmu *pmu, - struct list_head *config_terms) -{ - return __add_event(list, idx, attr, /*init_attr=*/true, name, metric_id, - pmu, config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL); -} diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 86ad4438a2aa..b0eb95f93e9c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -18,21 +18,26 @@ struct parse_events_error; struct option; struct perf_pmu; -bool is_event_supported(u8 type, u64 config); - const char *event_type(int type); +/* Arguments encoded in opt->value. */ +struct parse_events_option_args { + struct evlist **evlistp; + const char *pmu_filter; +}; int parse_events_option(const struct option *opt, const char *str, int unset); int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset); -__attribute__((nonnull(1, 2, 3))) -int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error, - struct perf_pmu *fake_pmu, bool warn_if_reordered); +__attribute__((nonnull(1, 2, 4))) +int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter, + struct parse_events_error *error, struct perf_pmu *fake_pmu, + bool warn_if_reordered); __attribute__((nonnull(1, 2, 3))) static inline int parse_events(struct evlist *evlist, const char *str, struct parse_events_error *err) { - return __parse_events(evlist, str, err, /*fake_pmu=*/NULL, /*warn_if_reordered=*/true); + return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/NULL, + /*warn_if_reordered=*/true); } int parse_event(struct evlist *evlist, const char *str); @@ -41,14 +46,6 @@ int parse_events_terms(struct list_head *terms, const char *str); int parse_filter(const struct option *opt, const char *str, int unset); int exclude_perf(const struct option *opt, const char *arg, int unset); -enum perf_pmu_event_symbol_type { - PMU_EVENT_SYMBOL_ERR, /* not a PMU EVENT */ - PMU_EVENT_SYMBOL, /* normal style PMU event */ - PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */ - PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */ - PMU_EVENT_SYMBOL_SUFFIX2, /* suffix of pre-suf2 style event */ -}; - enum { PARSE_EVENTS__TERM_TYPE_NUM, PARSE_EVENTS__TERM_TYPE_STR, @@ -78,6 +75,9 @@ enum { PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, PARSE_EVENTS__TERM_TYPE_METRIC_ID, + PARSE_EVENTS__TERM_TYPE_RAW, + PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, + PARSE_EVENTS__TERM_TYPE_HARDWARE, __PARSE_EVENTS__TERM_TYPE_NR, }; @@ -128,10 +128,15 @@ struct parse_events_state { struct list_head *terms; int stoken; struct perf_pmu *fake_pmu; - char *hybrid_pmu_name; + /* If non-null, when wildcard matching only match the given PMU. */ + const char *pmu_filter; + /* Should PE_LEGACY_CACHE tokens be generated for config terms?
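+ * For example, when true, the "L1-dcache-load-miss" in
+ * "cpu/L1-dcache-load-miss/" lexes as a PE_LEGACY_CACHE token; when
+ * false it lexes as a plain PE_NAME term (see lc_str() below).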
*/ + bool match_legacy_cache_terms; bool wild_card_pmus; }; +bool parse_events__filter_pmu(const struct parse_events_state *parse_state, + const struct perf_pmu *pmu); void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, @@ -141,8 +146,9 @@ int parse_events_term__num(struct parse_events_term **term, int parse_events_term__str(struct parse_events_term **term, int type_term, char *config, char *str, void *loc_term, void *loc_val); -int parse_events_term__sym_hw(struct parse_events_term **term, - char *config, unsigned idx); +int parse_events_term__term(struct parse_events_term **term, + int term_lhs, int term_rhs, + void *loc_term, void *loc_val); int parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term); void parse_events_term__delete(struct parse_events_term *term); @@ -170,17 +176,19 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, - struct list_head *head_config); + struct list_head *head_config, + bool wildcard); int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, int tool_event); -int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2, - struct parse_events_error *error, - struct list_head *head_config, - struct parse_events_state *parse_state); -int parse_events_add_breakpoint(struct list_head *list, int *idx, - u64 addr, char *type, u64 len); +int parse_events_add_cache(struct list_head *list, int *idx, const char *name, + struct parse_events_state *parse_state, + struct list_head *head_config); +int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config); +int parse_events_add_breakpoint(struct parse_events_state *parse_state, + struct list_head *list, + u64 addr, char *type, u64 len, + struct list_head *head_config); int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, @@ -198,8 +206,6 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, int parse_events_copy_term_list(struct list_head *old, struct list_head **new); -enum perf_pmu_event_symbol_type -perf_pmu__parse_check(const char *name); void parse_events__set_leader(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); @@ -241,13 +247,4 @@ static inline bool is_sdt_event(char *str __maybe_unused) } #endif /* HAVE_LIBELF_SUPPORT */ -int perf_pmu__test_parse_init(void); - -struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, - const char *name, - const char *metric_id, - struct perf_pmu *pmu, - struct list_head *config_terms); - #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 51fe0a9fb3de..99335ec586ae 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -63,15 +63,9 @@ static int str(yyscan_t scanner, int token) return token; } -static int raw(yyscan_t scanner) +static int lc_str(yyscan_t scanner, const struct parse_events_state *state) { - YYSTYPE *yylval = parse_events_get_lval(scanner); - char *text = parse_events_get_text(scanner); - - if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL) - 
return str(scanner, PE_NAME); - - return __value(yylval, text + 1, 16, PE_RAW); + return str(scanner, state->match_legacy_cache_terms ? PE_LEGACY_CACHE : PE_NAME); } static bool isbpf_suffix(char *text) @@ -131,35 +125,6 @@ do { \ yyless(0); \ } while (0) -static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - char *text = parse_events_get_text(scanner); - - yylval->str = strdup(text); - - /* - * If we're not testing then parse check determines the PMU event type - * which if it isn't a PMU returns PE_NAME. When testing the result of - * parse check can't be trusted so we return PE_PMU_EVENT_FAKE unless - * an '!' is present in which case the text can't be a PMU name. - */ - switch (perf_pmu__parse_check(text)) { - case PMU_EVENT_SYMBOL_PREFIX: - return PE_PMU_EVENT_PRE; - case PMU_EVENT_SYMBOL_SUFFIX: - return PE_PMU_EVENT_SUF; - case PMU_EVENT_SYMBOL_SUFFIX2: - return PE_PMU_EVENT_SUF2; - case PMU_EVENT_SYMBOL: - return parse_state->fake_pmu - ? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT; - default: - return parse_state->fake_pmu && !strchr(text,'!') - ? PE_PMU_EVENT_FAKE : PE_NAME; - } -} - static int sym(yyscan_t scanner, int type, int config) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -184,6 +149,16 @@ static int term(yyscan_t scanner, int type) return PE_TERM; } +static int hw_term(yyscan_t scanner, int config) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + yylval->hardware_term.str = strdup(text); + yylval->hardware_term.num = PERF_TYPE_HARDWARE + config; + return PE_TERM_HW; +} + #define YY_USER_ACTION \ do { \ yylloc->last_column = yylloc->first_column; \ @@ -211,19 +186,25 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!]* +name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]* name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? -/* If you add a modifier you need to update check_modifier() */ +/* + * If you add a modifier you need to update check_modifier(). + * Also, the letters in modifier_event must not be in modifier_bp. 
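+ * For example, something like "mem:0x1000:rw:u" depends on this: "r" and
+ * "w" appear only in modifier_bp while "u" appears only in modifier_event,
+ * letting the <mem> rules below use ":"/{modifier_bp} trailing context to
+ * pick the right colon.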
+ */ modifier_event [ukhpPGHSDIWeb]+ modifier_bp [rwx]{1,3} +lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node) +lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss) +digit [0-9] +non_digit [^0-9] %% %{ struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner); - { int start_token = _parse_state->stoken; @@ -303,10 +284,23 @@ percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } -r{num_raw_hex} { return raw(yyscanner); } -r0x{num_raw_hex} { return raw(yyscanner); } +cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } +stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } +stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } +instructions { return hw_term(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); } +cache-references { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); } +cache-misses { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_MISSES); } +branch-instructions|branches { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } +branch-misses { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); } +bus-cycles { return hw_term(yyscanner, PERF_COUNT_HW_BUS_CYCLES); } +ref-cycles { return hw_term(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); } +r{num_raw_hex} { return str(yyscanner, PE_RAW); } +r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } +{lc_type} { return lc_str(yyscanner, _parse_state); } +{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } +{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } {name_minus} { return str(yyscanner, PE_NAME); } \[all\] { return PE_ARRAY_ALL; } "[" { BEGIN(array); return '['; } @@ -315,8 +309,20 @@ r0x{num_raw_hex} { return raw(yyscanner); } <mem>{ {modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); } -: { return ':'; } -"/" { return '/'; } + /* + * The colon before memory access modifiers can get mixed up with the + * colon before event modifiers. Fortunately none of the option letters + * are the same, so trailing context can be used to disambiguate the two + * cases. + */ +":"/{modifier_bp} { return PE_BP_COLON; } + /* + * The slash before memory length can get mixed up with the slash before + * config terms. Fortunately config terms do not start with a numeric + * digit, so trailing context can be used to disambiguate the two cases. + */ +"/"/{digit} { return PE_BP_SLASH; } +"/"/{non_digit} { BEGIN(config); return '/'; } {num_dec} { return value(yyscanner, 10); } {num_hex} { return value(yyscanner, 16); } /* @@ -359,47 +365,20 @@ system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } - /* - * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately. - * Because the prefix cycles is mixed up with cpu-cycles.
- * loads and stores are mixed up with cache event - */ -cycles-ct | -cycles-t | -mem-loads | -mem-loads-aux | -mem-stores | -topdown-[a-z-]+ | -tx-capacity-[a-z-]+ | -el-capacity-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } - -L1-dcache|l1-d|l1d|L1-data | -L1-icache|l1-i|l1i|L1-instruction | -LLC|L2 | -dTLB|d-tlb|Data-TLB | -iTLB|i-tlb|Instruction-TLB | -branch|branches|bpu|btb|bpc | -node { return str(yyscanner, PE_NAME_CACHE_TYPE); } - -load|loads|read | -store|stores|write | -prefetch|prefetches | -speculative-read|speculative-load | -refs|Reference|ops|access | -misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); } - +{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); } +{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } +{lc_type}-{lc_op_result}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } mem: { BEGIN(mem); return PE_PREFIX_MEM; } -r{num_raw_hex} { return raw(yyscanner); } +r{num_raw_hex} { return str(yyscanner, PE_RAW); } {num_dec} { return value(yyscanner, 10); } {num_hex} { return value(yyscanner, 16); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } {bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } {bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } -{name} { return pmu_str_check(yyscanner, _parse_state); } +{name} { return str(yyscanner, PE_NAME); } {name_tag} { return str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } -- { return '-'; } , { BEGIN(event); return ','; } : { return ':'; } "{" { BEGIN(event); return '{'; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 4488443e506e..9f28d4b5502f 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -8,12 +8,14 @@ #define YYDEBUG 1 +#include <errno.h> #include <fnmatch.h> #include <stdio.h> #include <linux/compiler.h> #include <linux/types.h> #include <linux/zalloc.h> #include "pmu.h" +#include "pmus.h" #include "evsel.h" #include "parse-events.h" #include "parse-events-bison.h" @@ -52,36 +54,36 @@ static void free_list_evsel(struct list_head* list_evsel) %} %token PE_START_EVENTS PE_START_TERMS -%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM +%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM %token PE_VALUE_SYM_TOOL %token PE_EVENT_NAME -%token PE_NAME +%token PE_RAW PE_NAME %token PE_BPF_OBJECT PE_BPF_SOURCE -%token PE_MODIFIER_EVENT PE_MODIFIER_BP -%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT +%token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH +%token PE_LEGACY_CACHE %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR -%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%token PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM +%token PE_TERM_HW %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW %type <num> PE_VALUE_SYM_SW %type <num> PE_VALUE_SYM_TOOL -%type <num> PE_RAW %type <num> PE_TERM %type <num> value_sym +%type <str> PE_RAW %type <str> PE_NAME %type <str> PE_BPF_OBJECT %type <str> PE_BPF_SOURCE -%type <str> PE_NAME_CACHE_TYPE -%type <str> PE_NAME_CACHE_OP_RESULT +%type <str> PE_LEGACY_CACHE %type <str> PE_MODIFIER_EVENT %type <str> PE_MODIFIER_BP %type <str> PE_EVENT_NAME -%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%type <str> PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type <str> 
PE_DRV_CFG_TERM -%type <str> event_pmu_name +%type <str> name_or_raw name_or_legacy %destructor { free ($$); } <str> %type <term> event_term %destructor { parse_events_term__delete ($$); } <term> @@ -112,6 +114,8 @@ static void free_list_evsel(struct list_head* list_evsel) %type <array> array_term %type <array> array_terms %destructor { free ($$.ranges); } <array> +%type <hardware_term> PE_TERM_HW +%destructor { free ($$.str); } <hardware_term> %union { @@ -125,6 +129,10 @@ static void free_list_evsel(struct list_head* list_evsel) char *event; } tracepoint_name; struct parse_events_array array; + struct hardware_term { + char *str; + u64 num; + } hardware_term; } %% @@ -267,17 +275,14 @@ event_def event_def: event_pmu | event_legacy_symbol | event_legacy_cache sep_dc | - event_legacy_mem | + event_legacy_mem sep_dc | event_legacy_tracepoint sep_dc | event_legacy_numeric sep_dc | event_legacy_raw sep_dc | event_bpf_file -event_pmu_name: -PE_NAME | PE_PMU_EVENT_PRE - event_pmu: -event_pmu_name opt_pmu_config +PE_NAME opt_pmu_config { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; @@ -303,25 +308,32 @@ event_pmu_name opt_pmu_config list = alloc_list(); if (!list) CLEANUP_YYABORT; - if (parse_events_add_pmu(_parse_state, list, $1, $2, /*auto_merge_stats=*/false)) { + /* Attempt to add to list assuming $1 is a PMU name. */ + if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false)) { struct perf_pmu *pmu = NULL; int ok = 0; + /* Failure to add, try wildcard expansion of $1 as a PMU name. */ if (asprintf(&pattern, "%s*", $1) < 0) CLEANUP_YYABORT; - while ((pmu = perf_pmu__scan(pmu)) != NULL) { + while ((pmu = perf_pmus__scan(pmu)) != NULL) { char *name = pmu->name; + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + if (!strncmp(name, "uncore_", 7) && strncmp($1, "uncore_", 7)) name += 7; if (!perf_pmu__match(pattern, name, $1) || !perf_pmu__match(pattern, pmu->alias_name, $1)) { + bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu); + if (parse_events_copy_term_list(orig_terms, &terms)) CLEANUP_YYABORT; - if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, - /*auto_merge_stats=*/true)) { + if (!parse_events_add_pmu(parse_state, list, pmu->name, terms, + auto_merge_stats)) { ok++; parse_state->wild_card_pmus = true; } @@ -329,6 +341,12 @@ event_pmu_name opt_pmu_config } } + if (!ok) { + /* Failure to add, assume $1 is an event name. 
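+ * For example, "mem-loads" names a sysfs event alias provided by one or
+ * more PMUs rather than a PMU itself, so try
+ * parse_events_multi_pmu_add() with it.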
*/ + zfree(&list); + ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list); + $2 = NULL; + } if (!ok) CLEANUP_YYABORT; } @@ -352,41 +370,27 @@ PE_KERNEL_PMU_EVENT sep_dc $$ = list; } | -PE_KERNEL_PMU_EVENT opt_pmu_config +PE_NAME sep_dc { struct list_head *list; int err; - /* frees $2 */ - err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); + err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); free($1); if (err < 0) YYABORT; $$ = list; } | -PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc -{ - struct list_head *list; - char pmu_name[128]; - snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5); - free($1); - free($3); - free($5); - if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) - YYABORT; - $$ = list; -} -| -PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc +PE_KERNEL_PMU_EVENT opt_pmu_config { struct list_head *list; - char pmu_name[128]; + int err; - snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3); + /* frees $2 */ + err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); free($1); - free($3); - if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) + if (err < 0) YYABORT; $$ = list; } @@ -441,10 +445,11 @@ value_sym '/' event_config '/' int type = $1 >> 16; int config = $1 & 255; int err; + bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_numeric(_parse_state, list, type, config, $3); + err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard); parse_events_terms__delete($3); if (err) { free_list_evsel(list); @@ -458,10 +463,12 @@ value_sym sep_slash_slash_dc struct list_head *list; int type = $1 >> 16; int config = $1 & 255; + bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); list = alloc_list(); ABORT_ON(!list); - ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, NULL)); + ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, + /*head_config=*/NULL, wildcard)); $$ = list; } | @@ -476,60 +483,16 @@ PE_VALUE_SYM_TOOL sep_slash_slash_dc } event_legacy_cache: -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config +PE_LEGACY_CACHE opt_event_config { struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; struct list_head *list; int err; list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6, - parse_state); - parse_events_terms__delete($6); - free($1); - free($3); - free($5); - if (err) { - free_list_evsel(list); - YYABORT; - } - $$ = list; -} -| -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config -{ - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - struct list_head *list; - int err; - - list = alloc_list(); - ABORT_ON(!list); - err = parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4, - parse_state); - parse_events_terms__delete($4); - free($1); - free($3); - if (err) { - free_list_evsel(list); - YYABORT; - } - $$ = list; -} -| -PE_NAME_CACHE_TYPE opt_event_config -{ - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - struct list_head *list; - int err; + err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2); - list = alloc_list(); - ABORT_ON(!list); - err = 
parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2, - parse_state); parse_events_terms__delete($2); free($1); if (err) { @@ -540,16 +503,16 @@ PE_NAME_CACHE_TYPE opt_event_config } event_legacy_mem: -PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc +PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config { - struct parse_events_state *parse_state = _parse_state; struct list_head *list; int err; list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_breakpoint(list, &parse_state->idx, - $2, $6, $4); + err = parse_events_add_breakpoint(_parse_state, list, + $2, $6, $4, $7); + parse_events_terms__delete($7); free($6); if (err) { free(list); @@ -558,31 +521,33 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc $$ = list; } | -PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc +PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config { - struct parse_events_state *parse_state = _parse_state; struct list_head *list; + int err; list = alloc_list(); ABORT_ON(!list); - if (parse_events_add_breakpoint(list, &parse_state->idx, - $2, NULL, $4)) { + err = parse_events_add_breakpoint(_parse_state, list, + $2, NULL, $4, $5); + parse_events_terms__delete($5); + if (err) { free(list); YYABORT; } $$ = list; } | -PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc +PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config { - struct parse_events_state *parse_state = _parse_state; struct list_head *list; int err; list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_breakpoint(list, &parse_state->idx, - $2, $4, 0); + err = parse_events_add_breakpoint(_parse_state, list, + $2, $4, 0, $5); + parse_events_terms__delete($5); free($4); if (err) { free(list); @@ -591,15 +556,17 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc $$ = list; } | -PE_PREFIX_MEM PE_VALUE sep_dc +PE_PREFIX_MEM PE_VALUE opt_event_config { - struct parse_events_state *parse_state = _parse_state; struct list_head *list; + int err; list = alloc_list(); ABORT_ON(!list); - if (parse_events_add_breakpoint(list, &parse_state->idx, - $2, NULL, 0)) { + err = parse_events_add_breakpoint(_parse_state, list, + $2, NULL, 0, $3); + parse_events_terms__delete($3); + if (err) { free(list); YYABORT; } @@ -633,17 +600,6 @@ tracepoint_name opt_event_config } tracepoint_name: -PE_NAME '-' PE_NAME ':' PE_NAME -{ - struct tracepoint_name tracepoint; - - ABORT_ON(asprintf(&tracepoint.sys, "%s-%s", $1, $3) < 0); - tracepoint.event = $5; - free($1); - free($3); - $$ = tracepoint; -} -| PE_NAME ':' PE_NAME { struct tracepoint_name tracepoint = {$1, $3}; @@ -659,7 +615,8 @@ PE_VALUE ':' PE_VALUE opt_event_config list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4); + err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4, + /*wildcard=*/false); parse_events_terms__delete($4); if (err) { free(list); @@ -673,10 +630,16 @@ PE_RAW opt_event_config { struct list_head *list; int err; + u64 num; list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, $1, $2); + errno = 0; + num = strtoull($1 + 1, NULL, 16); + ABORT_ON(errno); + free($1); + err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, num, $2, + /*wildcard=*/false); parse_events_terms__delete($2); if (err) { free(list); @@ -781,17 +744,24 @@ event_term $$ = head; } +name_or_raw: PE_RAW | PE_NAME | PE_LEGACY_CACHE + +name_or_legacy: PE_NAME | PE_LEGACY_CACHE + event_term: PE_RAW { struct 
parse_events_term *term; - ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG, - NULL, $1, false, &@1, NULL)); + if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW, + strdup("raw"), $1, &@1, &@1)) { + free($1); + YYABORT; + } $$ = term; } | -PE_NAME '=' PE_NAME +name_or_raw '=' name_or_legacy { struct parse_events_term *term; @@ -804,7 +774,7 @@ PE_NAME '=' PE_NAME $$ = term; } | -PE_NAME '=' PE_VALUE +name_or_raw '=' PE_VALUE { struct parse_events_term *term; @@ -816,12 +786,25 @@ PE_NAME '=' PE_VALUE $$ = term; } | -PE_NAME '=' PE_VALUE_SYM_HW +name_or_raw '=' PE_TERM_HW +{ + struct parse_events_term *term; + + if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $3.str, &@1, &@3)) { + free($1); + free($3.str); + YYABORT; + } + $$ = term; +} +| +PE_LEGACY_CACHE { struct parse_events_term *term; - int config = $3 & 255; - if (parse_events_term__sym_hw(&term, $1, config)) { + if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, + $1, 1, true, &@1, NULL)) { free($1); YYABORT; } @@ -840,16 +823,19 @@ PE_NAME $$ = term; } | -PE_VALUE_SYM_HW +PE_TERM_HW { struct parse_events_term *term; - int config = $1 & 255; - ABORT_ON(parse_events_term__sym_hw(&term, NULL, config)); + if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE, + $1.str, $1.num & 255, false, &@1, NULL)) { + free($1.str); + YYABORT; + } $$ = term; } | -PE_TERM '=' PE_NAME +PE_TERM '=' name_or_legacy { struct parse_events_term *term; @@ -860,6 +846,25 @@ PE_TERM '=' PE_NAME $$ = term; } | +PE_TERM '=' PE_TERM_HW +{ + struct parse_events_term *term; + + if (parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3)) { + free($3.str); + YYABORT; + } + $$ = term; +} +| +PE_TERM '=' PE_TERM +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3)); + $$ = term; +} +| PE_TERM '=' PE_VALUE { struct parse_events_term *term; @@ -876,7 +881,7 @@ PE_TERM $$ = term; } | -PE_NAME array '=' PE_NAME +name_or_raw array '=' name_or_legacy { struct parse_events_term *term; @@ -891,7 +896,7 @@ PE_NAME array '=' PE_NAME $$ = term; } | -PE_NAME array '=' PE_VALUE +name_or_raw array '=' PE_VALUE { struct parse_events_term *term; diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index ce80b79be103..00adf872bf00 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -1,16 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -/* - * I'm tired of doing "vsnprintf()" etc just to open a - * file, so here's a "return static buffer with printf" - * interface for paths. - * - * It's obviously not thread-safe. Sue me. But it's quite - * useful for doing things like - * - * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY); - * - * which is what it's designed for. - */ #include "path.h" #include "cache.h" #include <linux/kernel.h> @@ -22,18 +10,6 @@ #include <dirent.h> #include <unistd.h> -static char bad_path[] = "/bad-path/"; -/* - * One hack: - */ -static char *get_pathname(void) -{ - static char pathname_array[4][PATH_MAX]; - static int idx; - - return pathname_array[3 & ++idx]; -} - static char *cleanup_path(char *path) { /* Clean it up */ @@ -45,18 +21,17 @@ static char *cleanup_path(char *path) return path; } -char *mkpath(const char *fmt, ...) +char *mkpath(char *path_buf, size_t sz, const char *fmt, ...) 
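+/*
+ * A sketch of the new calling convention: the caller supplies the buffer,
+ * so the old rotating static buffers and their thread-safety caveat are
+ * gone. Adapting the usage from the removed header comment:
+ *
+ *	char path[PATH_MAX];
+ *
+ *	f = open(mkpath(path, sizeof(path), "%s/%s.perf", base, name), O_RDONLY);
+ *
+ * On truncation the caller's buffer receives the "/bad-path/" marker.
+ */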
{ va_list args; unsigned len; - char *pathname = get_pathname(); va_start(args, fmt); - len = vsnprintf(pathname, PATH_MAX, fmt, args); + len = vsnprintf(path_buf, sz, fmt, args); va_end(args); - if (len >= PATH_MAX) - return bad_path; - return cleanup_path(pathname); + if (len >= sz) + strncpy(path_buf, "/bad-path/", sz); + return cleanup_path(path_buf); } int path__join(char *bf, size_t size, const char *path1, const char *path2) diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 6c11914c179f..862e4a689868 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -10,9 +10,10 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/parse-events.h" -#include "util/pmu.h" +#include "util/pmus.h" #include "util/pfm.h" #include "util/strbuf.h" +#include "util/thread_map.h" #include <string.h> #include <linux/kernel.h> @@ -49,7 +50,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, /* * force loading of the PMU list */ - perf_pmu__scan(NULL); + perf_pmus__scan(NULL); for (q = p; strsep(&p, ",{}"); q = p) { sep = p ? str + (p - p_orig - 1) : ""; @@ -86,7 +87,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, goto error; } - pmu = perf_pmu__find_by_type((unsigned int)attr.type); + pmu = perf_pmus__find_by_type((unsigned int)attr.type); evsel = parse_events__add_event(evlist->core.nr_entries, &attr, q, /*metric_id=*/NULL, pmu); @@ -123,6 +124,36 @@ error: return -1; } +static bool is_libpfm_event_supported(const char *name, struct perf_cpu_map *cpus, + struct perf_thread_map *threads) +{ + struct perf_pmu *pmu; + struct evsel *evsel; + struct perf_event_attr attr = {}; + bool result = true; + int ret; + + ret = pfm_get_perf_event_encoding(name, PFM_PLM0|PFM_PLM3, + &attr, NULL, NULL); + if (ret != PFM_SUCCESS) + return false; + + pmu = perf_pmus__find_by_type((unsigned int)attr.type); + evsel = parse_events__add_event(0, &attr, name, /*metric_id=*/NULL, pmu); + if (evsel == NULL) + return false; + + evsel->is_libpfm_event = true; + + if (evsel__open(evsel, cpus, threads) < 0) + result = false; + + evsel__close(evsel); + evsel__delete(evsel); + + return result; +} + static const char *srcs[PFM_ATTR_CTRL_MAX] = { [PFM_ATTR_CTRL_UNKNOWN] = "???", [PFM_ATTR_CTRL_PMU] = "PMU", @@ -146,6 +177,8 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, { int j, ret; char topic[80], name[80]; + struct perf_cpu_map *cpus = perf_cpu_map__empty_new(1); + struct perf_thread_map *threads = thread_map__new_by_tid(0); strbuf_setlen(buf, 0); snprintf(topic, sizeof(topic), "pfm %s", pinfo->name); @@ -185,14 +218,15 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, ainfo.name, ainfo.desc); } } - print_cb->print_event(print_state, - pinfo->name, - topic, - name, info->equiv, - /*scale_unit=*/NULL, - /*deprecated=*/NULL, "PFM event", - info->desc, /*long_desc=*/NULL, - /*encoding_desc=*/buf->buf); + + if (is_libpfm_event_supported(name, cpus, threads)) { + print_cb->print_event(print_state, pinfo->name, topic, + name, info->equiv, + /*scale_unit=*/NULL, + /*deprecated=*/NULL, "PFM event", + info->desc, /*long_desc=*/NULL, + /*encoding_desc=*/buf->buf); + } pfm_for_each_event_attr(j, info) { pfm_event_attr_info_t ainfo; @@ -215,6 +249,10 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, print_attr_flags(buf, &ainfo); snprintf(name, sizeof(name), "%s::%s:%s", pinfo->name, info->name, ainfo.name); + + if (!is_libpfm_event_supported(name, cpus, 
threads)) + continue; + print_cb->print_event(print_state, pinfo->name, topic, @@ -225,6 +263,9 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, /*encoding_desc=*/buf->buf); } } + + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); } void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state) diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c deleted file mode 100644 index 38628805a952..000000000000 --- a/tools/perf/util/pmu-hybrid.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/list.h> -#include <linux/compiler.h> -#include <linux/string.h> -#include <linux/zalloc.h> -#include <sys/types.h> -#include <errno.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <unistd.h> -#include <stdio.h> -#include <stdbool.h> -#include <stdarg.h> -#include <locale.h> -#include <api/fs/fs.h> -#include "fncache.h" -#include "pmu-hybrid.h" - -LIST_HEAD(perf_pmu__hybrid_pmus); - -bool perf_pmu__hybrid_mounted(const char *name) -{ - int cpu; - char pmu_name[PATH_MAX]; - struct perf_pmu pmu = {.name = pmu_name}; - - if (strncmp(name, "cpu_", 4)) - return false; - - strlcpy(pmu_name, name, sizeof(pmu_name)); - return perf_pmu__scan_file(&pmu, "cpus", "%u", &cpu) > 0; -} - -struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name) -{ - struct perf_pmu *pmu; - - if (!name) - return NULL; - - perf_pmu__for_each_hybrid_pmu(pmu) { - if (!strcmp(name, pmu->name)) - return pmu; - } - - return NULL; -} - -bool perf_pmu__is_hybrid(const char *name) -{ - return perf_pmu__find_hybrid_pmu(name) != NULL; -} - -char *perf_pmu__hybrid_type_to_pmu(const char *type) -{ - char *pmu_name = NULL; - - if (asprintf(&pmu_name, "cpu_%s", type) < 0) - return NULL; - - if (perf_pmu__is_hybrid(pmu_name)) - return pmu_name; - - /* - * pmu may be not scanned, check the sysfs. 
- */ - if (perf_pmu__hybrid_mounted(pmu_name)) - return pmu_name; - - free(pmu_name); - return NULL; -} diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h deleted file mode 100644 index 2b186c26a43e..000000000000 --- a/tools/perf/util/pmu-hybrid.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PMU_HYBRID_H -#define __PMU_HYBRID_H - -#include <linux/perf_event.h> -#include <linux/compiler.h> -#include <linux/list.h> -#include <stdbool.h> -#include "pmu.h" - -extern struct list_head perf_pmu__hybrid_pmus; - -#define perf_pmu__for_each_hybrid_pmu(pmu) \ - list_for_each_entry(pmu, &perf_pmu__hybrid_pmus, hybrid_list) - -bool perf_pmu__hybrid_mounted(const char *name); - -struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name); -bool perf_pmu__is_hybrid(const char *name); -char *perf_pmu__hybrid_type_to_pmu(const char *type); - -static inline int perf_pmu__hybrid_pmu_num(void) -{ - struct perf_pmu *pmu; - int num = 0; - - perf_pmu__for_each_hybrid_pmu(pmu) - num++; - - return num; -} - -#endif /* __PMU_HYBRID_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ad209c88a124..64fa568a5426 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -4,20 +4,15 @@ #include <linux/string.h> #include <linux/zalloc.h> #include <linux/ctype.h> -#include <subcmd/pager.h> #include <sys/types.h> -#include <errno.h> #include <fcntl.h> #include <sys/stat.h> #include <unistd.h> #include <stdio.h> #include <stdbool.h> -#include <stdarg.h> #include <dirent.h> #include <api/fs/fs.h> #include <locale.h> -#include <regex.h> -#include <perf/cpumap.h> #include <fnmatch.h> #include <math.h> #include "debug.h" @@ -32,7 +27,6 @@ #include "string2.h" #include "strbuf.h" #include "fncache.h" -#include "pmu-hybrid.h" #include "util/evsel_config.h" struct perf_pmu perf_pmu__fake; @@ -60,10 +54,6 @@ struct perf_pmu_format { struct list_head list; }; -static bool hybrid_scanned; - -static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name); - /* * Parse & process all the sysfs attributes located under * the directory specified in 'dir' parameter. @@ -557,36 +547,11 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, return 0; } -/* Add all pmus in sysfs to pmu list: */ -static void pmu_read_sysfs(void) -{ - int fd; - DIR *dir; - struct dirent *dent; - - fd = perf_pmu__event_source_devices_fd(); - if (fd < 0) - return; - - dir = fdopendir(fd); - if (!dir) - return; - - while ((dent = readdir(dir))) { - if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) - continue; - /* add to static LIST_HEAD(pmus): */ - perf_pmu__find2(fd, dent->d_name); - } - - closedir(dir); -} - /* * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) * may have a "cpus" file. */ -static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name) +static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name, bool is_core) { struct perf_cpu_map *cpus; const char *templates[] = { @@ -610,16 +575,14 @@ static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name) return cpus; } - return NULL; + /* Nothing found, for core PMUs assume this means all CPUs. */ + return is_core ? 
perf_cpu_map__get(cpu_map__online()) : NULL; } static bool pmu_is_uncore(int dirfd, const char *name) { int fd; - if (perf_pmu__hybrid_mounted(name)) - return false; - fd = perf_pmu__pathname_fd(dirfd, name, "cpumask", O_PATH); if (fd < 0) return false; @@ -643,12 +606,14 @@ static char *pmu_id(const char *name) return str; } -/* - * PMU CORE devices have different name other than cpu in sysfs on some - * platforms. - * Looking for possible sysfs files to identify the arm core device. +/** + * is_sysfs_pmu_core() - Core PMU devices may have a name other than "cpu" in + * sysfs on some platforms, such as ARM or Intel hybrid. Look for a + * "cpus" file in sysfs to identify whether this is a core device. + * @name: The PMU name such as "cpu_atom". */ -static int is_arm_pmu_core(const char *name) +static int is_sysfs_pmu_core(const char *name) { char path[PATH_MAX]; @@ -777,9 +742,11 @@ out: } struct pmu_add_cpu_aliases_map_data { + /* List being added to. */ struct list_head *head; - const char *name; - const char *cpu_name; + /* If a pmu_event lacks a given PMU, the default is used. */ + char *default_pmu_name; + /* The PMU that we're searching for events for. */ struct perf_pmu *pmu; }; @@ -788,37 +755,32 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe, void *vdata) { struct pmu_add_cpu_aliases_map_data *data = vdata; - const char *pname = pe->pmu ? pe->pmu : data->cpu_name; - - if (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->name)) - goto new_alias; - - if (strcmp(pname, data->name)) - return 0; + const char *pname = pe->pmu ?: data->default_pmu_name; -new_alias: - /* need type casts to override 'const' */ - __perf_pmu__new_alias(data->head, -1, (char *)pe->name, (char *)pe->desc, - (char *)pe->event, pe); + if (!strcmp(pname, data->pmu->name) || + (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->pmu->name))) { + /* need type casts to override 'const' */ + __perf_pmu__new_alias(data->head, -1, (char *)pe->name, (char *)pe->desc, + (char *)pe->event, pe); + } return 0; } /* - * From the pmu_events_map, find the table of PMU events that corresponds - * to the current running CPU. Then, add all PMU events from that table - * as aliases. + * From the pmu_events_table, find the events that correspond to the given + * PMU and add them to the list 'head'. */ void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu, - const struct pmu_events_table *table) + const struct pmu_events_table *table) { struct pmu_add_cpu_aliases_map_data data = { .head = head, - .name = pmu->name, - .cpu_name = is_arm_pmu_core(pmu->name) ?
pmu->name : "cpu", + .default_pmu_name = perf_pmus__default_pmu_name(), .pmu = pmu, }; pmu_events_table_for_each_event(table, pmu_add_cpu_aliases_map_callback, &data); + free(data.default_pmu_name); } static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) @@ -898,23 +860,16 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu) return max_precise; } -static struct perf_pmu *pmu_lookup(int dirfd, const char *lookup_name) +struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name) { struct perf_pmu *pmu; LIST_HEAD(format); LIST_HEAD(aliases); __u32 type; char *name = pmu_find_real_name(lookup_name); - bool is_hybrid = perf_pmu__hybrid_mounted(name); char *alias_name; /* - * Check pmu name for hybrid and the pmu may be invalid in sysfs - */ - if (!strncmp(name, "cpu_", 4) && !is_hybrid) - return NULL; - - /* * The pmu data we store & need consists of the pmu * type value and format definitions. Load both right * now. @@ -932,9 +887,9 @@ static struct perf_pmu *pmu_lookup(int dirfd, const char *lookup_name) if (!pmu) return NULL; - pmu->cpus = pmu_cpumask(dirfd, name); + pmu->is_core = is_pmu_core(name); + pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core); pmu->name = strdup(name); - if (!pmu->name) goto err; @@ -962,12 +917,7 @@ static struct perf_pmu *pmu_lookup(int dirfd, const char *lookup_name) INIT_LIST_HEAD(&pmu->caps); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); - list_add_tail(&pmu->list, &pmus); - - if (is_hybrid) - list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus); - else - INIT_LIST_HEAD(&pmu->hybrid_list); + list_add_tail(&pmu->list, pmus); pmu->default_config = perf_pmu__get_default_config(pmu); @@ -982,6 +932,11 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) { struct perf_pmu_format *format; + if (pmu->formats_checked) + return; + + pmu->formats_checked = true; + /* fake pmu doesn't have format list */ if (pmu == &perf_pmu__fake) return; @@ -995,61 +950,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) } } -static struct perf_pmu *pmu_find(const char *name) -{ - struct perf_pmu *pmu; - - list_for_each_entry(pmu, &pmus, list) { - if (!strcmp(pmu->name, name) || - (pmu->alias_name && !strcmp(pmu->alias_name, name))) - return pmu; - } - - return NULL; -} - -struct perf_pmu *perf_pmu__find_by_type(unsigned int type) -{ - struct perf_pmu *pmu; - - list_for_each_entry(pmu, &pmus, list) - if (pmu->type == type) - return pmu; - - return NULL; -} - -struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) -{ - /* - * pmu iterator: If pmu is NULL, we start at the begin, - * otherwise return the next pmu. Returns NULL on end. 
- */ - if (!pmu) { - pmu_read_sysfs(); - pmu = list_prepare_entry(pmu, &pmus, list); - } - list_for_each_entry_continue(pmu, &pmus, list) - return pmu; - return NULL; -} - -struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) -{ - struct perf_pmu *pmu = NULL; - - if (evsel->pmu) - return evsel->pmu; - - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu->type == evsel->core.attr.type) - break; - } - - ((struct evsel *)evsel)->pmu = pmu; - return pmu; -} - bool evsel__is_aux_event(const struct evsel *evsel) { struct perf_pmu *pmu = evsel__find_pmu(evsel); @@ -1086,43 +986,6 @@ void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel, evsel->core.attr.config |= field_prep(bits, val); } -struct perf_pmu *perf_pmu__find(const char *name) -{ - struct perf_pmu *pmu; - int dirfd; - - /* - * Once PMU is loaded it stays in the list, - * so we keep us from multiple reading/parsing - * the pmu format definitions. - */ - pmu = pmu_find(name); - if (pmu) - return pmu; - - dirfd = perf_pmu__event_source_devices_fd(); - pmu = pmu_lookup(dirfd, name); - close(dirfd); - - return pmu; -} - -static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) -{ - struct perf_pmu *pmu; - - /* - * Once PMU is loaded it stays in the list, - * so we keep us from multiple reading/parsing - * the pmu format definitions. - */ - pmu = pmu_find(name); - if (pmu) - return pmu; - - return pmu_lookup(dirfd, name); -} - static struct perf_pmu_format * pmu_find_format(struct list_head *formats, const char *name) { @@ -1398,7 +1261,6 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, { bool zero = !!pmu->default_config; - attr->type = pmu->type; return perf_pmu__config_terms(pmu->name, &pmu->format, attr, head_terms, zero, err); } @@ -1553,250 +1415,46 @@ void perf_pmu__del_formats(struct list_head *formats) } } -static int sub_non_neg(int a, int b) -{ - if (b > a) - return 0; - return a - b; -} - -static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, - const struct perf_pmu_alias *alias) -{ - struct parse_events_term *term; - int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); - - list_for_each_entry(term, &alias->terms, list) { - if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) - used += snprintf(buf + used, sub_non_neg(len, used), - ",%s=%s", term->config, - term->val.str); - } - - if (sub_non_neg(len, used) > 0) { - buf[used] = '/'; - used++; - } - if (sub_non_neg(len, used) > 0) { - buf[used] = '\0'; - used++; - } else - buf[len - 1] = '\0'; - - return buf; -} - -/** Struct for ordering events as output in perf list. */ -struct sevent { - /** PMU for event. */ - const struct perf_pmu *pmu; - /** - * Optional event for name, desc, etc. If not present then this is a - * selectable PMU and the event name is shown as "//". - */ - const struct perf_pmu_alias *event; - /** Is the PMU for the CPU? 
*/ - bool is_cpu; -}; - -static int cmp_sevent(const void *a, const void *b) +bool is_pmu_core(const char *name) { - const struct sevent *as = a; - const struct sevent *bs = b; - const char *a_pmu_name = NULL, *b_pmu_name = NULL; - const char *a_name = "//", *a_desc = NULL, *a_topic = ""; - const char *b_name = "//", *b_desc = NULL, *b_topic = ""; - int ret; - - if (as->event) { - a_name = as->event->name; - a_desc = as->event->desc; - a_topic = as->event->topic ?: ""; - a_pmu_name = as->event->pmu_name; - } - if (bs->event) { - b_name = bs->event->name; - b_desc = bs->event->desc; - b_topic = bs->event->topic ?: ""; - b_pmu_name = bs->event->pmu_name; - } - /* Put extra events last. */ - if (!!a_desc != !!b_desc) - return !!a_desc - !!b_desc; - - /* Order by topics. */ - ret = strcmp(a_topic, b_topic); - if (ret) - return ret; - - /* Order CPU core events to be first */ - if (as->is_cpu != bs->is_cpu) - return as->is_cpu ? -1 : 1; - - /* Order by PMU name. */ - if (as->pmu != bs->pmu) { - a_pmu_name = a_pmu_name ?: (as->pmu->name ?: ""); - b_pmu_name = b_pmu_name ?: (bs->pmu->name ?: ""); - ret = strcmp(a_pmu_name, b_pmu_name); - if (ret) - return ret; - } - - /* Order by event name. */ - return strcmp(a_name, b_name); + return !strcmp(name, "cpu") || !strcmp(name, "cpum_cf") || is_sysfs_pmu_core(name); } -bool is_pmu_core(const char *name) +bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu) { - return !strcmp(name, "cpu") || is_arm_pmu_core(name); + return pmu->is_core; } -static bool pmu_alias_is_duplicate(struct sevent *alias_a, - struct sevent *alias_b) +bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu) { - const char *a_pmu_name = NULL, *b_pmu_name = NULL; - const char *a_name = "//", *b_name = "//"; - - - if (alias_a->event) { - a_name = alias_a->event->name; - a_pmu_name = alias_a->event->pmu_name; - } - if (alias_b->event) { - b_name = alias_b->event->name; - b_pmu_name = alias_b->event->pmu_name; - } - - /* Different names -> never duplicates */ - if (strcmp(a_name, b_name)) - return false; - - /* Don't remove duplicates for different PMUs */ - a_pmu_name = a_pmu_name ?: (alias_a->pmu->name ?: ""); - b_pmu_name = b_pmu_name ?: (alias_b->pmu->name ?: ""); - return strcmp(a_pmu_name, b_pmu_name) == 0; + return pmu->is_core && perf_pmus__num_core_pmus() == 1; } -void print_pmu_events(const struct print_callbacks *print_cb, void *print_state) +bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name) { - struct perf_pmu *pmu; - struct perf_pmu_alias *event; - char buf[1024]; - int printed = 0; - int len, j; - struct sevent *aliases; - - pmu = NULL; - len = 0; - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - list_for_each_entry(event, &pmu->aliases, list) - len++; - if (pmu->selectable) - len++; - } - aliases = zalloc(sizeof(struct sevent) * len); - if (!aliases) { - pr_err("FATAL: not enough memory to print PMU events\n"); - return; - } - pmu = NULL; - j = 0; - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - bool is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); - - list_for_each_entry(event, &pmu->aliases, list) { - aliases[j].event = event; - aliases[j].pmu = pmu; - aliases[j].is_cpu = is_cpu; - j++; - } - if (pmu->selectable) { - aliases[j].event = NULL; - aliases[j].pmu = pmu; - aliases[j].is_cpu = is_cpu; - j++; - } - } - len = j; - qsort(aliases, len, sizeof(struct sevent), cmp_sevent); - for (j = 0; j < len; j++) { - const char *name, *alias = NULL, *scale_unit = NULL, - *desc = NULL, *long_desc = NULL, - *encoding_desc = 
NULL, *topic = NULL, - *pmu_name = NULL; - bool deprecated = false; - size_t buf_used; - - /* Skip duplicates */ - if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) - continue; + struct perf_pmu_alias *alias; - if (!aliases[j].event) { - /* A selectable event. */ - pmu_name = aliases[j].pmu->name; - buf_used = snprintf(buf, sizeof(buf), "%s//", pmu_name) + 1; - name = buf; - } else { - if (aliases[j].event->desc) { - name = aliases[j].event->name; - buf_used = 0; - } else { - name = format_alias(buf, sizeof(buf), aliases[j].pmu, - aliases[j].event); - if (aliases[j].is_cpu) { - alias = name; - name = aliases[j].event->name; - } - buf_used = strlen(buf) + 1; - } - pmu_name = aliases[j].event->pmu_name ?: (aliases[j].pmu->name ?: ""); - if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) { - scale_unit = buf + buf_used; - buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, - "%G%s", aliases[j].event->scale, - aliases[j].event->unit) + 1; - } - desc = aliases[j].event->desc; - long_desc = aliases[j].event->long_desc; - topic = aliases[j].event->topic; - encoding_desc = buf + buf_used; - buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, - "%s/%s/", pmu_name, aliases[j].event->str) + 1; - deprecated = aliases[j].event->deprecated; - } - print_cb->print_event(print_state, - pmu_name, - topic, - name, - alias, - scale_unit, - deprecated, - "Kernel PMU event", - desc, - long_desc, - encoding_desc); + list_for_each_entry(alias, &pmu->aliases, list) { + if (!strcmp(alias->name, name)) + return true; } - if (printed && pager_in_use()) - printf("\n"); - - zfree(&aliases); - return; + return false; } -bool pmu_have_event(const char *pname, const char *name) +bool perf_pmu__is_software(const struct perf_pmu *pmu) { - struct perf_pmu *pmu; - struct perf_pmu_alias *alias; - - pmu = NULL; - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (strcmp(pname, pmu->name)) - continue; - list_for_each_entry(alias, &pmu->aliases, list) - if (!strcmp(alias->name, name)) - return true; + if (pmu->is_core || pmu->is_uncore || pmu->auxtrace) + return false; + switch (pmu->type) { + case PERF_TYPE_HARDWARE: return false; + case PERF_TYPE_SOFTWARE: return true; + case PERF_TYPE_TRACEPOINT: return true; + case PERF_TYPE_HW_CACHE: return false; + case PERF_TYPE_RAW: return false; + case PERF_TYPE_BREAKPOINT: return true; + default: break; } - return false; + return !strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe"); } FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) @@ -1967,47 +1625,53 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) return pmu->nr_caps; } -void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - const char *name) +static void perf_pmu__compute_config_masks(struct perf_pmu *pmu) { struct perf_pmu_format *format; - __u64 masks = 0, bits; - char buf[100]; - unsigned int i; + + if (pmu->config_masks_computed) + return; list_for_each_entry(format, &pmu->format, list) { - if (format->value != PERF_PMU_FORMAT_VALUE_CONFIG) + unsigned int i; + __u64 *mask; + + if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) continue; + pmu->config_masks_present = true; + mask = &pmu->config_masks[format->value]; + for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS) - masks |= 1ULL << i; + *mask |= 1ULL << i; } + pmu->config_masks_computed = true; +} + +void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, + const char *name, int config_num, + const char *config_name) +{ + __u64 bits; + char buf[100]; + 
+ perf_pmu__compute_config_masks(pmu); /* * Kernel doesn't export any valid format bits. */ - if (masks == 0) + if (!pmu->config_masks_present) return; - bits = config & ~masks; + bits = config & ~pmu->config_masks[config_num]; if (bits == 0) return; bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf)); - pr_warning("WARNING: event '%s' not valid (bits %s of config " + pr_warning("WARNING: event '%s' not valid (bits %s of %s " "'%llx' not supported by kernel)!\n", - name ?: "N/A", buf, config); -} - -bool perf_pmu__has_hybrid(void) -{ - if (!hybrid_scanned) { - hybrid_scanned = true; - perf_pmu__scan(NULL); - } - - return !list_empty(&perf_pmu__hybrid_pmus); + name ?: "N/A", buf, config_name, config); } int perf_pmu__match(char *pattern, char *name, char *tok) @@ -2024,39 +1688,6 @@ int perf_pmu__match(char *pattern, char *name, char *tok) return 0; } -int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, - struct perf_cpu_map **mcpus_ptr, - struct perf_cpu_map **ucpus_ptr) -{ - struct perf_cpu_map *pmu_cpus = pmu->cpus; - struct perf_cpu_map *matched_cpus, *unmatched_cpus; - struct perf_cpu cpu; - int i, matched_nr = 0, unmatched_nr = 0; - - matched_cpus = perf_cpu_map__default_new(); - if (!matched_cpus) - return -1; - - unmatched_cpus = perf_cpu_map__default_new(); - if (!unmatched_cpus) { - perf_cpu_map__put(matched_cpus); - return -1; - } - - perf_cpu_map__for_each_cpu(cpu, i, cpus) { - if (!perf_cpu_map__has(pmu_cpus, cpu)) - RC_CHK_ACCESS(unmatched_cpus)->map[unmatched_nr++] = cpu; - else - RC_CHK_ACCESS(matched_cpus)->map[matched_nr++] = cpu; - } - - perf_cpu_map__set_nr(unmatched_cpus, unmatched_nr); - perf_cpu_map__set_nr(matched_cpus, matched_nr); - *mcpus_ptr = matched_cpus; - *ucpus_ptr = unmatched_cpus; - return 0; -} - double __weak perf_pmu__cpu_slots_per_cycle(void) { return NAN; @@ -2110,7 +1741,7 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, return openat(dirfd, path, flags); } -static void perf_pmu__delete(struct perf_pmu *pmu) +void perf_pmu__delete(struct perf_pmu *pmu) { perf_pmu__del_formats(&pmu->format); perf_pmu__del_aliases(pmu); @@ -2123,15 +1754,3 @@ static void perf_pmu__delete(struct perf_pmu *pmu) zfree(&pmu->alias_name); free(pmu); } - -void perf_pmu__destroy(void) -{ - struct perf_pmu *pmu, *tmp; - - list_for_each_entry_safe(pmu, tmp, &pmus, list) { - list_del(&pmu->list); - list_del(&pmu->hybrid_list); - - perf_pmu__delete(pmu); - } -} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index b9a02dedd473..8807a624e918 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -60,6 +60,13 @@ struct perf_pmu { */ bool selectable; /** + * @is_core: Is the PMU the core CPU PMU? Determined by the name being + * "cpu" or by the presence of + * <sysfs>/bus/event_source/devices/<name>/cpus. There may be >1 core + * PMU on systems like Intel hybrid. + */ + bool is_core; + /** * @is_uncore: Is the PMU not within the CPU core? Determined by the * presence of <sysfs>/bus/event_source/devices/<name>/cpumask. */ @@ -70,6 +77,15 @@ struct perf_pmu { */ bool auxtrace; /** + * @formats_checked: Only check PMU's formats are valid for + * perf_event_attr once. + */ + bool formats_checked; + /** @config_masks_present: Are there config format values? */ + bool config_masks_present; + /** @config_masks_computed: Set when masks are lazily computed. 
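perf_pmu__compute_config_masks() folds each format field's bit range into a per-config mask exactly once; perf_pmu__warn_invalid_config() then flags any config bits that fall outside the mask. A standalone sketch of the same check, with hypothetical format ranges rather than ones read from sysfs:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t mask_for_range(int lo, int hi)  /* e.g. format "event:0-7" */
{
        uint64_t mask = 0;

        for (int i = lo; i <= hi; i++)
                mask |= 1ULL << i;
        return mask;
}

int main(void)
{
        /* Pretend sysfs declared "event:0-7" and "umask:8-15". */
        uint64_t valid = mask_for_range(0, 7) | mask_for_range(8, 15);
        uint64_t config = 0x1003f;
        uint64_t bad = config & ~valid;

        if (bad)        /* bit 16 is outside every format field */
                printf("bits %#" PRIx64 " of config not supported\n", bad);
        return 0;
}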
*/ + bool config_masks_computed; + /** * @max_precise: Number of levels of :ppp precision supported by the * PMU, read from * <sysfs>/bus/event_source/devices/<name>/caps/max_precise. @@ -112,8 +128,12 @@ struct perf_pmu { struct list_head caps; /** @list: Element on pmus list in pmu.c. */ struct list_head list; - /** @hybrid_list: Element on perf_pmu__hybrid_pmus. */ - struct list_head hybrid_list; + + /** + * @config_masks: Derived from the PMU's format data, bits that are + * valid within the config value. + */ + __u64 config_masks[PERF_PMU_FORMAT_VALUE_CONFIG_END]; /** * @missing_features: Features to inhibit when events on this PMU are @@ -193,8 +213,6 @@ struct perf_pmu_alias { char *pmu_name; }; -struct perf_pmu *perf_pmu__find(const char *name); -struct perf_pmu *perf_pmu__find_by_type(unsigned int type); void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct list_head *head_terms, @@ -217,11 +235,15 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to); int perf_pmu__format_parse(int dirfd, struct list_head *head); void perf_pmu__del_formats(struct list_head *formats); -struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); - bool is_pmu_core(const char *name); -void print_pmu_events(const struct print_callbacks *print_cb, void *print_state); -bool pmu_have_event(const char *pname, const char *name); +bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu); +bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu); +bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name); +/** + * perf_pmu_is_software - is the PMU a software PMU as in it uses the + * perf_sw_context in the kernel? + */ +bool perf_pmu__is_software(const struct perf_pmu *pmu); FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name); FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name); @@ -248,16 +270,12 @@ int perf_pmu__convert_scale(const char *scale, char **end, double *sval); int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - const char *name); + const char *name, int config_num, + const char *config_name); void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); -bool perf_pmu__has_hybrid(void); int perf_pmu__match(char *pattern, char *name, char *tok); -int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, - struct perf_cpu_map **mcpus_ptr, - struct perf_cpu_map **ucpus_ptr); - char *pmu_find_real_name(const char *name); char *pmu_find_alias_name(const char *name); double perf_pmu__cpu_slots_per_cycle(void); @@ -267,6 +285,7 @@ int perf_pmu__pathname_scnprintf(char *buf, size_t size, int perf_pmu__event_source_devices_fd(void); int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags); -void perf_pmu__destroy(void); +struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name); +void perf_pmu__delete(struct perf_pmu *pmu); #endif /* __PMU_H */ diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 7f3b93c4d229..0866dee3fc62 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -1,5 +1,571 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/list.h> -#include <pmus.h> +#include <linux/zalloc.h> +#include <subcmd/pager.h> +#include <sys/types.h> +#include <dirent.h> +#include <pthread.h> +#include <string.h> +#include <unistd.h> +#include "debug.h" 
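The comment just below documents the core_pmus/other_pmus split that replaces the single global pmus list; as a minimal sketch (hypothetical helper), client code is expected to walk the lists only through the new iterators and the is_core flag:

#include "pmu.h"
#include "pmus.h"

/* Illustrative only: count core vs. other PMUs via the new iterators. */
static void count_pmus(int *ncore, int *nother)
{
        struct perf_pmu *pmu = NULL;

        *ncore = *nother = 0;
        while ((pmu = perf_pmus__scan(pmu)) != NULL) {
                if (pmu->is_core)
                        (*ncore)++;
                else
                        (*nother)++;
        }
}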
+#include "evsel.h" +#include "pmus.h" +#include "pmu.h" +#include "print-events.h" -LIST_HEAD(pmus); +/* + * core_pmus: A PMU belongs to core_pmus if it's name is "cpu" or it's sysfs + * directory contains "cpus" file. All PMUs belonging to core_pmus + * must have pmu->is_core=1. If there are more than one PMU in + * this list, perf interprets it as a heterogeneous platform. + * (FWIW, certain ARM platforms having heterogeneous cores uses + * homogeneous PMU, and thus they are treated as homogeneous + * platform by perf because core_pmus will have only one entry) + * other_pmus: All other PMUs which are not part of core_pmus list. It doesn't + * matter whether PMU is present per SMT-thread or outside of the + * core in the hw. For e.g., an instance of AMD ibs_fetch// and + * ibs_op// PMUs is present in each hw SMT thread, however they + * are captured under other_pmus. PMUs belonging to other_pmus + * must have pmu->is_core=0 but pmu->is_uncore could be 0 or 1. + */ +static LIST_HEAD(core_pmus); +static LIST_HEAD(other_pmus); +static bool read_sysfs_core_pmus; +static bool read_sysfs_all_pmus; + +void perf_pmus__destroy(void) +{ + struct perf_pmu *pmu, *tmp; + + list_for_each_entry_safe(pmu, tmp, &core_pmus, list) { + list_del(&pmu->list); + + perf_pmu__delete(pmu); + } + list_for_each_entry_safe(pmu, tmp, &other_pmus, list) { + list_del(&pmu->list); + + perf_pmu__delete(pmu); + } + read_sysfs_core_pmus = false; + read_sysfs_all_pmus = false; +} + +static struct perf_pmu *pmu_find(const char *name) +{ + struct perf_pmu *pmu; + + list_for_each_entry(pmu, &core_pmus, list) { + if (!strcmp(pmu->name, name) || + (pmu->alias_name && !strcmp(pmu->alias_name, name))) + return pmu; + } + list_for_each_entry(pmu, &other_pmus, list) { + if (!strcmp(pmu->name, name) || + (pmu->alias_name && !strcmp(pmu->alias_name, name))) + return pmu; + } + + return NULL; +} + +struct perf_pmu *perf_pmus__find(const char *name) +{ + struct perf_pmu *pmu; + int dirfd; + bool core_pmu; + + /* + * Once PMU is loaded it stays in the list, + * so we keep us from multiple reading/parsing + * the pmu format definitions. + */ + pmu = pmu_find(name); + if (pmu) + return pmu; + + if (read_sysfs_all_pmus) + return NULL; + + core_pmu = is_pmu_core(name); + if (core_pmu && read_sysfs_core_pmus) + return NULL; + + dirfd = perf_pmu__event_source_devices_fd(); + pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name); + close(dirfd); + + return pmu; +} + +static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) +{ + struct perf_pmu *pmu; + bool core_pmu; + + /* + * Once PMU is loaded it stays in the list, + * so we keep us from multiple reading/parsing + * the pmu format definitions. + */ + pmu = pmu_find(name); + if (pmu) + return pmu; + + if (read_sysfs_all_pmus) + return NULL; + + core_pmu = is_pmu_core(name); + if (core_pmu && read_sysfs_core_pmus) + return NULL; + + return perf_pmu__lookup(core_pmu ? 
&core_pmus : &other_pmus, dirfd, name); +} + +/* Add all pmus in sysfs to pmu list: */ +static void pmu_read_sysfs(bool core_only) +{ + int fd; + DIR *dir; + struct dirent *dent; + + if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus)) + return; + + fd = perf_pmu__event_source_devices_fd(); + if (fd < 0) + return; + + dir = fdopendir(fd); + if (!dir) { + close(fd); + return; + } + + while ((dent = readdir(dir))) { + if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) + continue; + if (core_only && !is_pmu_core(dent->d_name)) + continue; + /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */ + perf_pmu__find2(fd, dent->d_name); + } + + closedir(dir); + if (core_only) { + read_sysfs_core_pmus = true; + } else { + read_sysfs_core_pmus = true; + read_sysfs_all_pmus = true; + } +} + +static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type) +{ + struct perf_pmu *pmu; + + list_for_each_entry(pmu, &core_pmus, list) { + if (pmu->type == type) + return pmu; + } + + list_for_each_entry(pmu, &other_pmus, list) { + if (pmu->type == type) + return pmu; + } + return NULL; +} + +struct perf_pmu *perf_pmus__find_by_type(unsigned int type) +{ + struct perf_pmu *pmu = __perf_pmus__find_by_type(type); + + if (pmu || read_sysfs_all_pmus) + return pmu; + + pmu_read_sysfs(/*core_only=*/false); + pmu = __perf_pmus__find_by_type(type); + return pmu; +} + +/* + * pmu iterator: If pmu is NULL, we start at the begin, otherwise return the + * next pmu. Returns NULL on end. + */ +struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu) +{ + bool use_core_pmus = !pmu || pmu->is_core; + + if (!pmu) { + pmu_read_sysfs(/*core_only=*/false); + pmu = list_prepare_entry(pmu, &core_pmus, list); + } + if (use_core_pmus) { + list_for_each_entry_continue(pmu, &core_pmus, list) + return pmu; + + pmu = NULL; + pmu = list_prepare_entry(pmu, &other_pmus, list); + } + list_for_each_entry_continue(pmu, &other_pmus, list) + return pmu; + return NULL; +} + +struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu) +{ + if (!pmu) { + pmu_read_sysfs(/*core_only=*/true); + pmu = list_prepare_entry(pmu, &core_pmus, list); + } + list_for_each_entry_continue(pmu, &core_pmus, list) + return pmu; + + return NULL; +} + +const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmus__scan(pmu)) != NULL) { + if (!strcmp(pmu->name, str)) + return pmu; + /* Ignore "uncore_" prefix. */ + if (!strncmp(pmu->name, "uncore_", 7)) { + if (!strcmp(pmu->name + 7, str)) + return pmu; + } + /* Ignore "cpu_" prefix on Intel hybrid PMUs. */ + if (!strncmp(pmu->name, "cpu_", 4)) { + if (!strcmp(pmu->name + 4, str)) + return pmu; + } + } + return NULL; +} + +int __weak perf_pmus__num_mem_pmus(void) +{ + /* All core PMUs are for mem events. */ + return perf_pmus__num_core_pmus(); +} + +/** Struct for ordering events as output in perf list. */ +struct sevent { + /** PMU for event. */ + const struct perf_pmu *pmu; + /** + * Optional event for name, desc, etc. If not present then this is a + * selectable PMU and the event name is shown as "//". + */ + const struct perf_pmu_alias *event; + /** Is the PMU for the CPU? 
*/ + bool is_cpu; +}; + +static int cmp_sevent(const void *a, const void *b) +{ + const struct sevent *as = a; + const struct sevent *bs = b; + const char *a_pmu_name = NULL, *b_pmu_name = NULL; + const char *a_name = "//", *a_desc = NULL, *a_topic = ""; + const char *b_name = "//", *b_desc = NULL, *b_topic = ""; + int ret; + + if (as->event) { + a_name = as->event->name; + a_desc = as->event->desc; + a_topic = as->event->topic ?: ""; + a_pmu_name = as->event->pmu_name; + } + if (bs->event) { + b_name = bs->event->name; + b_desc = bs->event->desc; + b_topic = bs->event->topic ?: ""; + b_pmu_name = bs->event->pmu_name; + } + /* Put extra events last. */ + if (!!a_desc != !!b_desc) + return !!a_desc - !!b_desc; + + /* Order by topics. */ + ret = strcmp(a_topic, b_topic); + if (ret) + return ret; + + /* Order CPU core events to be first */ + if (as->is_cpu != bs->is_cpu) + return as->is_cpu ? -1 : 1; + + /* Order by PMU name. */ + if (as->pmu != bs->pmu) { + a_pmu_name = a_pmu_name ?: (as->pmu->name ?: ""); + b_pmu_name = b_pmu_name ?: (bs->pmu->name ?: ""); + ret = strcmp(a_pmu_name, b_pmu_name); + if (ret) + return ret; + } + + /* Order by event name. */ + return strcmp(a_name, b_name); +} + +static bool pmu_alias_is_duplicate(struct sevent *alias_a, + struct sevent *alias_b) +{ + const char *a_pmu_name = NULL, *b_pmu_name = NULL; + const char *a_name = "//", *b_name = "//"; + + + if (alias_a->event) { + a_name = alias_a->event->name; + a_pmu_name = alias_a->event->pmu_name; + } + if (alias_b->event) { + b_name = alias_b->event->name; + b_pmu_name = alias_b->event->pmu_name; + } + + /* Different names -> never duplicates */ + if (strcmp(a_name, b_name)) + return false; + + /* Don't remove duplicates for different PMUs */ + a_pmu_name = a_pmu_name ?: (alias_a->pmu->name ?: ""); + b_pmu_name = b_pmu_name ?: (alias_b->pmu->name ?: ""); + return strcmp(a_pmu_name, b_pmu_name) == 0; +} + +static int sub_non_neg(int a, int b) +{ + if (b > a) + return 0; + return a - b; +} + +static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, + const struct perf_pmu_alias *alias) +{ + struct parse_events_term *term; + int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); + + list_for_each_entry(term, &alias->terms, list) { + if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) + used += snprintf(buf + used, sub_non_neg(len, used), + ",%s=%s", term->config, + term->val.str); + } + + if (sub_non_neg(len, used) > 0) { + buf[used] = '/'; + used++; + } + if (sub_non_neg(len, used) > 0) { + buf[used] = '\0'; + used++; + } else + buf[len - 1] = '\0'; + + return buf; +} + +void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state) +{ + struct perf_pmu *pmu; + struct perf_pmu_alias *event; + char buf[1024]; + int printed = 0; + int len, j; + struct sevent *aliases; + + pmu = NULL; + len = 0; + while ((pmu = perf_pmus__scan(pmu)) != NULL) { + list_for_each_entry(event, &pmu->aliases, list) + len++; + if (pmu->selectable) + len++; + } + aliases = zalloc(sizeof(struct sevent) * len); + if (!aliases) { + pr_err("FATAL: not enough memory to print PMU events\n"); + return; + } + pmu = NULL; + j = 0; + while ((pmu = perf_pmus__scan(pmu)) != NULL) { + bool is_cpu = pmu->is_core; + + list_for_each_entry(event, &pmu->aliases, list) { + aliases[j].event = event; + aliases[j].pmu = pmu; + aliases[j].is_cpu = is_cpu; + j++; + } + if (pmu->selectable) { + aliases[j].event = NULL; + aliases[j].pmu = pmu; + aliases[j].is_cpu = is_cpu; + j++; + } + } + len = j; + 
qsort(aliases, len, sizeof(struct sevent), cmp_sevent); + for (j = 0; j < len; j++) { + const char *name, *alias = NULL, *scale_unit = NULL, + *desc = NULL, *long_desc = NULL, + *encoding_desc = NULL, *topic = NULL, + *pmu_name = NULL; + bool deprecated = false; + size_t buf_used; + + /* Skip duplicates */ + if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) + continue; + + if (!aliases[j].event) { + /* A selectable event. */ + pmu_name = aliases[j].pmu->name; + buf_used = snprintf(buf, sizeof(buf), "%s//", pmu_name) + 1; + name = buf; + } else { + if (aliases[j].event->desc) { + name = aliases[j].event->name; + buf_used = 0; + } else { + name = format_alias(buf, sizeof(buf), aliases[j].pmu, + aliases[j].event); + if (aliases[j].is_cpu) { + alias = name; + name = aliases[j].event->name; + } + buf_used = strlen(buf) + 1; + } + pmu_name = aliases[j].event->pmu_name ?: (aliases[j].pmu->name ?: ""); + if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) { + scale_unit = buf + buf_used; + buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, + "%G%s", aliases[j].event->scale, + aliases[j].event->unit) + 1; + } + desc = aliases[j].event->desc; + long_desc = aliases[j].event->long_desc; + topic = aliases[j].event->topic; + encoding_desc = buf + buf_used; + buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, + "%s/%s/", pmu_name, aliases[j].event->str) + 1; + deprecated = aliases[j].event->deprecated; + } + print_cb->print_event(print_state, + pmu_name, + topic, + name, + alias, + scale_unit, + deprecated, + "Kernel PMU event", + desc, + long_desc, + encoding_desc); + } + if (printed && pager_in_use()) + printf("\n"); + + zfree(&aliases); +} + +bool perf_pmus__have_event(const char *pname, const char *name) +{ + struct perf_pmu *pmu = perf_pmus__find(pname); + + return pmu && perf_pmu__have_event(pmu, name); +} + +int perf_pmus__num_core_pmus(void) +{ + static int count; + + if (!count) { + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) + count++; + } + return count; +} + +static bool __perf_pmus__supports_extended_type(void) +{ + struct perf_pmu *pmu = NULL; + + if (perf_pmus__num_core_pmus() <= 1) + return false; + + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (!is_event_supported(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES | ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT))) + return false; + } + + return true; +} + +static bool perf_pmus__do_support_extended_type; + +static void perf_pmus__init_supports_extended_type(void) +{ + perf_pmus__do_support_extended_type = __perf_pmus__supports_extended_type(); +} + +bool perf_pmus__supports_extended_type(void) +{ + static pthread_once_t extended_type_once = PTHREAD_ONCE_INIT; + + pthread_once(&extended_type_once, perf_pmus__init_supports_extended_type); + + return perf_pmus__do_support_extended_type; +} + +char *perf_pmus__default_pmu_name(void) +{ + int fd; + DIR *dir; + struct dirent *dent; + char *result = NULL; + + if (!list_empty(&core_pmus)) + return strdup(list_first_entry(&core_pmus, struct perf_pmu, list)->name); + + fd = perf_pmu__event_source_devices_fd(); + if (fd < 0) + return strdup("cpu"); + + dir = fdopendir(fd); + if (!dir) { + close(fd); + return strdup("cpu"); + } + + while ((dent = readdir(dir))) { + if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) + continue; + if (is_pmu_core(dent->d_name)) { + result = strdup(dent->d_name); + break; + } + } + + closedir(dir); + return result ?: strdup("cpu"); +} + +struct perf_pmu 
*evsel__find_pmu(const struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel->pmu; + + if (!pmu) { + pmu = perf_pmus__find_by_type(evsel->core.attr.type); + ((struct evsel *)evsel)->pmu = pmu; + } + return pmu; +} diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index 5ec12007eb5c..a21464432d0f 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -2,8 +2,24 @@ #ifndef __PMUS_H #define __PMUS_H -extern struct list_head pmus; +struct perf_pmu; +struct print_callbacks; -#define perf_pmus__for_each_pmu(pmu) list_for_each_entry(pmu, &pmus, list) +void perf_pmus__destroy(void); + +struct perf_pmu *perf_pmus__find(const char *name); +struct perf_pmu *perf_pmus__find_by_type(unsigned int type); + +struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu); +struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu); + +const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str); + +int perf_pmus__num_mem_pmus(void); +void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state); +bool perf_pmus__have_event(const char *pname, const char *name); +int perf_pmus__num_core_pmus(void); +bool perf_pmus__supports_extended_type(void); +char *perf_pmus__default_pmu_name(void); #endif /* __PMUS_H */ diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index ee145cec42c0..a7566edc86a3 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -20,13 +20,14 @@ #include "metricgroup.h" #include "parse-events.h" #include "pmu.h" +#include "pmus.h" #include "print-events.h" #include "probe-file.h" #include "string2.h" #include "strlist.h" #include "tracepoint.h" #include "pfm.h" -#include "pmu-hybrid.h" +#include "thread_map.h" #define MAX_NAME_LEN 100 @@ -228,60 +229,93 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) strlist__delete(sdtlist); } +bool is_event_supported(u8 type, u64 config) +{ + bool ret = true; + int open_return; + struct evsel *evsel; + struct perf_event_attr attr = { + .type = type, + .config = config, + .disabled = 1, + }; + struct perf_thread_map *tmap = thread_map__new_by_tid(0); + + if (tmap == NULL) + return false; + + evsel = evsel__new(&attr); + if (evsel) { + open_return = evsel__open(evsel, NULL, tmap); + ret = open_return >= 0; + + if (open_return == -EACCES) { + /* + * This happens if the paranoid value + * /proc/sys/kernel/perf_event_paranoid is set to 2 + * Re-run with exclude_kernel set; we don't do that + * by default as some ARM machines do not support it. 
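The probe-by-opening idea above can be shown standalone; a hedged sketch using the raw syscall rather than perf's evsel helpers, together with the legacy cache config layout (cache id, op << 8, result << 16) that the rewritten print_hwcache_events() below relies on:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Probe support by trying to open the event on the current thread. */
static int event_is_supported(__u32 type, __u64 config)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;
        attr.config = config;
        attr.disabled = 1;

        fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 0;
        close(fd);
        return 1;
}

For example, L1D read misses are PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) with type PERF_TYPE_HW_CACHE; on hybrid systems the owning PMU is selected by additionally placing pmu->type << PERF_PMU_TYPE_SHIFT in the high config bits.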
+ * + */ + evsel->core.attr.exclude_kernel = 1; + ret = evsel__open(evsel, NULL, tmap) >= 0; + } + evsel__delete(evsel); + } + + perf_thread_map__put(tmap); + return ret; +} + int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state) { - struct strlist *evt_name_list = strlist__new(NULL, NULL); - struct str_node *nd; + struct perf_pmu *pmu = NULL; + const char *event_type_descriptor = event_type_descriptors[PERF_TYPE_HW_CACHE]; - if (!evt_name_list) { - pr_debug("Failed to allocate new strlist for hwcache events\n"); - return -ENOMEM; - } - for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { - /* skip invalid cache type */ - if (!evsel__is_cache_op_valid(type, op)) - continue; - - for (int i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - struct perf_pmu *pmu = NULL; - char name[64]; - - __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); - if (!perf_pmu__has_hybrid()) { - if (is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16))) - strlist__add(evt_name_list, name); + /* + * Only print core PMUs, skipping uncore PMUs for performance, and + * PERF_TYPE_SOFTWARE PMUs that can succeed in opening legacy cache + * events. + */ + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (pmu->is_uncore || pmu->type == PERF_TYPE_SOFTWARE) + continue; + + for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!evsel__is_cache_op_valid(type, op)) continue; - } - perf_pmu__for_each_hybrid_pmu(pmu) { - if (is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16) | - ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT))) { - char new_name[128]; - snprintf(new_name, sizeof(new_name), - "%s/%s/", pmu->name, name); - strlist__add(evt_name_list, new_name); - } + + for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) { + char name[64]; + char alias_name[128]; + __u64 config; + int ret; + + __evsel__hw_cache_type_op_res_name(type, op, res, + name, sizeof(name)); + + ret = parse_events__decode_legacy_cache(name, pmu->type, + &config); + if (ret || !is_event_supported(PERF_TYPE_HW_CACHE, config)) + continue; + snprintf(alias_name, sizeof(alias_name), "%s/%s/", + pmu->name, name); + print_cb->print_event(print_state, + "cache", + pmu->name, + name, + alias_name, + /*scale_unit=*/NULL, + /*deprecated=*/false, + event_type_descriptor, + /*desc=*/NULL, + /*long_desc=*/NULL, + /*encoding_desc=*/NULL); + } + } + } + } - - strlist__for_each_entry(nd, evt_name_list) { - print_cb->print_event(print_state, - "cache", - /*pmu_name=*/NULL, - nd->s, - /*event_alias=*/NULL, - /*scale_unit=*/NULL, - /*deprecated=*/false, - event_type_descriptors[PERF_TYPE_HW_CACHE], - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } - strlist__delete(evt_name_list); return 0; } @@ -370,7 +404,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) print_hwcache_events(print_cb, print_state); - print_pmu_events(print_cb, print_state); + perf_pmus__print_pmu_events(print_cb, print_state); print_cb->print_event(print_state, /*topic=*/NULL, diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index e75a3d7e3fe3..d7fab411e75c 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -3,6 +3,7 @@ #define __PERF_PRINT_EVENTS_H #include <linux/perf_event.h> +#include <linux/types.h> #include <stdbool.h> struct event_symbol; @@ -36,5
+37,6 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta unsigned int max); void print_tool_events(const struct print_callbacks *print_cb, void *print_state); void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); +bool is_event_supported(u8 type, u64 config); #endif /* __PERF_PRINT_EVENTS_H */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index aa5156c2bcff..d4c9b4cd35ef 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -39,5 +39,4 @@ util/affinity.c util/rwsem.c util/hashmap.c util/perf_regs.c -util/pmu-hybrid.c util/fncache.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 0faea4c75eed..a7b2cb05dc86 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -22,6 +22,7 @@ #include "util/bpf-filter.h" #include "util/env.h" #include "util/pmu.h" +#include "util/pmus.h" #include <internal/lib.h> #include "util.h" @@ -50,6 +51,14 @@ #endif /* + * Avoid bringing in event parsing. + */ +int parse_event(struct evlist *evlist __maybe_unused, const char *str __maybe_unused) +{ + return 0; +} + +/* * Provide these two so that we don't have to link against callchain.c and * start dragging hist.c, etc. */ @@ -94,6 +103,11 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, return EOF; } +int perf_pmus__num_core_pmus(void) +{ + return 1; +} + bool evsel__is_aux_event(const struct evsel *evsel __maybe_unused) { return false; @@ -1480,3 +1494,7 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cp int fd, int group_fd, unsigned long flags) { } + +void evlist__free_stats(struct evlist *evlist) +{ +} diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 039d0365ad41..603091317bed 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -67,8 +67,6 @@ INTERP my_perl; #define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(unsigned short) * 8)) - 1) -static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX); - extern struct scripting_context *scripting_context; static char *cur_field_name; @@ -262,6 +260,7 @@ static SV *perl_process_callchain(struct perf_sample *sample, struct evsel *evsel, struct addr_location *al) { + struct callchain_cursor *cursor; AV *list; list = newAV(); @@ -271,18 +270,20 @@ static SV *perl_process_callchain(struct perf_sample *sample, if (!symbol_conf.use_callchain || !sample->callchain) goto exit; - if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel, + cursor = get_tls_callchain_cursor(); + + if (thread__resolve_callchain(al->thread, cursor, evsel, sample, NULL, NULL, scripting_max_stack) != 0) { pr_err("Failed to resolve callchain. 
Skipping\n"); goto exit; } - callchain_cursor_commit(&callchain_cursor); + callchain_cursor_commit(cursor); while (1) { HV *elem; struct callchain_cursor_node *node; - node = callchain_cursor_current(&callchain_cursor); + node = callchain_cursor_current(cursor); if (!node) break; @@ -330,7 +331,7 @@ static SV *perl_process_callchain(struct perf_sample *sample, } } - callchain_cursor_advance(&callchain_cursor); + callchain_cursor_advance(cursor); av_push(list, newRV_noinc((SV*)elem)); } @@ -353,7 +354,9 @@ static void perl_process_tracepoint(struct perf_sample *sample, void *data = sample->raw_data; unsigned long long nsecs = sample->time; const char *comm = thread__comm_str(thread); + DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX); + bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX); dSP; if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 41d4f9e6a8b7..94312741443a 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -93,8 +93,6 @@ PyMODINIT_FUNC PyInit_perf_trace_context(void); #define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(unsigned short) * 8)) - 1) -static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX); - #define N_COMMON_FIELDS 7 static char *cur_field_name; @@ -419,6 +417,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, struct addr_location *al) { PyObject *pylist; + struct callchain_cursor *cursor; pylist = PyList_New(0); if (!pylist) @@ -427,19 +426,20 @@ static PyObject *python_process_callchain(struct perf_sample *sample, if (!symbol_conf.use_callchain || !sample->callchain) goto exit; - if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel, + cursor = get_tls_callchain_cursor(); + if (thread__resolve_callchain(al->thread, cursor, evsel, sample, NULL, NULL, scripting_max_stack) != 0) { pr_err("Failed to resolve callchain. 
Skipping\n"); goto exit; } - callchain_cursor_commit(&callchain_cursor); + callchain_cursor_commit(cursor); while (1) { PyObject *pyelem; struct callchain_cursor_node *node; - node = callchain_cursor_current(&callchain_cursor); + node = callchain_cursor_current(cursor); if (!node) break; @@ -471,9 +471,11 @@ static PyObject *python_process_callchain(struct perf_sample *sample, struct addr_location node_al; unsigned long offset; + addr_location__init(&node_al); node_al.addr = map__map_ip(map, node->ip); - node_al.map = map; + node_al.map = map__get(map); offset = get_offset(node->ms.sym, &node_al); + addr_location__exit(&node_al); pydict_set_item_string_decref( pyelem, "sym_off", @@ -493,7 +495,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, _PyUnicode_FromString(dsoname)); } - callchain_cursor_advance(&callchain_cursor); + callchain_cursor_advance(cursor); PyList_Append(pylist, pyelem); Py_DECREF(pyelem); } @@ -541,6 +543,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample, pydict_set_item_string_decref(pyelem, "cycles", PyLong_FromUnsignedLongLong(entries[i].flags.cycles)); + addr_location__init(&al); thread__find_map_fb(thread, sample->cpumode, entries[i].from, &al); dsoname = get_dsoname(al.map); @@ -553,6 +556,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample, pydict_set_item_string_decref(pyelem, "to_dsoname", _PyUnicode_FromString(dsoname)); + addr_location__exit(&al); PyList_Append(pylist, pyelem); Py_DECREF(pyelem); } @@ -596,7 +600,6 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, PyObject *pylist; u64 i; char bf[512]; - struct addr_location al; pylist = PyList_New(0); if (!pylist) @@ -607,7 +610,9 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, for (i = 0; i < br->nr; i++) { PyObject *pyelem; + struct addr_location al; + addr_location__init(&al); pyelem = PyDict_New(); if (!pyelem) Py_FatalError("couldn't create Python dictionary"); @@ -646,6 +651,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, PyList_Append(pylist, pyelem); Py_DECREF(pyelem); + addr_location__exit(&al); } exit: @@ -733,6 +739,9 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch bf[0] = 0; + if (size <= 0) + return; + if (!regs || !regs->regs) return; @@ -760,17 +769,18 @@ static void set_regs_in_dict(PyObject *dict, * 10 chars is for register name. 
*/ int size = __sw_hweight64(attr->sample_regs_intr) * 28; - char bf[size]; + char *bf = malloc(size); - regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf)); + regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size); pydict_set_item_string_decref(dict, "iregs", _PyUnicode_FromString(bf)); - regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf)); + regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, size); pydict_set_item_string_decref(dict, "uregs", _PyUnicode_FromString(bf)); + free(bf); } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, @@ -934,6 +944,9 @@ static void python_process_tracepoint(struct perf_sample *sample, unsigned long long nsecs = sample->time; const char *comm = thread__comm_str(al->thread); const char *default_handler_name = "trace_unhandled"; + DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX); + + bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX); if (!event) { snprintf(handler_name, sizeof(handler_name), @@ -1162,11 +1175,11 @@ static int python_export_thread(struct db_export *dbe, struct thread *thread, t = tuple_new(5); - tuple_set_d64(t, 0, thread->db_id); + tuple_set_d64(t, 0, thread__db_id(thread)); tuple_set_d64(t, 1, machine->db_id); tuple_set_d64(t, 2, main_thread_db_id); - tuple_set_s32(t, 3, thread->pid_); - tuple_set_s32(t, 4, thread->tid); + tuple_set_s32(t, 3, thread__pid(thread)); + tuple_set_s32(t, 4, thread__tid(thread)); call_object(tables->thread_handler, t, "thread_table"); @@ -1185,7 +1198,7 @@ static int python_export_comm(struct db_export *dbe, struct comm *comm, tuple_set_d64(t, 0, comm->db_id); tuple_set_string(t, 1, comm__str(comm)); - tuple_set_d64(t, 2, thread->db_id); + tuple_set_d64(t, 2, thread__db_id(thread)); tuple_set_d64(t, 3, comm->start); tuple_set_s32(t, 4, comm->exec); @@ -1206,7 +1219,7 @@ static int python_export_comm_thread(struct db_export *dbe, u64 db_id, tuple_set_d64(t, 0, db_id); tuple_set_d64(t, 1, comm->db_id); - tuple_set_d64(t, 2, thread->db_id); + tuple_set_d64(t, 2, thread__db_id(thread)); call_object(tables->comm_thread_handler, t, "comm_thread_table"); @@ -1291,7 +1304,7 @@ static void python_export_sample_table(struct db_export *dbe, tuple_set_d64(t, 0, es->db_id); tuple_set_d64(t, 1, es->evsel->db_id); tuple_set_d64(t, 2, maps__machine(es->al->maps)->db_id); - tuple_set_d64(t, 3, es->al->thread->db_id); + tuple_set_d64(t, 3, thread__db_id(es->al->thread)); tuple_set_d64(t, 4, es->comm_db_id); tuple_set_d64(t, 5, es->dso_db_id); tuple_set_d64(t, 6, es->sym_db_id); @@ -1381,7 +1394,7 @@ static int python_export_call_return(struct db_export *dbe, t = tuple_new(14); tuple_set_d64(t, 0, cr->db_id); - tuple_set_d64(t, 1, cr->thread->db_id); + tuple_set_d64(t, 1, thread__db_id(cr->thread)); tuple_set_d64(t, 2, comm_db_id); tuple_set_d64(t, 3, cr->cp->db_id); tuple_set_d64(t, 4, cr->call_time); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e2806791c76a..00d18c74c090 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -278,11 +278,6 @@ struct perf_session *__perf_session__new(struct perf_data *data, return ERR_PTR(ret); } -static void perf_session__delete_threads(struct perf_session *session) -{ - machine__delete_threads(&session->machines.host); -} - static void perf_decomp__release_events(struct decomp *next) { struct decomp *decomp; @@ -305,7 +300,6 @@ void perf_session__delete(struct perf_session *session) auxtrace__free(session); auxtrace_index__free(&session->auxtrace_index); 
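Throughout these hunks, direct field accesses on struct thread give way to accessors (thread__tid(), thread__pid(), thread__db_id()) as the struct becomes reference counted. A hypothetical helper showing the accompanying get/put discipline:

#include "thread.h"

/* Illustrative only: hold a reference for as long as fields are read. */
static pid_t sample_thread_tid(struct thread *th)
{
        struct thread *ref = thread__get(th);
        pid_t tid = thread__tid(ref);

        thread__put(ref);
        return tid;
}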
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e2806791c76a..00d18c74c090 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -278,11 +278,6 @@ struct perf_session *__perf_session__new(struct perf_data *data, return ERR_PTR(ret); } -static void perf_session__delete_threads(struct perf_session *session) -{ - machine__delete_threads(&session->machines.host); -} - static void perf_decomp__release_events(struct decomp *next) { struct decomp *decomp; @@ -305,7 +300,6 @@ void perf_session__delete(struct perf_session *session) auxtrace__free(session); auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); - perf_session__delete_threads(session); perf_decomp__release_events(session->decomp_data.decomp); perf_env__exit(&session->header.env); machines__exit(&session->machines); @@ -2807,7 +2801,7 @@ static int perf_session__set_guest_cpu(struct perf_session *session, pid_t pid, if (!thread) return -ENOMEM; - thread->guest_cpu = guest_cpu; + thread__set_guest_cpu(thread, guest_cpu); thread__put(thread); return 0; diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index c294db713677..869738fc06c3 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -36,6 +36,10 @@ if cc_is_clang: vars[var] = sub("-fno-semantic-interposition", "", vars[var]) if not clang_has_option("-ffat-lto-objects"): vars[var] = sub("-ffat-lto-objects", "", vars[var]) + if not clang_has_option("-ftree-loop-distribute-patterns"): + vars[var] = sub("-ftree-loop-distribute-patterns", "", vars[var]) + if not clang_has_option("-gno-variable-location-views"): + vars[var] = sub("-gno-variable-location-views", "", vars[var]) from setuptools import setup, Extension diff --git a/tools/perf/util/sharded_mutex.c b/tools/perf/util/sharded_mutex.c new file mode 100644 index 000000000000..e11e8d0945a7 --- /dev/null +++ b/tools/perf/util/sharded_mutex.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "sharded_mutex.h" + +#include <stdlib.h> + +struct sharded_mutex *sharded_mutex__new(size_t num_shards) +{ + struct sharded_mutex *result; + size_t size; + unsigned int bits; + + for (bits = 0; ((size_t)1 << bits) < num_shards; bits++) + ; + + size = sizeof(*result) + sizeof(struct mutex) * (1 << bits); + result = malloc(size); + if (!result) + return NULL; + + result->cap_bits = bits; + for (size_t i = 0; i < ((size_t)1 << bits); i++) + mutex_init(&result->mutexes[i]); + + return result; +} + +void sharded_mutex__delete(struct sharded_mutex *sm) +{ + for (size_t i = 0; i < ((size_t)1 << sm->cap_bits); i++) + mutex_destroy(&sm->mutexes[i]); + + free(sm); +} diff --git a/tools/perf/util/sharded_mutex.h b/tools/perf/util/sharded_mutex.h new file mode 100644 index 000000000000..7325e969eee3 --- /dev/null +++ b/tools/perf/util/sharded_mutex.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef PERF_SHARDED_MUTEX_H +#define PERF_SHARDED_MUTEX_H + +#include "mutex.h" +#include "hashmap.h" + +/* + * In a situation where a lock is needed per object, having a mutex can be + * relatively memory expensive (40 bytes on x86-64). If the object can be + * consistently hashed, a sharded mutex is an alternative global pool of mutexes + * where the mutex is looked up from a hash value. This can lead to collisions + * if the number of shards isn't large enough. + */
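A hypothetical usage sketch for the sharded_mutex API introduced above, assuming it is compiled inside the perf tree where mutex.h is available (the object type, shard count, and pointer-as-hash choice are illustrative, not part of the patch):

```c
#include <stdlib.h>
#include "sharded_mutex.h"

struct annotated_sym { long samples; };	/* illustrative per-object state */

static struct sharded_mutex *lock_pool;

static int lock_pool__init(void)
{
	/* The requested count is rounded up to a power of two by the constructor. */
	lock_pool = sharded_mutex__new(1024);
	return lock_pool ? 0 : -1;
}

static void annotated_sym__hit(struct annotated_sym *sym)
{
	/* The object's address is a cheap, stable hash input. */
	struct mutex *lock = sharded_mutex__get_mutex(lock_pool, (size_t)sym);

	mutex_lock(lock);
	sym->samples++;	/* two objects may share a shard; that only costs contention */
	mutex_unlock(lock);
}
```

The trade-off is the one the comment names: 1<<cap_bits mutexes in total instead of one per object, at the price of occasional contention when two objects hash to the same shard.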
+struct sharded_mutex { + /* mutexes array is 1<<cap_bits in size. */ + unsigned int cap_bits; + struct mutex mutexes[]; +}; + +struct sharded_mutex *sharded_mutex__new(size_t num_shards); +void sharded_mutex__delete(struct sharded_mutex *sm); + +static inline struct mutex *sharded_mutex__get_mutex(struct sharded_mutex *sm, size_t hash) +{ + return &sm->mutexes[hash_bits(hash, sm->cap_bits)]; +} + +#endif /* PERF_SHARDED_MUTEX_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 650cd8df4041..6aa1c7f2b444 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -108,7 +108,7 @@ static int64_t cmp_null(const void *l, const void *r) static int64_t sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) { - return right->thread->tid - left->thread->tid; + return thread__tid(right->thread) - thread__tid(left->thread); } static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf, @@ -117,7 +117,7 @@ static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf, const char *comm = thread__comm_str(he->thread); width = max(7U, width) - 8; - return repsep_snprintf(bf, size, "%7d:%-*.*s", he->thread->tid, + return repsep_snprintf(bf, size, "%7d:%-*.*s", thread__tid(he->thread), width, width, comm ?: ""); } @@ -128,7 +128,7 @@ static int hist_entry__thread_filter(struct hist_entry *he, int type, const void if (type != HIST_FILTER__THREAD) return -1; - return th && he->thread != th; + return th && RC_CHK_ACCESS(he->thread) != RC_CHK_ACCESS(th); } struct sort_entry sort_thread = { @@ -643,7 +643,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e) sf = __get_srcline(map__dso(map), map__rip_2objdump(map, e->ip), e->ms.sym, false, true, true, e->ip); - if (!strcmp(sf, SRCLINE_UNKNOWN)) + if (sf == SRCLINE_UNKNOWN) return no_srcfile; p = strchr(sf, ':'); if (p && *sf) { @@ -1543,8 +1543,10 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) !l_dso->id.ino && !l_dso->id.ino_generation) { /* userspace anonymous */ - if (left->thread->pid_ > right->thread->pid_) return -1; - if (left->thread->pid_ < right->thread->pid_) return 1; + if (thread__pid(left->thread) > thread__pid(right->thread)) + return -1; + if (thread__pid(left->thread) < thread__pid(right->thread)) + return 1; } addr: diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index cfca03abd6f8..034b496df297 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -21,8 +21,12 @@ #include "symbol.h" #include "subcmd/run-command.h" +/* If addr2line doesn't return data for 1 second then time out. */
+int addr2line_timeout_ms = 1 * 1000; bool srcline_full_filename; +char *srcline__unknown = (char *)"??:0"; + static const char *dso__name(struct dso *dso) { const char *dso_name; @@ -385,7 +389,7 @@ static int filename_split(char *filename, unsigned int *line_nr) *line_nr = strtoul(sep, NULL, 0); return 1; } - + pr_debug("addr2line missing ':' in filename split\n"); return 0; } @@ -406,7 +410,7 @@ static struct child_process *addr2line_subprocess_init(const char *addr2line_pat const char *argv[] = { addr2line_path ?: "addr2line", "-e", binary_path, - "-i", "-f", NULL + "-a", "-i", "-f", NULL }; struct child_process *a2l = zalloc(sizeof(*a2l)); int start_command_status = 0; @@ -441,7 +445,7 @@ enum a2l_style { LLVM, }; -static enum a2l_style addr2line_configure(struct child_process *a2l) +static enum a2l_style addr2line_configure(struct child_process *a2l, const char *dso_name) { static bool cached; static enum a2l_style style; @@ -450,6 +454,7 @@ static enum a2l_style addr2line_configure(struct child_process *a2l) char buf[128]; struct io io; int ch; + int lines; if (write(a2l->in, ",\n", 2) != 2) return BROKEN; @@ -459,19 +464,32 @@ static enum a2l_style addr2line_configure(struct child_process *a2l) if (ch == ',') { style = LLVM; cached = true; - } else if (ch == '?') { + lines = 1; + pr_debug("Detected LLVM addr2line style\n"); + } else if (ch == '0') { style = GNU_BINUTILS; cached = true; + lines = 3; + pr_debug("Detected binutils addr2line style\n"); } else { - style = BROKEN; + if (!symbol_conf.disable_add2line_warn) { + char *output = NULL; + size_t output_len; + + io__getline(&io, &output, &output_len); + pr_warning("%s %s: addr2line configuration failed\n", + __func__, dso_name); + pr_warning("\t%c%s", ch, output); + } + pr_debug("Unknown/broken addr2line style\n"); + return BROKEN; } - do { + while (lines) { ch = io__get_char(&io); - } while (ch > 0 && ch != '\n'); - if (style == GNU_BINUTILS) { - do { - ch = io__get_char(&io); - } while (ch > 0 && ch != '\n'); + if (ch <= 0) + break; + if (ch == '\n') + lines--; } /* Ignore SIGPIPE in the event addr2line exits. */ signal(SIGPIPE, SIG_IGN); @@ -481,6 +499,9 @@ static enum a2l_style addr2line_configure(struct child_process *a2l) static int read_addr2line_record(struct io *io, enum a2l_style style, + const char *dso_name, + u64 addr, + bool first, char **function, char **filename, unsigned int *line_nr) @@ -505,23 +526,74 @@ static int read_addr2line_record(struct io *io, if (line_nr != NULL) *line_nr = 0; + /* + * Read the first line. Without an error this will be: + * - for the first line, an address like 0x1234, + * - for binutils, the sentinel 0x0000000000000000, + * - for llvm-addr2line, the sentinel ',' character, + * - for an inlined function, its function name line. + */ if (io__getline(io, &line, &line_len) < 0 || !line_len) goto error; + pr_debug("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line); if (style == LLVM && line_len == 2 && line[0] == ',') { + /* Found the llvm-addr2line sentinel character. */ zfree(&line); return 0; + } else if (style == GNU_BINUTILS && (!first || addr != 0)) { + int zero_count = 0, non_zero_count = 0; + /* + * Check for the binutils sentinel, ignoring it when the + * requested address is 0. + */ + + /* A given address should always start 0x. */
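For reference, with -a -i -f a GNU binutils addr2line that is asked about one address and then the junk ',' emits the echoed address, one function/file:line pair per inline level, and finally the all-zero sentinel address followed by "??"/"??:0". A self-contained sketch of consuming such a stream, with fmemopen() standing in for the subprocess pipe and invented sample data:

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	static const char stream[] =
		"0x0000000000001234\n"		/* echoed request address */
		"inlined_callee\n"		/* innermost function */
		"/src/foo.c:42\n"
		"main\n"			/* inline parent, same record */
		"/src/foo.c:99\n"
		"0x0000000000000000\n"		/* sentinel from the junk "," */
		"??\n"
		"??:0\n";
	FILE *f = fmemopen((void *)stream, sizeof(stream) - 1, "r");
	char line[256];

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (!strcmp(line, "0x0000000000000000\n"))
			break;	/* all-zero address: nothing useful follows */
		printf("record: %s", line);
	}
	fclose(f);
	return 0;
}
```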
+ if (line_len >= 2 || line[0] != '0' || line[1] != 'x') { + for (size_t i = 2; i < line_len; i++) { + if (line[i] == '0') + zero_count++; + else if (line[i] != '\n') + non_zero_count++; + } + if (!non_zero_count) { + int ch; + + if (first && !zero_count) { + /* Line was erroneously just '0x'. */ + goto error; + } + /* + * Line was 0x0..0, the sentinel for binutils. Remove + * the function and filename lines. + */ + zfree(&line); + do { + ch = io__get_char(io); + } while (ch > 0 && ch != '\n'); + do { + ch = io__get_char(io); + } while (ch > 0 && ch != '\n'); + return 0; + } + } } + /* Read the second function name line (if inline data then this is the first line). */ + if (first && (io__getline(io, &line, &line_len) < 0 || !line_len)) + goto error; + pr_debug("%s %s: addr2line read line: %s", __func__, dso_name, line); if (function != NULL) *function = strdup(strim(line)); zfree(&line); line_len = 0; + /* Read the third filename and line number line. */ if (io__getline(io, &line, &line_len) < 0 || !line_len) goto error; + pr_debug("%s %s: addr2line filename:number : %s", __func__, dso_name, line); if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 && style == GNU_BINUTILS) { ret = 0; @@ -574,15 +646,14 @@ static int addr2line(const char *dso_name, u64 addr, int len; char buf[128]; ssize_t written; - struct io io; + struct io io = { .eof = false }; enum a2l_style a2l_style; if (!a2l) { if (!filename__has_section(dso_name, ".debug_line")) goto out; - dso->a2l = addr2line_subprocess_init(symbol_conf.addr2line_path, - dso_name); + dso->a2l = addr2line_subprocess_init(symbol_conf.addr2line_path, dso_name); a2l = dso->a2l; } @@ -591,22 +662,18 @@ static int addr2line(const char *dso_name, u64 addr, pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name); goto out; } - a2l_style = addr2line_configure(a2l); - if (a2l_style == BROKEN) { - if (!symbol_conf.disable_add2line_warn) - pr_warning("%s: addr2line configuration failed\n", __func__); + a2l_style = addr2line_configure(a2l, dso_name); + if (a2l_style == BROKEN) goto out; - } /* - * Send our request and then *deliberately* send something that can't be interpreted as - * a valid address to ask addr2line about (namely, ","). This causes addr2line to first - * write out the answer to our request, in an unbounded/unknown number of records, and - * then to write out the lines "??" and "??:0", for GNU binutils, or "," for - * llvm-addr2line, so that we can detect when it has finished giving us anything - * useful. With GNU binutils, we have to be careful about the first record, though, - * because it may be genuinely unknown, in which case we'll get two sets of "??"/"??:0" - * lines. + * Send our request and then *deliberately* send something that can't be + * interpreted as a valid address to ask addr2line about (namely, + * ","). This causes addr2line to first write out the answer to our + * request, in an unbounded/unknown number of records, and then to write + * out the lines "0x0...0", "??" and "??:0", for GNU binutils, or "," + * for llvm-addr2line, so that we can detect when it has finished giving + * us anything useful. */ len = snprintf(buf, sizeof(buf), "%016"PRIx64"\n,\n", addr); written = len > 0 ?
write(a2l->in, buf, len) : -1; @@ -616,8 +683,8 @@ goto out; } io__init(&io, a2l->out, buf, sizeof(buf)); - - switch (read_addr2line_record(&io, a2l_style, + io.timeout_ms = addr2line_timeout_ms; + switch (read_addr2line_record(&io, a2l_style, dso_name, addr, /*first=*/true, &record_function, &record_filename, &record_line_nr)) { case -1: if (!symbol_conf.disable_add2line_warn) goto out; case 0: /* - * The first record was invalid, so return failure, but first read another - * record, since we asked a junk question and have to clear the answer out. + * The first record was invalid, so return failure, but first + * read another record, since we sent a sentinel ',' for the + * sake of detecting the last inlined function. Treat this as the + * first record, as the ',' generates a new start with GNU + * binutils; also force a non-zero address as we're no longer + * reading that record. */ - switch (read_addr2line_record(&io, a2l_style, NULL, NULL, NULL)) { + switch (read_addr2line_record(&io, a2l_style, dso_name, + /*addr=*/1, /*first=*/true, + NULL, NULL, NULL)) { case -1: if (!symbol_conf.disable_add2line_warn) - pr_warning("%s %s: could not read delimiter record\n", + pr_warning("%s %s: could not read sentinel record\n", __func__, dso_name); break; case 0: - /* As expected. */ + /* The sentinel as expected. */ break; default: if (!symbol_conf.disable_add2line_warn) @@ -645,6 +718,7 @@ } goto out; default: + /* First record as expected. */ break; } @@ -665,9 +739,16 @@ } } - /* We have to read the records even if we don't care about the inline info. */ + /* + * We have to read the records even if we don't care about the inline + * info. This isn't the first record, so force the address to non-zero + * as we're reading records beyond the first.
+ */ while ((record_status = read_addr2line_record(&io, a2l_style, + dso_name, + /*addr=*/1, + /*first=*/false, &record_function, &record_filename, &record_line_nr)) == 1) { @@ -686,6 +767,10 @@ static int addr2line(const char *dso_name, u64 addr, out: free(record_function); free(record_filename); + if (io.eof) { + dso->a2l = NULL; + addr2line_subprocess_cleanup(a2l); + } return ret; } @@ -804,10 +889,15 @@ out: return NULL; } -void free_srcline(char *srcline) +void zfree_srcline(char **srcline) { - if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0) - free(srcline); + if (*srcline == NULL) + return; + + if (*srcline != SRCLINE_UNKNOWN) + free(*srcline); + + *srcline = NULL; } char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, @@ -880,7 +970,7 @@ void srcline__tree_delete(struct rb_root_cached *tree) pos = rb_entry(next, struct srcline_node, rb_node); next = rb_next(&pos->rb_node); rb_erase_cached(&pos->rb_node, tree); - free_srcline(pos->srcline); + zfree_srcline(&pos->srcline); zfree(&pos); } } @@ -903,7 +993,7 @@ void inline_node__delete(struct inline_node *node) list_for_each_entry_safe(ilist, tmp, &node->val, list) { list_del_init(&ilist->list); - free_srcline(ilist->srcline); + zfree_srcline(&ilist->srcline); /* only the inlined symbols are owned by the list */ if (ilist->symbol && ilist->symbol->inlined) symbol__delete(ilist->symbol); diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index b11a0aaaa676..75010d39ea28 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -9,13 +9,14 @@ struct dso; struct symbol; +extern int addr2line_timeout_ms; extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool show_addr, u64 ip); char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool show_addr, bool unwind_inlines, u64 ip); -void free_srcline(char *srcline); +void zfree_srcline(char **srcline); char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line); /* insert the srcline into the DSO, which will take ownership */ @@ -25,7 +26,8 @@ char *srcline__tree_find(struct rb_root_cached *tree, u64 addr); /* delete all srclines within the tree */ void srcline__tree_delete(struct rb_root_cached *tree); -#define SRCLINE_UNKNOWN ((char *) "??:0") +extern char *srcline__unknown; +#define SRCLINE_UNKNOWN srcline__unknown struct inline_list { struct symbol *symbol; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bf5a6c14dfcd..7329b3340f88 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -19,12 +19,13 @@ #include <api/fs/fs.h> #include "util.h" #include "iostat.h" -#include "pmu-hybrid.h" -#include "evlist-hybrid.h" +#include "pmu.h" +#include "pmus.h" #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" +#define MGROUP_LEN 50 #define METRIC_LEN 38 #define EVNAME_LEN 32 #define COUNTS_LEN 18 @@ -36,6 +37,7 @@ static int aggr_header_lens[] = { [AGGR_CORE] = 18, + [AGGR_CACHE] = 22, [AGGR_DIE] = 12, [AGGR_SOCKET] = 6, [AGGR_NODE] = 6, @@ -46,6 +48,7 @@ static int aggr_header_lens[] = { static const char *aggr_header_csv[] = { [AGGR_CORE] = "core,cpus,", + [AGGR_CACHE] = "cache,cpus,", [AGGR_DIE] = "die,cpus,", [AGGR_SOCKET] = "socket,cpus,", [AGGR_NONE] = "cpu,", @@ -56,6 +59,7 @@ static const char *aggr_header_csv[] = { static const char *aggr_header_std[] = { [AGGR_CORE] = "core", + [AGGR_CACHE] = "cache", [AGGR_DIE] = "die", [AGGR_SOCKET] = "socket", 
[AGGR_NONE] = "cpu", @@ -193,6 +197,10 @@ static void print_aggr_id_std(struct perf_stat_config *config, case AGGR_CORE: snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core); break; + case AGGR_CACHE: + snprintf(buf, sizeof(buf), "S%d-D%d-L%d-ID%d", + id.socket, id.die, id.cache_lvl, id.cache); + break; case AGGR_DIE: snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die); break; @@ -239,6 +247,10 @@ static void print_aggr_id_csv(struct perf_stat_config *config, fprintf(output, "S%d-D%d-C%d%s%d%s", id.socket, id.die, id.core, sep, aggr_nr, sep); break; + case AGGR_CACHE: + fprintf(config->output, "S%d-D%d-L%d-ID%d%s%d%s", + id.socket, id.die, id.cache_lvl, id.cache, sep, aggr_nr, sep); + break; case AGGR_DIE: fprintf(output, "S%d-D%d%s%d%s", id.socket, id.die, sep, aggr_nr, sep); @@ -284,6 +296,10 @@ static void print_aggr_id_json(struct perf_stat_config *config, fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", id.socket, id.die, id.core, aggr_nr); break; + case AGGR_CACHE: + fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ", + id.socket, id.die, id.cache_lvl, id.cache, aggr_nr); + break; case AGGR_DIE: fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", id.socket, id.die, aggr_nr); @@ -349,16 +365,27 @@ static void new_line_std(struct perf_stat_config *config __maybe_unused, os->newline = true; } -static void do_new_line_std(struct perf_stat_config *config, - struct outstate *os) +static inline void __new_line_std_csv(struct perf_stat_config *config, + struct outstate *os) { fputc('\n', os->fh); if (os->prefix) fputs(os->prefix, os->fh); aggr_printout(config, os->evsel, os->id, os->aggr_nr); +} + +static inline void __new_line_std(struct outstate *os) +{ + fprintf(os->fh, " "); +} + +static void do_new_line_std(struct perf_stat_config *config, + struct outstate *os) +{ + __new_line_std_csv(config, os); if (config->aggr_mode == AGGR_NONE) fprintf(os->fh, " "); - fprintf(os->fh, " "); + __new_line_std(os); } static void print_metric_std(struct perf_stat_config *config, @@ -393,10 +420,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) struct outstate *os = ctx; int i; - fputc('\n', os->fh); - if (os->prefix) - fprintf(os->fh, "%s", os->prefix); - aggr_printout(config, os->evsel, os->id, os->aggr_nr); + __new_line_std_csv(config, os); for (i = 0; i < os->nfields; i++) fputs(config->csv_sep, os->fh); } @@ -447,6 +471,54 @@ static void new_line_json(struct perf_stat_config *config, void *ctx) aggr_printout(config, os->evsel, os->id, os->aggr_nr); } +static void print_metricgroup_header_json(struct perf_stat_config *config, + void *ctx, + const char *metricgroup_name) +{ + if (!metricgroup_name) + return; + + fprintf(config->output, "\"metricgroup\" : \"%s\"}", metricgroup_name); + new_line_json(config, ctx); +} + +static void print_metricgroup_header_csv(struct perf_stat_config *config, + void *ctx, + const char *metricgroup_name) +{ + struct outstate *os = ctx; + int i; + + if (!metricgroup_name) { + /* Leave space for running and enabling */ + for (i = 0; i < os->nfields - 2; i++) + fputs(config->csv_sep, os->fh); + return; + } + + for (i = 0; i < os->nfields; i++) + fputs(config->csv_sep, os->fh); + fprintf(config->output, "%s", metricgroup_name); + new_line_csv(config, ctx); +} + +static void print_metricgroup_header_std(struct perf_stat_config *config, + void *ctx, + const char *metricgroup_name) +{ + struct outstate *os = ctx; + int n; + + if (!metricgroup_name) { + 
__new_line_std(os); + return; + } + + n = fprintf(config->output, " %*s", EVNAME_LEN, metricgroup_name); + + fprintf(config->output, "%*s", MGROUP_LEN - n - 1, ""); +} + /* Filter out some columns that don't work well in metrics only mode */ static bool valid_only_metric(const char *unit) @@ -677,25 +749,44 @@ static bool is_mixed_hw_group(struct evsel *counter) return false; } +static bool evlist__has_hybrid(struct evlist *evlist) +{ + struct evsel *evsel; + + if (perf_pmus__num_core_pmus() == 1) + return false; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.is_pmu_core) + return true; + } + + return false; +} + static void printout(struct perf_stat_config *config, struct outstate *os, double uval, u64 run, u64 ena, double noise, int aggr_idx) { struct perf_stat_output_ctx out; print_metric_t pm; new_line_t nl; + print_metricgroup_header_t pmh; bool ok = true; struct evsel *counter = os->evsel; if (config->csv_output) { pm = config->metric_only ? print_metric_only_csv : print_metric_csv; nl = config->metric_only ? new_line_metric : new_line_csv; + pmh = print_metricgroup_header_csv; os->nfields = 4 + (counter->cgrp ? 1 : 0); } else if (config->json_output) { pm = config->metric_only ? print_metric_only_json : print_metric_json; nl = config->metric_only ? new_line_metric : new_line_json; + pmh = print_metricgroup_header_json; } else { pm = config->metric_only ? print_metric_only : print_metric_std; nl = config->metric_only ? new_line_metric : new_line_std; + pmh = print_metricgroup_header_std; } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { @@ -717,10 +808,11 @@ static void printout(struct perf_stat_config *config, struct outstate *os, out.print_metric = pm; out.new_line = nl; + out.print_metricgroup_header = pmh; out.ctx = os; out.force_header = false; - if (!config->metric_only) { + if (!config->metric_only && !counter->default_metricgroup) { abs_printout(config, os->id, os->aggr_nr, counter, uval, ok); print_noise(config, counter, noise, /*before_metric=*/true); @@ -728,8 +820,31 @@ static void printout(struct perf_stat_config *config, struct outstate *os, } if (ok) { - perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, - &out, &config->metric_events); + if (!config->metric_only && counter->default_metricgroup) { + void *from = NULL; + + aggr_printout(config, os->evsel, os->id, os->aggr_nr); + /* Print out all the metricgroups with the same metric event. */ + do { + int num = 0; + + /* Print out the new line for the next new metricgroup. */
+ if (from) { + if (config->json_output) + new_line_json(config, (void *)os); + else + __new_line_std_csv(config, os); + } + + print_noise(config, counter, noise, /*before_metric=*/true); + print_running(config, run, ena, /*before_metric=*/true); + from = perf_stat__print_shadow_stats_metricgroup(config, counter, aggr_idx, + &num, from, &out, + &config->metric_events); + } while (from != NULL); + } else + perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, + &out, &config->metric_events); } else { pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); } @@ -859,6 +974,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; + if (perf_stat__skip_metric_event(counter, &config->metric_events, ena, run)) + return; + if (val == 0 && should_skip_zero_counter(config, counter, &id)) return; @@ -1125,6 +1243,7 @@ static void print_header_interval_std(struct perf_stat_config *config, case AGGR_NODE: case AGGR_SOCKET: case AGGR_DIE: + case AGGR_CACHE: case AGGR_CORE: fprintf(output, "#%*s %-*s cpus", INTERVAL_LEN - 1, "time", @@ -1425,6 +1544,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf switch (config->aggr_mode) { case AGGR_CORE: + case AGGR_CACHE: case AGGR_DIE: case AGGR_SOCKET: case AGGR_NODE: diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 1566a206ba42..1c5c3eeba4cf 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -539,6 +539,106 @@ out: return ratio; } +static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, + struct evsel *evsel, + void *ctxp, + const char *name, + struct perf_stat_output_ctx *out) +{ + bool need_full_name = perf_pmus__num_core_pmus() > 1; + static const char *last_name; + static const char *last_pmu; + char full_name[64]; + + /* + * A metricgroup may have several metric events, + * e.g., TopdownL1 on e-core of ADL. + * The name has been output by the first metric + * event. Only align with other metrics from + * different metric events. + */ + if (last_name && !strcmp(last_name, name)) { + if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) { + out->print_metricgroup_header(config, ctxp, NULL); + return; + } + } + + if (need_full_name) + scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name); + else + scnprintf(full_name, sizeof(full_name), "%s", name); + + out->print_metricgroup_header(config, ctxp, full_name); + + last_name = name; + last_pmu = evsel->pmu_name; +} + +/** + * perf_stat__print_shadow_stats_metricgroup - Print out metrics associated with the evsel + * For the non-default mode, all metrics associated + * with the evsel are printed. + * For the default mode, only the metrics from + * the same metricgroup and the name of the + * metricgroup are printed. To print the metrics + * from the next metricgroup (if available), + * invoke the function with the corresponding + * metric_expr. + */
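The do/while in printout() above and the function defined below form a resumable printer: each call renders one metricgroup and returns an opaque cursor ('from') for the next call, or NULL once everything has been printed. A self-contained sketch of the pattern, with invented group and metric names:

```c
#include <stdio.h>
#include <string.h>
#include <stddef.h>

struct metric { const char *group, *name; };

/* Print one contiguous group; return a cursor to resume from, or NULL. */
static void *print_group(struct metric *m, size_t n, void *from)
{
	size_t i = from ? (struct metric *)from - m : 0;
	const char *group = m[i].group;

	printf("%s:", group);
	for (; i < n && !strcmp(m[i].group, group); i++)
		printf(" %s", m[i].name);
	printf("\n");
	return i < n ? &m[i] : NULL;
}

int main(void)
{
	struct metric metrics[] = {
		{ "TopdownL1", "tma_retiring" },
		{ "TopdownL1", "tma_bad_speculation" },
		{ "TopdownL2", "tma_light_operations" },
	};
	void *from = NULL;

	do {	/* same shape as the loop in printout() */
		from = print_group(metrics, 3, from);
	} while (from != NULL);
	return 0;
}
```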
+void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, + struct evsel *evsel, + int aggr_idx, + int *num, + void *from, + struct perf_stat_output_ctx *out, + struct rblist *metric_events) +{ + struct metric_event *me; + struct metric_expr *mexp = from; + void *ctxp = out->ctx; + bool header_printed = false; + const char *name = NULL; + + me = metricgroup__lookup(metric_events, evsel, false); + if (me == NULL) + return NULL; + + if (!mexp) + mexp = list_first_entry(&me->head, typeof(*mexp), nd); + + list_for_each_entry_from(mexp, &me->head, nd) { + /* Print the display name of the Default metricgroup */ + if (!config->metric_only && me->is_default) { + if (!name) + name = mexp->default_metricgroup_name; + /* + * Two or more metricgroups may share the same metric + * event, e.g., TopdownL1 and TopdownL2 on SPR. + * Return and print the prefix, e.g., noise, running + * for the next metricgroup. + */ + if (strcmp(name, mexp->default_metricgroup_name)) + return (void *)mexp; + /* Only print the name of the metricgroup once */ + if (!header_printed) { + header_printed = true; + perf_stat__print_metricgroup_header(config, evsel, ctxp, + name, out); + } + } + + if ((*num)++ > 0) + out->new_line(config, ctxp); + generic_metric(config, mexp->metric_expr, mexp->metric_threshold, + mexp->metric_events, mexp->metric_refs, evsel->name, + mexp->metric_name, mexp->metric_unit, mexp->runtime, + aggr_idx, out); + } + + return NULL; +} + void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, double avg, int aggr_idx, @@ -565,7 +665,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, }; print_metric_t print_metric = out->print_metric; void *ctxp = out->ctx; - struct metric_event *me; int num = 1; if (config->iostat_run) { @@ -592,18 +691,26 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, } } - if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { - struct metric_expr *mexp; + perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx, + &num, NULL, out, metric_events); - list_for_each_entry (mexp, &me->head, nd) { - if (num++ > 0) - out->new_line(config, ctxp); - generic_metric(config, mexp->metric_expr, mexp->metric_threshold, - mexp->metric_events, mexp->metric_refs, evsel->name, - mexp->metric_name, mexp->metric_unit, mexp->runtime, - aggr_idx, out); - } - } if (num == 0) print_metric(config, ctxp, NULL, NULL, NULL, 0); } + +/** + * perf_stat__skip_metric_event - Skip the evsel in the Default metricgroup, + * if it's not running or not the metric event. + */ +bool perf_stat__skip_metric_event(struct evsel *evsel, + struct rblist *metric_events, + u64 ena, u64 run) +{ + if (!evsel->default_metricgroup) + return false; + + if (!ena || !run) + return true; + + return !metricgroup__lookup(metric_events, evsel, false); +}
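The stat.c hunk that follows makes --repeat with global aggregation publish the mean of the per-run counts. A self-contained sketch of the bookkeeping, using stand-ins for perf's struct stats, update_stats() and avg_stats() (the run values are invented):

```c
#include <stdio.h>

struct stats { double n, mean; };	/* stand-in for perf's struct stats */

static void update_stats(struct stats *s, double val)
{
	s->n += 1.0;
	s->mean += (val - s->mean) / s->n;	/* incremental running mean */
}

static double avg_stats(const struct stats *s)
{
	return s->mean;
}

int main(void)
{
	struct stats res_stats = { 0.0, 0.0 };
	const double runs[] = { 1000000.0, 1000300.0, 999700.0 };

	for (int i = 0; i < 3; i++)	/* one update per -r run */
		update_stats(&res_stats, runs[i]);
	printf("published count: %.0f\n", avg_stats(&res_stats));	/* 1000000 */
	return 0;
}
```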
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0f7b8a8cdea6..967e583392c7 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -264,6 +264,28 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist) evsel__copy_prev_raw_counts(evsel); } +static void evsel__copy_res_stats(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + + /* + * For GLOBAL aggregation mode, it updates the counts for each run + * in the evsel->stats.res_stats. See perf_stat_process_counter(). + */ + *ps->aggr[0].counts.values = avg_stats(&ps->res_stats); +} + +void evlist__copy_res_stats(struct perf_stat_config *config, struct evlist *evlist) +{ + struct evsel *evsel; + + if (config->aggr_mode != AGGR_GLOBAL) + return; + + evlist__for_each_entry(evlist, evsel) + evsel__copy_res_stats(evsel); +} + static size_t pkg_id_hash(long __key, void *ctx __maybe_unused) { uint64_t *key = (uint64_t *) __key; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index e35e188237c8..325d0fad1842 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -48,6 +48,7 @@ enum aggr_mode { AGGR_GLOBAL, AGGR_SOCKET, AGGR_DIE, + AGGR_CACHE, AGGR_CORE, AGGR_THREAD, AGGR_UNSET, @@ -64,6 +65,7 @@ typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, str struct perf_stat_config { enum aggr_mode aggr_mode; + u32 aggr_level; bool scale; bool no_inherit; bool identifier; @@ -156,11 +158,16 @@ typedef void (*print_metric_t)(struct perf_stat_config *config, const char *fmt, double val); typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx); +/* Used to print the display name of the Default metricgroup for now. */ +typedef void (*print_metricgroup_header_t)(struct perf_stat_config *config, + void *ctx, const char *metricgroup_name); + void perf_stat__reset_shadow_stats(void); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; new_line_t new_line; + print_metricgroup_header_t print_metricgroup_header; bool force_header; }; @@ -169,6 +176,16 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, double avg, int aggr_idx, struct perf_stat_output_ctx *out, struct rblist *metric_events); +bool perf_stat__skip_metric_event(struct evsel *evsel, + struct rblist *metric_events, + u64 ena, u64 run); +void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, + struct evsel *evsel, + int aggr_idx, + int *num, + void *from, + struct perf_stat_output_ctx *out, + struct rblist *metric_events); int evlist__alloc_stats(struct perf_stat_config *config, struct evlist *evlist, bool alloc_raw); @@ -180,6 +197,7 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist); int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr); void evlist__reset_aggr_stats(struct evlist *evlist); +void evlist__copy_res_stats(struct perf_stat_config *config, struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 63882a4db5c7..8bd466d1c2bd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -42,6 +42,10 @@ #define EM_AARCH64 183 /* ARM 64 bit */ #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#endif + #ifndef ELF32_ST_VISIBILITY #define ELF32_ST_VISIBILITY(o) ((o) & 0x03) #endif @@ -438,6 +442,10 @@ static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, *plt_header_size = 32; *plt_entry_size = 16; return true; + case EM_LOONGARCH: + *plt_header_size = 32; + *plt_entry_size = 16; + return true; case EM_SPARC: *plt_header_size = 48; *plt_entry_size = 12; @@ -1389,11 +1397,11 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, /* Ensure maps are correctly ordered */ if (kmaps) { int err; + struct map *tmp = map__get(map); - map__get(map); maps__remove(kmaps, map); err = maps__insert(kmaps, map); - map__put(map); + map__put(tmp); if (err) return err; } diff --git a/tools/perf/util/symbol.c
b/tools/perf/util/symbol.c index 6b9c55784b56..d275d3bef7d5 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1458,16 +1458,18 @@ static int dso__load_kcore(struct dso *dso, struct map *map, list_del_init(&new_node->node); if (RC_CHK_ACCESS(new_map) == RC_CHK_ACCESS(replacement_map)) { + struct map *map_ref; + map__set_start(map, map__start(new_map)); map__set_end(map, map__end(new_map)); map__set_pgoff(map, map__pgoff(new_map)); map__set_map_ip(map, map__map_ip_ptr(new_map)); map__set_unmap_ip(map, map__unmap_ip_ptr(new_map)); /* Ensure maps are correctly ordered */ - map__get(map); - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(map); + map_ref = map__get(map); + maps__remove(kmaps, map_ref); + err = maps__insert(kmaps, map_ref); + map__put(map_ref); map__put(new_map); if (err) goto out_err; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 7558735543c2..5ca8665dd2c1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -9,6 +9,7 @@ #include <linux/list.h> #include <linux/rbtree.h> #include <stdio.h> +#include "addr_location.h" #include "path.h" #include "symbol_conf.h" #include "spark.h" @@ -120,22 +121,6 @@ struct ref_reloc_sym { u64 unrelocated_addr; }; -struct addr_location { - struct thread *thread; - struct maps *maps; - struct map *map; - struct symbol *sym; - const char *srcline; - u64 addr; - char level; - u8 filtered; - u8 cpumode; - s32 cpu; - s32 socket; -}; - -void addr_location__put(struct addr_location *al); - int dso__load(struct dso *dso, struct map *map); int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index b2e4afa5efa1..45714a2785fd 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1375,6 +1375,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool, ADD(AGGR_MODE, config->aggr_mode) ADD(INTERVAL, config->interval) ADD(SCALE, config->scale) + ADD(AGGR_LEVEL, config->aggr_level) WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX, "stat config terms unbalanced\n"); diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 313eccef6cb4..63be7b58761d 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -17,31 +17,31 @@ #if defined(__x86_64__) #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; -static const char **syscalltbl_native = syscalltbl_x86_64; +static const char *const *syscalltbl_native = syscalltbl_x86_64; #elif defined(__s390x__) #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; -static const char **syscalltbl_native = syscalltbl_s390_64; +static const char *const *syscalltbl_native = syscalltbl_s390_64; #elif defined(__powerpc64__) #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID; -static const char **syscalltbl_native = syscalltbl_powerpc_64; +static const char *const *syscalltbl_native = syscalltbl_powerpc_64; #elif defined(__powerpc__) #include <asm/syscalls_32.c> const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID; -static const char **syscalltbl_native = syscalltbl_powerpc_32; +static const char *const *syscalltbl_native = syscalltbl_powerpc_32; #elif defined(__aarch64__) #include <asm/syscalls.c> const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; -static const char **syscalltbl_native = 
syscalltbl_arm64; +static const char *const *syscalltbl_native = syscalltbl_arm64; #elif defined(__mips__) #include <asm/syscalls_n64.c> const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID; -static const char **syscalltbl_native = syscalltbl_mips_n64; +static const char *const *syscalltbl_native = syscalltbl_mips_n64; #elif defined(__loongarch__) #include <asm/syscalls.c> const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; -static const char **syscalltbl_native = syscalltbl_loongarch; +static const char *const *syscalltbl_native = syscalltbl_loongarch; #endif struct syscall { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 880f1af7f6ad..d582cae8e105 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -17,7 +17,6 @@ struct target { bool default_per_cpu; bool per_thread; bool use_bpf; - bool hybrid; int initial_delay; const char *attr_map; }; diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 4b85c1728012..374d142e7390 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -112,7 +112,7 @@ struct thread_stack { */ static inline bool thread_stack__per_cpu(struct thread *thread) { - return !(thread->tid || thread->pid_); + return !(thread__tid(thread) || thread__pid(thread)); } static int thread_stack__grow(struct thread_stack *ts) @@ -155,8 +155,8 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread, ts->br_stack_sz = br_stack_sz; } - if (thread->maps && maps__machine(thread->maps)) { - struct machine *machine = maps__machine(thread->maps); + if (thread__maps(thread) && maps__machine(thread__maps(thread))) { + struct machine *machine = maps__machine(thread__maps(thread)); const char *arch = perf_env__arch(machine->env); ts->kernel_start = machine__kernel_start(machine); @@ -175,7 +175,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, bool callstack, unsigned int br_stack_sz) { - struct thread_stack *ts = thread->ts, *new_ts; + struct thread_stack *ts = thread__ts(thread), *new_ts; unsigned int old_sz = ts ? 
ts->arr_sz : 0; unsigned int new_sz = 1; @@ -189,8 +189,8 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, if (ts) memcpy(new_ts, ts, old_sz * sizeof(*ts)); new_ts->arr_sz = new_sz; - zfree(&thread->ts); - thread->ts = new_ts; + free(thread__ts(thread)); + thread__set_ts(thread, new_ts); ts = new_ts; } @@ -207,7 +207,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu) { - struct thread_stack *ts = thread->ts; + struct thread_stack *ts = thread__ts(thread); if (cpu < 0) cpu = 0; @@ -232,7 +232,7 @@ static inline struct thread_stack *thread__stack(struct thread *thread, if (thread_stack__per_cpu(thread)) return thread__cpu_stack(thread, cpu); - return thread->ts; + return thread__ts(thread); } static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, @@ -363,7 +363,7 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) int thread_stack__flush(struct thread *thread) { - struct thread_stack *ts = thread->ts; + struct thread_stack *ts = thread__ts(thread); unsigned int pos; int err = 0; @@ -502,13 +502,14 @@ static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) void thread_stack__free(struct thread *thread) { - struct thread_stack *ts = thread->ts; + struct thread_stack *ts = thread__ts(thread); unsigned int pos; if (ts) { for (pos = 0; pos < ts->arr_sz; pos++) __thread_stack__free(thread, ts + pos); - zfree(&thread->ts); + free(thread__ts(thread)); + thread__set_ts(thread, NULL); } } @@ -1127,7 +1128,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->rstate = X86_RETPOLINE_POSSIBLE; /* Flush stack on exec */ - if (ts->comm != comm && thread->pid_ == thread->tid) { + if (ts->comm != comm && thread__pid(thread) == thread__tid(thread)) { err = __thread_stack__flush(thread, ts); if (err) return err; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 4b5bdc277baa..0b166404c5c3 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -21,38 +21,40 @@ int thread__init_maps(struct thread *thread, struct machine *machine) { - pid_t pid = thread->pid_; + pid_t pid = thread__pid(thread); - if (pid == thread->tid || pid == -1) { - thread->maps = maps__new(machine); + if (pid == thread__tid(thread) || pid == -1) { + thread__set_maps(thread, maps__new(machine)); } else { struct thread *leader = __machine__findnew_thread(machine, pid, pid); + if (leader) { - thread->maps = maps__get(leader->maps); + thread__set_maps(thread, maps__get(thread__maps(leader))); thread__put(leader); } } - return thread->maps ? 0 : -1; + return thread__maps(thread) ? 
0 : -1; } struct thread *thread__new(pid_t pid, pid_t tid) { char *comm_str; struct comm *comm; - struct thread *thread = zalloc(sizeof(*thread)); - - if (thread != NULL) { - thread->pid_ = pid; - thread->tid = tid; - thread->ppid = -1; - thread->cpu = -1; - thread->guest_cpu = -1; - thread->lbr_stitch_enable = false; - INIT_LIST_HEAD(&thread->namespaces_list); - INIT_LIST_HEAD(&thread->comm_list); - init_rwsem(&thread->namespaces_lock); - init_rwsem(&thread->comm_lock); + RC_STRUCT(thread) *_thread = zalloc(sizeof(*_thread)); + struct thread *thread; + + if (ADD_RC_CHK(thread, _thread) != NULL) { + thread__set_pid(thread, pid); + thread__set_tid(thread, tid); + thread__set_ppid(thread, -1); + thread__set_cpu(thread, -1); + thread__set_guest_cpu(thread, -1); + thread__set_lbr_stitch_enable(thread, false); + INIT_LIST_HEAD(thread__namespaces_list(thread)); + INIT_LIST_HEAD(thread__comm_list(thread)); + init_rwsem(thread__namespaces_lock(thread)); + init_rwsem(thread__comm_lock(thread)); comm_str = malloc(32); if (!comm_str) @@ -64,12 +66,11 @@ struct thread *thread__new(pid_t pid, pid_t tid) if (!comm) goto err_thread; - list_add(&comm->list, &thread->comm_list); - refcount_set(&thread->refcnt, 1); - RB_CLEAR_NODE(&thread->rb_node); + list_add(&comm->list, thread__comm_list(thread)); + refcount_set(thread__refcnt(thread), 1); /* Thread holds first ref to nsdata. */ - thread->nsinfo = nsinfo__new(pid); - srccode_state_init(&thread->srccode_state); + RC_CHK_ACCESS(thread)->nsinfo = nsinfo__new(pid); + srccode_state_init(thread__srccode_state(thread)); } return thread; @@ -84,89 +85,69 @@ void thread__delete(struct thread *thread) struct namespaces *namespaces, *tmp_namespaces; struct comm *comm, *tmp_comm; - BUG_ON(!RB_EMPTY_NODE(&thread->rb_node)); - thread_stack__free(thread); - if (thread->maps) { - maps__put(thread->maps); - thread->maps = NULL; + if (thread__maps(thread)) { + maps__put(thread__maps(thread)); + thread__set_maps(thread, NULL); } - down_write(&thread->namespaces_lock); + down_write(thread__namespaces_lock(thread)); list_for_each_entry_safe(namespaces, tmp_namespaces, - &thread->namespaces_list, list) { + thread__namespaces_list(thread), list) { list_del_init(&namespaces->list); namespaces__free(namespaces); } - up_write(&thread->namespaces_lock); + up_write(thread__namespaces_lock(thread)); - down_write(&thread->comm_lock); - list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) { + down_write(thread__comm_lock(thread)); + list_for_each_entry_safe(comm, tmp_comm, thread__comm_list(thread), list) { list_del_init(&comm->list); comm__free(comm); } - up_write(&thread->comm_lock); + up_write(thread__comm_lock(thread)); - nsinfo__zput(thread->nsinfo); - srccode_state_free(&thread->srccode_state); + nsinfo__zput(RC_CHK_ACCESS(thread)->nsinfo); + srccode_state_free(thread__srccode_state(thread)); - exit_rwsem(&thread->namespaces_lock); - exit_rwsem(&thread->comm_lock); + exit_rwsem(thread__namespaces_lock(thread)); + exit_rwsem(thread__comm_lock(thread)); thread__free_stitch_list(thread); - free(thread); + RC_CHK_FREE(thread); } struct thread *thread__get(struct thread *thread) { - if (thread) - refcount_inc(&thread->refcnt); - return thread; + struct thread *result; + + if (RC_CHK_GET(result, thread)) + refcount_inc(thread__refcnt(thread)); + + return result; } void thread__put(struct thread *thread) { - if (thread && refcount_dec_and_test(&thread->refcnt)) { - /* - * Remove it from the dead threads list, as last reference is - * gone, if it is in a dead threads list. 
- * - * We may not be there anymore if say, the machine where it was - * stored was already deleted, so we already removed it from - * the dead threads and some other piece of code still keeps a - * reference. - * - * This is what 'perf sched' does and finally drops it in - * perf_sched__lat(), where it calls perf_sched__read_events(), - * that processes the events by creating a session and deleting - * it, which ends up destroying the list heads for the dead - * threads, but before it does that it removes all threads from - * it using list_del_init(). - * - * So we need to check here if it is in a dead threads list and - * if so, remove it before finally deleting the thread, to avoid - * an use after free situation. - */ - if (!list_empty(&thread->node)) - list_del_init(&thread->node); + if (thread && refcount_dec_and_test(thread__refcnt(thread))) thread__delete(thread); - } + else + RC_CHK_PUT(thread); } -static struct namespaces *__thread__namespaces(const struct thread *thread) +static struct namespaces *__thread__namespaces(struct thread *thread) { - if (list_empty(&thread->namespaces_list)) + if (list_empty(thread__namespaces_list(thread))) return NULL; - return list_first_entry(&thread->namespaces_list, struct namespaces, list); + return list_first_entry(thread__namespaces_list(thread), struct namespaces, list); } struct namespaces *thread__namespaces(struct thread *thread) { struct namespaces *ns; - down_read(&thread->namespaces_lock); + down_read(thread__namespaces_lock(thread)); ns = __thread__namespaces(thread); - up_read(&thread->namespaces_lock); + up_read(thread__namespaces_lock(thread)); return ns; } @@ -180,7 +161,7 @@ static int __thread__set_namespaces(struct thread *thread, u64 timestamp, if (!new) return -ENOMEM; - list_add(&new->list, &thread->namespaces_list); + list_add(&new->list, thread__namespaces_list(thread)); if (timestamp && curr) { /* @@ -200,25 +181,25 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp, { int ret; - down_write(&thread->namespaces_lock); + down_write(thread__namespaces_lock(thread)); ret = __thread__set_namespaces(thread, timestamp, event); - up_write(&thread->namespaces_lock); + up_write(thread__namespaces_lock(thread)); return ret; } -struct comm *thread__comm(const struct thread *thread) +struct comm *thread__comm(struct thread *thread) { - if (list_empty(&thread->comm_list)) + if (list_empty(thread__comm_list(thread))) return NULL; - return list_first_entry(&thread->comm_list, struct comm, list); + return list_first_entry(thread__comm_list(thread), struct comm, list); } -struct comm *thread__exec_comm(const struct thread *thread) +struct comm *thread__exec_comm(struct thread *thread) { struct comm *comm, *last = NULL, *second_last = NULL; - list_for_each_entry(comm, &thread->comm_list, list) { + list_for_each_entry(comm, thread__comm_list(thread), list) { if (comm->exec) return comm; second_last = last; @@ -231,7 +212,7 @@ struct comm *thread__exec_comm(const struct thread *thread) * thread, that is very probably wrong. Prefer a later comm to avoid * that case. 
*/ - if (second_last && !last->start && thread->pid_ == thread->tid) + if (second_last && !last->start && thread__pid(thread) == thread__tid(thread)) return second_last; return last; @@ -243,7 +224,7 @@ static int ____thread__set_comm(struct thread *thread, const char *str, struct comm *new, *curr = thread__comm(thread); /* Override the default :tid entry */ - if (!thread->comm_set) { + if (!thread__comm_set(thread)) { int err = comm__override(curr, str, timestamp, exec); if (err) return err; @@ -251,13 +232,13 @@ static int ____thread__set_comm(struct thread *thread, const char *str, new = comm__new(str, timestamp, exec); if (!new) return -ENOMEM; - list_add(&new->list, &thread->comm_list); + list_add(&new->list, thread__comm_list(thread)); if (exec) - unwind__flush_access(thread->maps); + unwind__flush_access(thread__maps(thread)); } - thread->comm_set = true; + thread__set_comm_set(thread, true); return 0; } @@ -267,9 +248,9 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, { int ret; - down_write(&thread->comm_lock); + down_write(thread__comm_lock(thread)); ret = ____thread__set_comm(thread, str, timestamp, exec); - up_write(&thread->comm_lock); + up_write(thread__comm_lock(thread)); return ret; } @@ -281,7 +262,7 @@ int thread__set_comm_from_proc(struct thread *thread) int err = -1; if (!(snprintf(path, sizeof(path), "%d/task/%d/comm", - thread->pid_, thread->tid) >= (int)sizeof(path)) && + thread__pid(thread), thread__tid(thread)) >= (int)sizeof(path)) && procfs__read_str(path, &comm, &sz) == 0) { comm[sz - 1] = '\0'; err = thread__set_comm(thread, comm, 0); @@ -290,7 +271,7 @@ int thread__set_comm_from_proc(struct thread *thread) return err; } -static const char *__thread__comm_str(const struct thread *thread) +static const char *__thread__comm_str(struct thread *thread) { const struct comm *comm = thread__comm(thread); @@ -304,9 +285,9 @@ const char *thread__comm_str(struct thread *thread) { const char *str; - down_read(&thread->comm_lock); + down_read(thread__comm_lock(thread)); str = __thread__comm_str(thread); - up_read(&thread->comm_lock); + up_read(thread__comm_lock(thread)); return str; } @@ -315,23 +296,23 @@ static int __thread__comm_len(struct thread *thread, const char *comm) { if (!comm) return 0; - thread->comm_len = strlen(comm); + thread__set_comm_len(thread, strlen(comm)); - return thread->comm_len; + return thread__var_comm_len(thread); } /* CHECKME: it should probably better return the max comm len from its comm list */ int thread__comm_len(struct thread *thread) { - int comm_len = thread->comm_len; + int comm_len = thread__var_comm_len(thread); if (!comm_len) { const char *comm; - down_read(&thread->comm_lock); + down_read(thread__comm_lock(thread)); comm = __thread__comm_str(thread); comm_len = __thread__comm_len(thread, comm); - up_read(&thread->comm_lock); + up_read(thread__comm_lock(thread)); } return comm_len; @@ -339,33 +320,33 @@ int thread__comm_len(struct thread *thread) size_t thread__fprintf(struct thread *thread, FILE *fp) { - return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) + - maps__fprintf(thread->maps, fp); + return fprintf(fp, "Thread %d %s\n", thread__tid(thread), thread__comm_str(thread)) + + maps__fprintf(thread__maps(thread), fp); } int thread__insert_map(struct thread *thread, struct map *map) { int ret; - ret = unwind__prepare_access(thread->maps, map, NULL); + ret = unwind__prepare_access(thread__maps(thread), map, NULL); if (ret) return ret; - maps__fixup_overlappings(thread->maps, 
map, stderr); - return maps__insert(thread->maps, map); + maps__fixup_overlappings(thread__maps(thread), map, stderr); + return maps__insert(thread__maps(thread), map); } static int __thread__prepare_access(struct thread *thread) { bool initialized = false; int err = 0; - struct maps *maps = thread->maps; + struct maps *maps = thread__maps(thread); struct map_rb_node *rb_node; down_read(maps__lock(maps)); maps__for_each_entry(maps, rb_node) { - err = unwind__prepare_access(thread->maps, rb_node->map, &initialized); + err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized); if (err || initialized) break; } @@ -388,21 +369,22 @@ static int thread__prepare_access(struct thread *thread) static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone) { /* This is new thread, we share map groups for process. */ - if (thread->pid_ == parent->pid_) + if (thread__pid(thread) == thread__pid(parent)) return thread__prepare_access(thread); - if (thread->maps == parent->maps) { + if (thread__maps(thread) == thread__maps(parent)) { pr_debug("broken map groups on thread %d/%d parent %d/%d\n", - thread->pid_, thread->tid, parent->pid_, parent->tid); + thread__pid(thread), thread__tid(thread), + thread__pid(parent), thread__tid(parent)); return 0; } /* But this one is new process, copy maps. */ - return do_maps_clone ? maps__clone(thread, parent->maps) : 0; + return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0; } int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) { - if (parent->comm_set) { + if (thread__comm_set(parent)) { const char *comm = thread__comm_str(parent); int err; if (!comm) @@ -412,7 +394,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo return err; } - thread->ppid = parent->tid; + thread__set_ppid(thread, thread__tid(parent)); return thread__clone_maps(thread, parent, do_maps_clone); } @@ -436,13 +418,13 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, struct thread *thread__main_thread(struct machine *machine, struct thread *thread) { - if (thread->pid_ == thread->tid) + if (thread__pid(thread) == thread__tid(thread)) return thread__get(thread); - if (thread->pid_ == -1) + if (thread__pid(thread) == -1) return NULL; - return machine__find_thread(machine, thread->pid_, thread->pid_); + return machine__find_thread(machine, thread__pid(thread), thread__pid(thread)); } int thread__memcpy(struct thread *thread, struct machine *machine, @@ -456,24 +438,31 @@ int thread__memcpy(struct thread *thread, struct machine *machine, if (machine__kernel_ip(machine, ip)) cpumode = PERF_RECORD_MISC_KERNEL; - if (!thread__find_map(thread, cpumode, ip, &al)) - return -1; + addr_location__init(&al); + if (!thread__find_map(thread, cpumode, ip, &al)) { + addr_location__exit(&al); + return -1; + } dso = map__dso(al.map); - if( !dso || dso->data.status == DSO_DATA_STATUS_ERROR || map__load(al.map) < 0) + if (!dso || dso->data.status == DSO_DATA_STATUS_ERROR || map__load(al.map) < 0) { + addr_location__exit(&al); return -1; + } offset = map__map_ip(al.map, ip); if (is64bit) *is64bit = dso->is_64_bit; + addr_location__exit(&al); + return dso__data_read_offset(dso, machine, offset, buf, len); } void thread__free_stitch_list(struct thread *thread) { - struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct lbr_stitch *lbr_stitch = thread__lbr_stitch(thread); struct stitch_list *pos, *tmp; if (!lbr_stitch) @@ -490,5 +479,6 @@ void 
thread__free_stitch_list(struct thread *thread) } zfree(&lbr_stitch->prev_lbr_cursor); - zfree(&thread->lbr_stitch); + free(thread__lbr_stitch(thread)); + thread__set_lbr_stitch(thread, NULL); } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 395c626699a9..9068a21ce0fa 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -15,6 +15,7 @@ #include "rwsem.h" #include "event.h" #include "callchain.h" +#include <internal/rc_check.h> struct addr_location; struct map; @@ -29,11 +30,12 @@ struct lbr_stitch { struct callchain_cursor_node *prev_lbr_cursor; }; -struct thread { - union { - struct rb_node rb_node; - struct list_head node; - }; +struct thread_rb_node { + struct rb_node rb_node; + struct thread *thread; +}; + +DECLARE_RC_STRUCT(thread) { struct maps *maps; pid_t pid_; /* Not all tools update this */ pid_t tid; @@ -43,7 +45,6 @@ struct thread { refcount_t refcnt; bool comm_set; int comm_len; - bool dead; /* if set thread has exited */ struct list_head namespaces_list; struct rw_semaphore namespaces_lock; struct list_head comm_list; @@ -81,11 +82,6 @@ static inline void __thread__zput(struct thread **thread) #define thread__zput(thread) __thread__zput(&thread) -static inline void thread__exited(struct thread *thread) -{ - thread->dead = true; -} - struct namespaces *thread__namespaces(struct thread *thread); int thread__set_namespaces(struct thread *thread, u64 timestamp, struct perf_record_namespaces *event); @@ -101,8 +97,8 @@ static inline int thread__set_comm(struct thread *thread, const char *comm, int thread__set_comm_from_proc(struct thread *thread); int thread__comm_len(struct thread *thread); -struct comm *thread__comm(const struct thread *thread); -struct comm *thread__exec_comm(const struct thread *thread); +struct comm *thread__comm(struct thread *thread); +struct comm *thread__exec_comm(struct thread *thread); const char *thread__comm_str(struct thread *thread); int thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); @@ -126,14 +122,194 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, int thread__memcpy(struct thread *thread, struct machine *machine, void *buf, u64 ip, int len, bool *is64bit); +static inline struct maps *thread__maps(struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->maps; +} + +static inline void thread__set_maps(struct thread *thread, struct maps *maps) +{ + RC_CHK_ACCESS(thread)->maps = maps; +} + +static inline pid_t thread__pid(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->pid_; +} + +static inline void thread__set_pid(struct thread *thread, pid_t pid_) +{ + RC_CHK_ACCESS(thread)->pid_ = pid_; +} + +static inline pid_t thread__tid(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->tid; +} + +static inline void thread__set_tid(struct thread *thread, pid_t tid) +{ + RC_CHK_ACCESS(thread)->tid = tid; +} + +static inline pid_t thread__ppid(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->ppid; +} + +static inline void thread__set_ppid(struct thread *thread, pid_t ppid) +{ + RC_CHK_ACCESS(thread)->ppid = ppid; +} + +static inline int thread__cpu(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->cpu; +} + +static inline void thread__set_cpu(struct thread *thread, int cpu) +{ + RC_CHK_ACCESS(thread)->cpu = cpu; +} + +static inline int thread__guest_cpu(const struct thread *thread) +{ + return 
RC_CHK_ACCESS(thread)->guest_cpu; +} + +static inline void thread__set_guest_cpu(struct thread *thread, int guest_cpu) +{ + RC_CHK_ACCESS(thread)->guest_cpu = guest_cpu; +} + +static inline refcount_t *thread__refcnt(struct thread *thread) +{ + return &RC_CHK_ACCESS(thread)->refcnt; +} + +static inline bool thread__comm_set(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->comm_set; +} + +static inline void thread__set_comm_set(struct thread *thread, bool set) +{ + RC_CHK_ACCESS(thread)->comm_set = set; +} + +static inline int thread__var_comm_len(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->comm_len; +} + +static inline void thread__set_comm_len(struct thread *thread, int len) +{ + RC_CHK_ACCESS(thread)->comm_len = len; +} + +static inline struct list_head *thread__namespaces_list(struct thread *thread) +{ + return &RC_CHK_ACCESS(thread)->namespaces_list; +} + +static inline int thread__namespaces_list_empty(const struct thread *thread) +{ + return list_empty(&RC_CHK_ACCESS(thread)->namespaces_list); +} + +static inline struct rw_semaphore *thread__namespaces_lock(struct thread *thread) +{ + return &RC_CHK_ACCESS(thread)->namespaces_lock; +} + +static inline struct list_head *thread__comm_list(struct thread *thread) +{ + return &RC_CHK_ACCESS(thread)->comm_list; +} + +static inline struct rw_semaphore *thread__comm_lock(struct thread *thread) +{ + return &RC_CHK_ACCESS(thread)->comm_lock; +} + +static inline u64 thread__db_id(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->db_id; +} + +static inline void thread__set_db_id(struct thread *thread, u64 db_id) +{ + RC_CHK_ACCESS(thread)->db_id = db_id; +} + static inline void *thread__priv(struct thread *thread) { - return thread->priv; + return RC_CHK_ACCESS(thread)->priv; } static inline void thread__set_priv(struct thread *thread, void *p) { - thread->priv = p; + RC_CHK_ACCESS(thread)->priv = p; +} + +static inline struct thread_stack *thread__ts(struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->ts; +} + +static inline void thread__set_ts(struct thread *thread, struct thread_stack *ts) +{ + RC_CHK_ACCESS(thread)->ts = ts; +} + +static inline struct nsinfo *thread__nsinfo(struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->nsinfo; +} + +static inline struct srccode_state *thread__srccode_state(struct thread *thread) +{ + return &RC_CHK_ACCESS(thread)->srccode_state; +} + +static inline bool thread__filter(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->filter; +} + +static inline void thread__set_filter(struct thread *thread, bool filter) +{ + RC_CHK_ACCESS(thread)->filter = filter; +} + +static inline int thread__filter_entry_depth(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->filter_entry_depth; +} + +static inline void thread__set_filter_entry_depth(struct thread *thread, int depth) +{ + RC_CHK_ACCESS(thread)->filter_entry_depth = depth; +} + +static inline bool thread__lbr_stitch_enable(const struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->lbr_stitch_enable; +} + +static inline void thread__set_lbr_stitch_enable(struct thread *thread, bool en) +{ + RC_CHK_ACCESS(thread)->lbr_stitch_enable = en; +} + +static inline struct lbr_stitch *thread__lbr_stitch(struct thread *thread) +{ + return RC_CHK_ACCESS(thread)->lbr_stitch; +} + +static inline void thread__set_lbr_stitch(struct thread *thread, struct lbr_stitch *lbrs) +{ + RC_CHK_ACCESS(thread)->lbr_stitch = lbrs; } static inline bool thread__is_filtered(struct thread *thread) @@ 
-144,12 +320,12 @@ static inline bool thread__is_filtered(struct thread *thread) } if (symbol_conf.pid_list && - !intlist__has_entry(symbol_conf.pid_list, thread->pid_)) { + !intlist__has_entry(symbol_conf.pid_list, thread__pid(thread))) { return true; } if (symbol_conf.tid_list && - !intlist__has_entry(symbol_conf.tid_list, thread->tid)) { + !intlist__has_entry(symbol_conf.tid_list, thread__tid(thread))) { return true; } diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index bdccfc511b7e..83eea968482e 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -90,8 +90,12 @@ static int __report_module(struct addr_location *al, u64 ip, static int report_module(u64 ip, struct unwind_info *ui) { struct addr_location al; + int res; - return __report_module(&al, ip, ui); + addr_location__init(&al); + res = __report_module(&al, ip, ui); + addr_location__exit(&al); + return res; } /* @@ -104,8 +108,11 @@ static int entry(u64 ip, struct unwind_info *ui) struct unwind_entry *e = &ui->entries[ui->idx++]; struct addr_location al; - if (__report_module(&al, ip, ui)) + addr_location__init(&al); + if (__report_module(&al, ip, ui)) { + addr_location__exit(&al); return -1; + } e->ip = ip; e->ms.maps = al.maps; @@ -116,6 +123,7 @@ static int entry(u64 ip, struct unwind_info *ui) al.sym ? al.sym->name : "''", ip, al.map ? map__map_ip(al.map, ip) : (u64) 0); + addr_location__exit(&al); return 0; } @@ -136,17 +144,22 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr, ssize_t size; struct dso *dso; + addr_location__init(&al); if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); - return -1; + goto out_fail; } dso = map__dso(al.map); if (!dso) - return -1; + goto out_fail; size = dso__data_read_addr(dso, al.map, ui->machine, addr, (u8 *) data, sizeof(*data)); + addr_location__exit(&al); return !(size == sizeof(*data)); +out_fail: + addr_location__exit(&al); + return -1; } static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *result, @@ -230,7 +243,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct unwind_info *ui, ui_buf = { .sample = data, .thread = thread, - .machine = RC_CHK_ACCESS(thread->maps)->machine, + .machine = RC_CHK_ACCESS(thread__maps(thread))->machine, .cb = cb, .arg = arg, .max_stack = max_stack, @@ -260,11 +273,11 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, if (err) goto out; - err = !dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui); + err = !dwfl_attach_state(ui->dwfl, EM_NONE, thread__tid(thread), &callbacks, ui); if (err) goto out; - err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui); + err = dwfl_getthread_frames(ui->dwfl, thread__tid(thread), frame_callback, ui); if (err && ui->max_stack != max_stack) err = 0; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 83dd79dcd597..ebfde537b99b 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -325,7 +325,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, return -EINVAL; } - maps__for_each_entry(ui->thread->maps, map_node) { + maps__for_each_entry(thread__maps(ui->thread), map_node) { struct map *map = map_node->map; u64 start = map__start(map); @@ -416,7 +416,13 @@ static int read_unwind_spec_debug_frame(struct dso *dso, static struct map *find_map(unw_word_t ip, struct 
unwind_info *ui) { struct addr_location al; - return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al); + struct map *ret; + + addr_location__init(&al); + thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al); + ret = map__get(al.map); + addr_location__exit(&al); + return ret; } static int @@ -435,8 +441,10 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, return -EINVAL; dso = map__dso(map); - if (!dso) + if (!dso) { + map__put(map); return -EINVAL; + } pr_debug("unwind: find_proc_info dso %s\n", dso->name); @@ -471,11 +479,11 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, memset(&di, 0, sizeof(di)); if (dwarf_find_debug_frame(0, &di, ip, base, symfile, start, map__end(map))) - return dwarf_search_unwind_table(as, ip, &di, pi, - need_unwind_info, arg); + ret = dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); } #endif - + map__put(map); return ret; } @@ -529,12 +537,14 @@ static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, dso = map__dso(map); - if (!dso) + if (!dso) { + map__put(map); return -1; + } size = dso__data_read_addr(dso, map, ui->machine, addr, (u8 *) data, sizeof(*data)); - + map__put(map); return !(size == sizeof(*data)); } @@ -631,7 +641,9 @@ static int entry(u64 ip, struct thread *thread, { struct unwind_entry e; struct addr_location al; + int ret; + addr_location__init(&al); e.ms.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al); e.ip = ip; e.ms.map = al.map; @@ -642,7 +654,9 @@ static int entry(u64 ip, struct thread *thread, ip, al.map ? map__map_ip(al.map, ip) : (u64) 0); - return cb(&e, arg); + ret = cb(&e, arg); + addr_location__exit(&al); + return ret; } static void display_error(int err) @@ -719,7 +733,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, */ if (max_stack - 1 > 0) { WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL"); - addr_space = maps__addr_space(ui->thread->maps); + addr_space = maps__addr_space(thread__maps(ui->thread)); if (addr_space == NULL) return -1; @@ -769,7 +783,7 @@ static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct unwind_info ui = { .sample = data, .thread = thread, - .machine = maps__machine(thread->maps), + .machine = maps__machine(thread__maps(thread)), .best_effort = best_effort }; diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 375d23d9a590..76cd63de80a8 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -89,7 +89,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct perf_sample *data, int max_stack, bool best_effort) { - const struct unwind_libunwind_ops *ops = maps__unwind_libunwind_ops(thread->maps); + const struct unwind_libunwind_ops *ops = maps__unwind_libunwind_ops(thread__maps(thread)); if (ops) return ops->get_entries(cb, arg, thread, data, max_stack, best_effort); diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index ec777ee11493..ae3eee69b659 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -146,7 +146,7 @@ static enum dso_type machine__thread_dso_type(struct machine *machine, enum dso_type dso_type = DSO__TYPE_UNKNOWN; struct map_rb_node *rb_node; - maps__for_each_entry(thread->maps, rb_node) { + maps__for_each_entry(thread__maps(thread), rb_node) { struct dso *dso = map__dso(rb_node->map); if (!dso || dso->long_name[0] != '/')
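
Taken together, the hunks above establish two conventions for the refactored code: a stack-allocated struct addr_location must be bracketed by addr_location__init()/addr_location__exit(), since exit drops whatever map/maps/thread references a lookup stored in it, and struct thread fields are reached only through the new accessor functions, because DECLARE_RC_STRUCT(thread) makes direct field access a build error when perf is compiled with REFCNT_CHECKING. A minimal consumer sketch follows, for illustration only: resolve_map_offset() is a hypothetical helper, while every call it makes is introduced or used in the diffs above.

	/* Illustrative only -- not part of the patch. */
	#include <inttypes.h>
	#include <linux/perf_event.h>	/* PERF_RECORD_MISC_USER */
	#include "util/addr_location.h"
	#include "util/debug.h"		/* pr_debug() */
	#include "util/event.h"		/* thread__find_map() */
	#include "util/map.h"
	#include "util/thread.h"

	static u64 resolve_map_offset(struct thread *thread, u64 ip)
	{
		struct addr_location al;
		u64 offset = 0;

		/* Zeroes every field; must be paired with addr_location__exit(). */
		addr_location__init(&al);

		/* Any map/maps/thread references stored in 'al' are owned by it. */
		if (thread__find_map(thread, PERF_RECORD_MISC_USER, ip, &al) && al.map)
			offset = map__map_ip(al.map, ip);

		/* thread__tid() replaces thread->tid, per the accessors above. */
		pr_debug("tid %d: ip %#" PRIx64 " -> offset %#" PRIx64 "\n",
			 thread__tid(thread), ip, offset);

		/* Drops the references taken during the lookup. */
		addr_location__exit(&al);
		return offset;
	}

Note that addr_location__exit() runs on every path out of the helper, mirroring the goto out_fail pattern the patch adds to access_dso_mem() so that early returns cannot leak the references held in 'al'.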