diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-07-16 22:36:42 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-07-16 22:36:42 +0200 |
commit | 09211e2530ab4905ec16edecc27022d6b247419d (patch) | |
tree | ca894cb0a6b0b51dfc8c73316319e2a714aac256 /tools | |
parent | b29c6574699dc475da5dbff8db19297b203aacce (diff) | |
parent | b49364f36cfdb6d540ac961102d7ffaf84279bb6 (diff) | |
download | lwn-09211e2530ab4905ec16edecc27022d6b247419d.tar.gz lwn-09211e2530ab4905ec16edecc27022d6b247419d.zip |
Merge tag 'perf-core-for-mingo-20160715' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
User visible changes:
- Allow reading from a backward ring buffer (one set up via sys_perf_event_open()
with perf_event_attr.write_backward = 1) (Wang Nan)
Infrastructure changes:
- Fix the build on Android NDK r12b (initially just for ARM), which is now part
of my perf-build container collection and will get tested prior to sending
patches upstream (Arnaldo Carvalho de Melo)
- Add correct header for IPv6 definitions
- Fix bitsperlong.h fallout (Arnaldo Carvalho de Melo, Peter Zijlstra)
- Use base 0 (auto) in filename__read_ull(), so that we can handle hex values too (Jiri Olsa)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/include/asm-generic/bitsperlong.h | 23 | ||||
-rw-r--r-- | tools/include/linux/compiler.h | 11 | ||||
-rw-r--r-- | tools/lib/api/fd/array.h | 1 | ||||
-rw-r--r-- | tools/lib/api/fs/fs.c | 7 | ||||
-rw-r--r-- | tools/lib/traceevent/event-parse.c | 3 | ||||
-rw-r--r-- | tools/objtool/Makefile | 2 | ||||
-rw-r--r-- | tools/objtool/builtin-check.c | 2 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-record.txt | 22 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 113 | ||||
-rw-r--r-- | tools/perf/perf.c | 2 | ||||
-rw-r--r-- | tools/perf/perf.h | 2 | ||||
-rw-r--r-- | tools/perf/tests/backward-ring-buffer.c | 14 | ||||
-rw-r--r-- | tools/perf/util/evlist.c | 269 | ||||
-rw-r--r-- | tools/perf/util/evlist.h | 47 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 16 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 3 | ||||
-rw-r--r-- | tools/perf/util/parse-events.c | 20 | ||||
-rw-r--r-- | tools/perf/util/parse-events.h | 2 | ||||
-rw-r--r-- | tools/perf/util/parse-events.l | 2 | ||||
-rw-r--r-- | tools/perf/util/session.c | 22 | ||||
-rw-r--r-- | tools/perf/util/sort.c | 8 | ||||
-rw-r--r-- | tools/perf/util/util.h | 2 |
22 files changed, 441 insertions, 152 deletions
diff --git a/tools/include/asm-generic/bitsperlong.h b/tools/include/asm-generic/bitsperlong.h index cfd661c6fc17..45eca517efb3 100644 --- a/tools/include/asm-generic/bitsperlong.h +++ b/tools/include/asm-generic/bitsperlong.h @@ -3,31 +3,12 @@ #include <uapi/asm-generic/bitsperlong.h> -/* - * In the kernel, where this file comes from, we can rely on CONFIG_64BIT, - * here we have to make amends with what the various compilers provides us - * to figure out if we're on a 64-bit machine... - */ #ifdef __SIZEOF_LONG__ -# if __SIZEOF_LONG__ == 8 -# define CONFIG_64BIT -# endif +#define BITS_PER_LONG (__CHAR_BIT__ * __SIZEOF_LONG__) #else -# ifdef __WORDSIZE -# if __WORDSIZE == 64 -# define CONFIG_64BIT -# endif -# else -# error Failed to determine BITS_PER_LONG value -# endif +#define BITS_PER_LONG __WORDSIZE #endif -#ifdef CONFIG_64BIT -#define BITS_PER_LONG 64 -#else -#define BITS_PER_LONG 32 -#endif /* CONFIG_64BIT */ - #if BITS_PER_LONG != __BITS_PER_LONG #error Inconsistent word size. Check asm/bitsperlong.h #endif diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index fa7208a32d76..e33fc1df3935 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -9,6 +9,17 @@ # define __always_inline inline __attribute__((always_inline)) #endif +#ifdef __ANDROID__ +/* + * FIXME: Big hammer to get rid of tons of: + * "warning: always_inline function might not be inlinable" + * + * At least on android-ndk-r12/platforms/android-24/arch-arm + */ +#undef __always_inline +#define __always_inline inline +#endif + #define __user #ifndef __attribute_const__ diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h index e87fd800fa8d..71287dddc05f 100644 --- a/tools/lib/api/fd/array.h +++ b/tools/lib/api/fd/array.h @@ -22,6 +22,7 @@ struct fdarray { struct pollfd *entries; union { int idx; + void *ptr; } *priv; }; diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 08556cf2c70d..ba7094b945ff 100644 --- 
a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -283,6 +283,11 @@ int filename__read_int(const char *filename, int *value) return err; } +/* + * Parses @value out of @filename with strtoull. + * By using 0 for base, the strtoull detects the + * base automatically (see man strtoull). + */ int filename__read_ull(const char *filename, unsigned long long *value) { char line[64]; @@ -292,7 +297,7 @@ int filename__read_ull(const char *filename, unsigned long long *value) return -1; if (read(fd, line, sizeof(line)) > 0) { - *value = strtoull(line, NULL, 10); + *value = strtoull(line, NULL, 0); if (*value != ULLONG_MAX) err = 0; } diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 3a7bd175f73c..664c90c8e22b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -23,6 +23,7 @@ * Frederic Weisbecker gave his permission to relicense the code to * the Lesser General Public License. */ +#include <inttypes.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -33,7 +34,7 @@ #include <limits.h> #include <linux/string.h> -#include <netinet/ip6.h> +#include <netinet/in.h> #include "event-parse.h" #include "event-utils.h" diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 9a3110cac604..1f75b0a046cc 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -26,7 +26,7 @@ OBJTOOL_IN := $(OBJTOOL)-in.o all: $(OBJTOOL) -INCLUDES := -I$(srctree)/tools/include +INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) LDFLAGS += -lelf $(LIBSUBCMD) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 92d84b277032..4ed30f45c6da 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -664,7 +664,7 @@ static int add_func_switch_tables(struct objtool_file *file, struct symbol *func) { struct instruction *insn, 
*prev_jump; - struct rela *text_rela, *rodata_rela, *prev_rela; + struct rela *text_rela, *rodata_rela, *prev_rela = NULL; int ret; prev_jump = NULL; diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5b46b1d1a37c..69966abf65d1 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -367,6 +367,28 @@ options. 'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj in config file is set to true. +--tail-synthesize:: +Instead of collecting non-sample events (for example, fork, comm, mmap) at +the beginning of record, collect them during finalizing an output file. +The collected non-sample events reflects the status of the system when +record is finished. + +--overwrite:: +Makes all events use an overwritable ring buffer. An overwritable ring +buffer works like a flight recorder: when it gets full, the kernel will +overwrite the oldest records, that thus will never make it to the +perf.data file. + +When '--overwrite' and '--switch-output' are used perf records and drops +events until it receives a signal, meaning that something unusual was +detected that warrants taking a snapshot of the most current events, +those fitting in the ring buffer at that moment. + +'overwrite' attribute can also be set or canceled for an event using +config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'. + +Implies --tail-synthesize. 
+ SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d9f5cc3a3667..8f2c16d9275f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -119,11 +119,10 @@ backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) } static int -rb_find_range(struct perf_evlist *evlist, - void *data, int mask, u64 head, u64 old, - u64 *start, u64 *end) +rb_find_range(void *data, int mask, u64 head, u64 old, + u64 *start, u64 *end, bool backward) { - if (!evlist->backward) { + if (!backward) { *start = old; *end = head; return 0; @@ -132,9 +131,10 @@ rb_find_range(struct perf_evlist *evlist, return backward_rb_find_range(data, mask, head, start, end); } -static int record__mmap_read(struct record *rec, struct perf_evlist *evlist, int idx) +static int +record__mmap_read(struct record *rec, struct perf_mmap *md, + bool overwrite, bool backward) { - struct perf_mmap *md = &evlist->mmap[idx]; u64 head = perf_mmap__read_head(md); u64 old = md->prev; u64 end = head, start = old; @@ -143,8 +143,8 @@ static int record__mmap_read(struct record *rec, struct perf_evlist *evlist, int void *buf; int rc = 0; - if (rb_find_range(evlist, data, md->mask, head, - old, &start, &end)) + if (rb_find_range(data, md->mask, head, + old, &start, &end, backward)) return -1; if (start == end) @@ -157,7 +157,7 @@ static int record__mmap_read(struct record *rec, struct perf_evlist *evlist, int WARN_ONCE(1, "failed to keep up with mmap data. 
(warn only once)\n"); md->prev = head; - perf_evlist__mmap_consume(evlist, idx); + perf_mmap__consume(md, overwrite || backward); return 0; } @@ -182,7 +182,7 @@ static int record__mmap_read(struct record *rec, struct perf_evlist *evlist, int } md->prev = head; - perf_evlist__mmap_consume(evlist, idx); + perf_mmap__consume(md, overwrite || backward); out: return rc; } @@ -498,20 +498,30 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; -static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist) +static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, + bool backward) { u64 bytes_written = rec->bytes_written; int i; int rc = 0; + struct perf_mmap *maps; if (!evlist) return 0; + maps = backward ? evlist->backward_mmap : evlist->mmap; + if (!maps) + return 0; + + if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) + return 0; + for (i = 0; i < evlist->nr_mmaps; i++) { - struct auxtrace_mmap *mm = &evlist->mmap[i].auxtrace_mmap; + struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; - if (evlist->mmap[i].base) { - if (record__mmap_read(rec, evlist, i) != 0) { + if (maps[i].base) { + if (record__mmap_read(rec, &maps[i], + evlist->overwrite, backward) != 0) { rc = -1; goto out; } @@ -531,6 +541,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli if (bytes_written != rec->bytes_written) rc = record__write(rec, &finished_round_event, sizeof(finished_round_event)); + if (backward) + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); out: return rc; } @@ -539,11 +551,11 @@ static int record__mmap_read_all(struct record *rec) { int err; - err = record__mmap_read_evlist(rec, rec->evlist); + err = record__mmap_read_evlist(rec, rec->evlist, false); if (err) return err; - return err; + return record__mmap_read_evlist(rec, rec->evlist, true); } static void record__init_features(struct record *rec) @@ -592,13 +604,16 @@ 
record__finish_output(struct record *rec) return; } -static int record__synthesize_workload(struct record *rec) +static int record__synthesize_workload(struct record *rec, bool tail) { struct { struct thread_map map; struct thread_map_data map_data; } thread_map; + if (rec->opts.tail_synthesize != tail) + return 0; + thread_map.map.nr = 1; thread_map.map.map[0].pid = rec->evlist->workload.pid; thread_map.map.map[0].comm = NULL; @@ -609,7 +624,7 @@ static int record__synthesize_workload(struct record *rec) rec->opts.proc_map_timeout); } -static int record__synthesize(struct record *rec); +static int record__synthesize(struct record *rec, bool tail); static int record__switch_output(struct record *rec, bool at_exit) @@ -620,6 +635,10 @@ record__switch_output(struct record *rec, bool at_exit) /* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp"; + record__synthesize(rec, true); + if (target__none(&rec->opts.target)) + record__synthesize_workload(rec, true); + rec->samples = 0; record__finish_output(rec); err = fetch_current_timestamp(timestamp, sizeof(timestamp)); @@ -642,7 +661,7 @@ record__switch_output(struct record *rec, bool at_exit) /* Output tracking events */ if (!at_exit) { - record__synthesize(rec); + record__synthesize(rec, false); /* * In 'perf record --switch-output' without -a, @@ -654,7 +673,7 @@ record__switch_output(struct record *rec, bool at_exit) * perf_event__synthesize_thread_map() for those events. 
*/ if (target__none(&rec->opts.target)) - record__synthesize_workload(rec); + record__synthesize_workload(rec, false); } return fd; } @@ -689,8 +708,12 @@ perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused static const struct perf_event_mmap_page * perf_evlist__pick_pc(struct perf_evlist *evlist) { - if (evlist && evlist->mmap && evlist->mmap[0].base) - return evlist->mmap[0].base; + if (evlist) { + if (evlist->mmap && evlist->mmap[0].base) + return evlist->mmap[0].base; + if (evlist->backward_mmap && evlist->backward_mmap[0].base) + return evlist->backward_mmap[0].base; + } return NULL; } @@ -704,7 +727,7 @@ static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) return NULL; } -static int record__synthesize(struct record *rec) +static int record__synthesize(struct record *rec, bool tail) { struct perf_session *session = rec->session; struct machine *machine = &session->machines.host; @@ -714,6 +737,9 @@ static int record__synthesize(struct record *rec) int fd = perf_data_file__fd(file); int err = 0; + if (rec->opts.tail_synthesize != tail) + return 0; + if (file->is_pipe) { err = perf_event__synthesize_attrs(tool, session, process_synthesized_event); @@ -877,7 +903,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) machine = &session->machines.host; - err = record__synthesize(rec); + err = record__synthesize(rec, false); if (err < 0) goto out_child; @@ -937,6 +963,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) for (;;) { unsigned long long hits = rec->samples; + /* + * rec->evlist->bkw_mmap_state is possible to be + * BKW_MMAP_EMPTY here: when done == true and + * hits != rec->samples in previous round. + * + * perf_evlist__toggle_bkw_mmap ensure we never + * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 
+ */ + if (trigger_is_hit(&switch_output_trigger) || done || draining) + perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); + if (record__mmap_read_all(rec) < 0) { trigger_error(&auxtrace_snapshot_trigger); trigger_error(&switch_output_trigger); @@ -956,8 +993,26 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } if (trigger_is_hit(&switch_output_trigger)) { + /* + * If switch_output_trigger is hit, the data in + * overwritable ring buffer should have been collected, + * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. + * + * If SIGUSR2 raise after or during record__mmap_read_all(), + * record__mmap_read_all() didn't collect data from + * overwritable ring buffer. Read again. + */ + if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) + continue; trigger_ready(&switch_output_trigger); + /* + * Reenable events in overwrite ring buffer after + * record__mmap_read_all(): we should have collected + * data from it. + */ + perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); + if (!quiet) fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", waking); @@ -1012,6 +1067,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (!quiet) fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); + if (target__none(&rec->opts.target)) + record__synthesize_workload(rec, true); + out_child: if (forks) { int exit_status; @@ -1030,6 +1088,7 @@ out_child: } else status = err; + record__synthesize(rec, true); /* this will be recalculated during process_buildids() */ rec->samples = 0; @@ -1354,6 +1413,9 @@ struct option __record_options[] = { OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, &record.opts.no_inherit_set, "child tasks do not inherit counters"), + OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, + "synthesize non-sample events at the end of output"), + OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 
OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", "number of mmap data pages and AUX area tracing mmap pages", @@ -1564,6 +1626,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) } } + if (record.opts.overwrite) + record.opts.tail_synthesize = true; + if (rec->evlist->nr_entries == 0 && perf_evlist__add_default(rec->evlist) < 0) { pr_err("Not enough memory for event selector list\n"); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4b2ff021434c..64c06961bfe4 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -503,7 +503,7 @@ void pthread__unblock_sigwinch(void) static void cache_line_size(int *cacheline_sizep) { if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep)) - perror("cannot determine cache line size"); + pr_debug("cannot determine cache line size"); } #endif diff --git a/tools/perf/perf.h b/tools/perf/perf.h index cd8f1b150f9e..a7e0f1497244 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -59,6 +59,8 @@ struct record_opts { bool record_switch_events; bool all_kernel; bool all_user; + bool tail_synthesize; + bool overwrite; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index f20ea4c0d0cb..615780cbfe1d 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -31,8 +31,8 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count, for (i = 0; i < evlist->nr_mmaps; i++) { union perf_event *event; - perf_evlist__mmap_read_catchup(evlist, i); - while ((event = perf_evlist__mmap_read_backward(evlist, i)) != NULL) { + perf_mmap__read_catchup(&evlist->backward_mmap[i]); + while ((event = perf_mmap__read_backward(&evlist->backward_mmap[i])) != NULL) { const u32 type = event->header.type; 
switch (type) { @@ -108,7 +108,11 @@ int test__backward_ring_buffer(int subtest __maybe_unused) } bzero(&parse_error, sizeof(parse_error)); - err = parse_events(evlist, "syscalls:sys_enter_prctl", &parse_error); + /* + * Set backward bit, ring buffer should be writing from end. Record + * it in aux evlist + */ + err = parse_events(evlist, "syscalls:sys_enter_prctl/overwrite/", &parse_error); if (err) { pr_debug("Failed to parse tracepoint event, try use root\n"); ret = TEST_SKIP; @@ -117,10 +121,6 @@ int test__backward_ring_buffer(int subtest __maybe_unused) perf_evlist__config(evlist, &opts, NULL); - /* Set backward bit, ring buffer should be writing from end */ - evlist__for_each_entry(evlist, evsel) - evsel->attr.write_backward = 1; - err = perf_evlist__open(evlist); if (err < 0) { pr_debug("perf_evlist__open: %s\n", diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 862e69c2690d..2a40b8e1def7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -15,6 +15,7 @@ #include "evlist.h" #include "evsel.h" #include "debug.h" +#include "asm/bug.h" #include <unistd.h> #include "parse-events.h" @@ -27,8 +28,8 @@ #include <linux/log2.h> #include <linux/err.h> -static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx); -static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); +static void perf_mmap__munmap(struct perf_mmap *map); +static void perf_mmap__put(struct perf_mmap *map); #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) @@ -44,7 +45,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, perf_evlist__set_maps(evlist, cpus, threads); fdarray__init(&evlist->pollfd, 64); evlist->workload.pid = -1; - evlist->backward = false; + evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; } struct perf_evlist *perf_evlist__new(void) @@ -122,6 +123,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void 
perf_evlist__exit(struct perf_evlist *evlist) { zfree(&evlist->mmap); + zfree(&evlist->backward_mmap); fdarray__exit(&evlist->pollfd); } @@ -465,7 +467,8 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) return 0; } -static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx, short revent) +static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, + struct perf_mmap *map, short revent) { int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); /* @@ -473,7 +476,7 @@ static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx * close the associated evlist->mmap[] entry. */ if (pos >= 0) { - evlist->pollfd.priv[pos].idx = idx; + evlist->pollfd.priv[pos].ptr = map; fcntl(fd, F_SETFL, O_NONBLOCK); } @@ -483,15 +486,16 @@ static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) { - return __perf_evlist__add_pollfd(evlist, fd, -1, POLLIN); + return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); } static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, void *arg __maybe_unused) { - struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd); + struct perf_mmap *map = fda->priv[fd].ptr; - perf_evlist__mmap_put(evlist, fda->priv[fd].idx); + if (map) + perf_mmap__put(map); } int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) @@ -688,8 +692,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) { int i; + if (!evlist->backward_mmap) + return 0; + for (i = 0; i < evlist->nr_mmaps; i++) { - int fd = evlist->mmap[i].fd; + int fd = evlist->backward_mmap[i].fd; int err; if (fd < 0) @@ -701,12 +708,12 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) return 0; } -int perf_evlist__pause(struct perf_evlist *evlist) +static int perf_evlist__pause(struct perf_evlist *evlist) { return 
perf_evlist__set_paused(evlist, true); } -int perf_evlist__resume(struct perf_evlist *evlist) +static int perf_evlist__resume(struct perf_evlist *evlist) { return perf_evlist__set_paused(evlist, false); } @@ -781,9 +788,8 @@ broken_event: return event; } -union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) +union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup) { - struct perf_mmap *md = &evlist->mmap[idx]; u64 head; u64 old = md->prev; @@ -795,13 +801,12 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int head = perf_mmap__read_head(md); - return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev); + return perf_mmap__read(md, check_messup, old, head, &md->prev); } union perf_event * -perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) +perf_mmap__read_backward(struct perf_mmap *md) { - struct perf_mmap *md = &evlist->mmap[idx]; u64 head, end; u64 start = md->prev; @@ -836,16 +841,38 @@ perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) return perf_mmap__read(md, false, start, end, &md->prev); } -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) { - if (!evlist->backward) - return perf_evlist__mmap_read_forward(evlist, idx); - return perf_evlist__mmap_read_backward(evlist, idx); + struct perf_mmap *md = &evlist->mmap[idx]; + + /* + * Check messup is required for forward overwritable ring buffer: + * memory pointed by md->prev can be overwritten in this case. + * No need for read-write ring buffer: kernel stop outputting when + * it hit md->prev (perf_mmap__consume()). 
+ */ + return perf_mmap__read_forward(md, evlist->overwrite); } -void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) +union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; + + /* + * No need to check messup for backward ring buffer: + * We can always read arbitrary long data from a backward + * ring buffer unless we forget to pause it before reading. + */ + return perf_mmap__read_backward(md); +} + +union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +{ + return perf_evlist__mmap_read_forward(evlist, idx); +} + +void perf_mmap__read_catchup(struct perf_mmap *md) +{ u64 head; if (!atomic_read(&md->refcnt)) @@ -855,38 +882,44 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) md->prev = head; } +void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) +{ + perf_mmap__read_catchup(&evlist->mmap[idx]); +} + static bool perf_mmap__empty(struct perf_mmap *md) { return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; } -static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) +static void perf_mmap__get(struct perf_mmap *map) { - atomic_inc(&evlist->mmap[idx].refcnt); + atomic_inc(&map->refcnt); } -static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx) +static void perf_mmap__put(struct perf_mmap *md) { - struct perf_mmap *md = &evlist->mmap[idx]; - BUG_ON(md->base && atomic_read(&md->refcnt) == 0); if (atomic_dec_and_test(&md->refcnt)) - __perf_evlist__munmap(evlist, idx); + perf_mmap__munmap(md); } -void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) +void perf_mmap__consume(struct perf_mmap *md, bool overwrite) { - struct perf_mmap *md = &evlist->mmap[idx]; - - if (!evlist->overwrite) { + if (!overwrite) { u64 old = md->prev; perf_mmap__write_tail(md, old); } if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md)) - 
perf_evlist__mmap_put(evlist, idx); + perf_mmap__put(md); +} + +void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) +{ + perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); } int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, @@ -917,44 +950,52 @@ void __weak auxtrace_mmap_params__set_idx( { } -static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) +static void perf_mmap__munmap(struct perf_mmap *map) { - if (evlist->mmap[idx].base != NULL) { - munmap(evlist->mmap[idx].base, evlist->mmap_len); - evlist->mmap[idx].base = NULL; - evlist->mmap[idx].fd = -1; - atomic_set(&evlist->mmap[idx].refcnt, 0); + if (map->base != NULL) { + munmap(map->base, perf_mmap__mmap_len(map)); + map->base = NULL; + map->fd = -1; + atomic_set(&map->refcnt, 0); } - auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap); + auxtrace_mmap__munmap(&map->auxtrace_mmap); } -void perf_evlist__munmap(struct perf_evlist *evlist) +static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) { int i; - if (evlist->mmap == NULL) - return; + if (evlist->mmap) + for (i = 0; i < evlist->nr_mmaps; i++) + perf_mmap__munmap(&evlist->mmap[i]); - for (i = 0; i < evlist->nr_mmaps; i++) - __perf_evlist__munmap(evlist, i); + if (evlist->backward_mmap) + for (i = 0; i < evlist->nr_mmaps; i++) + perf_mmap__munmap(&evlist->backward_mmap[i]); +} +void perf_evlist__munmap(struct perf_evlist *evlist) +{ + perf_evlist__munmap_nofree(evlist); zfree(&evlist->mmap); + zfree(&evlist->backward_mmap); } -static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) +static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) { int i; + struct perf_mmap *map; evlist->nr_mmaps = cpu_map__nr(evlist->cpus); if (cpu_map__empty(evlist->cpus)) evlist->nr_mmaps = thread_map__nr(evlist->threads); - evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); - if (!evlist->mmap) - return -ENOMEM; + map = zalloc(evlist->nr_mmaps * sizeof(struct 
perf_mmap)); + if (!map) + return NULL; for (i = 0; i < evlist->nr_mmaps; i++) - evlist->mmap[i].fd = -1; - return 0; + map[i].fd = -1; + return map; } struct mmap_params { @@ -963,8 +1004,8 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, - struct mmap_params *mp, int fd) +static int perf_mmap__mmap(struct perf_mmap *map, + struct mmap_params *mp, int fd) { /* * The last one will be done at perf_evlist__mmap_consume(), so that we @@ -979,21 +1020,21 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, * evlist layer can't just drop it when filtering events in * perf_evlist__filter_pollfd(). */ - atomic_set(&evlist->mmap[idx].refcnt, 2); - evlist->mmap[idx].prev = 0; - evlist->mmap[idx].mask = mp->mask; - evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot, - MAP_SHARED, fd, 0); - if (evlist->mmap[idx].base == MAP_FAILED) { + atomic_set(&map->refcnt, 2); + map->prev = 0; + map->mask = mp->mask; + map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + MAP_SHARED, fd, 0); + if (map->base == MAP_FAILED) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", errno); - evlist->mmap[idx].base = NULL; + map->base = NULL; return -1; } - evlist->mmap[idx].fd = fd; + map->fd = fd; - if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap, - &mp->auxtrace_mp, evlist->mmap[idx].base, fd)) + if (auxtrace_mmap__mmap(&map->auxtrace_mmap, + &mp->auxtrace_mp, map->base, fd)) return -1; return 0; @@ -1003,23 +1044,36 @@ static bool perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, struct perf_evsel *evsel) { - if (evsel->overwrite) + if (evsel->attr.write_backward) return false; return true; } static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int cpu, - int thread, int *output) + int thread, int *_output, int *_output_backward) { struct perf_evsel *evsel; int revent; 
evlist__for_each_entry(evlist, evsel) { + struct perf_mmap *maps = evlist->mmap; + int *output = _output; int fd; - if (evsel->overwrite != (evlist->overwrite && evlist->backward)) - continue; + if (evsel->attr.write_backward) { + output = _output_backward; + maps = evlist->backward_mmap; + + if (!maps) { + maps = perf_evlist__alloc_mmap(evlist); + if (!maps) + return -1; + evlist->backward_mmap = maps; + if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); + } + } if (evsel->system_wide && thread) continue; @@ -1028,13 +1082,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, if (*output == -1) { *output = fd; - if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0) + + if (perf_mmap__mmap(&maps[idx], mp, *output) < 0) return -1; } else { if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) return -1; - perf_evlist__mmap_get(evlist, idx); + perf_mmap__get(&maps[idx]); } revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; @@ -1047,8 +1102,8 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, * Therefore don't add it for polling. 
*/ if (!evsel->system_wide && - __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) { - perf_evlist__mmap_put(evlist, idx); + __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { + perf_mmap__put(&maps[idx]); return -1; } @@ -1074,13 +1129,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per cpu\n"); for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; + int output_backward = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, true); for (thread = 0; thread < nr_threads; thread++) { if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, - thread, &output)) + thread, &output, &output_backward)) goto out_unmap; } } @@ -1088,8 +1144,7 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, return 0; out_unmap: - for (cpu = 0; cpu < nr_cpus; cpu++) - __perf_evlist__munmap(evlist, cpu); + perf_evlist__munmap_nofree(evlist); return -1; } @@ -1102,20 +1157,20 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per thread\n"); for (thread = 0; thread < nr_threads; thread++) { int output = -1; + int output_backward = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, false); if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, - &output)) + &output, &output_backward)) goto out_unmap; } return 0; out_unmap: - for (thread = 0; thread < nr_threads; thread++) - __perf_evlist__munmap(evlist, thread); + perf_evlist__munmap_nofree(evlist); return -1; } @@ -1248,7 +1303,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, .prot = PROT_READ | (overwrite ? 
0 : PROT_WRITE), }; - if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) + if (!evlist->mmap) + evlist->mmap = perf_evlist__alloc_mmap(evlist); + if (!evlist->mmap) return -ENOMEM; if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) @@ -1919,3 +1976,61 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, return NULL; } + +void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, + enum bkw_mmap_state state) +{ + enum bkw_mmap_state old_state = evlist->bkw_mmap_state; + enum action { + NONE, + PAUSE, + RESUME, + } action = NONE; + + if (!evlist->backward_mmap) + return; + + switch (old_state) { + case BKW_MMAP_NOTREADY: { + if (state != BKW_MMAP_RUNNING) + goto state_err;; + break; + } + case BKW_MMAP_RUNNING: { + if (state != BKW_MMAP_DATA_PENDING) + goto state_err; + action = PAUSE; + break; + } + case BKW_MMAP_DATA_PENDING: { + if (state != BKW_MMAP_EMPTY) + goto state_err; + break; + } + case BKW_MMAP_EMPTY: { + if (state != BKW_MMAP_RUNNING) + goto state_err; + action = RESUME; + break; + } + default: + WARN_ONCE(1, "Shouldn't get there\n"); + } + + evlist->bkw_mmap_state = state; + + switch (action) { + case PAUSE: + perf_evlist__pause(evlist); + break; + case RESUME: + perf_evlist__resume(evlist); + break; + case NONE: + default: + break; + } + +state_err: + return; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index afd087761a47..4fd034f22d2f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -35,6 +35,40 @@ struct perf_mmap { char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); }; +static inline size_t +perf_mmap__mmap_len(struct perf_mmap *map) +{ + return map->mask + 1 + page_size; +} + +/* + * State machine of bkw_mmap_state: + * + * .________________(forbid)_____________. 
+ * | V + * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY + * ^ ^ | ^ | + * | |__(forbid)____/ |___(forbid)___/| + * | | + * \_________________(3)_______________/ + * + * NOTREADY : Backward ring buffers are not ready + * RUNNING : Backward ring buffers are recording + * DATA_PENDING : We are required to collect data from backward ring buffers + * EMPTY : We have collected data from backward ring buffers. + * + * (0): Setup backward ring buffer + * (1): Pause ring buffers for reading + * (2): Read from ring buffers + * (3): Resume ring buffers for recording + */ +enum bkw_mmap_state { + BKW_MMAP_NOTREADY, + BKW_MMAP_RUNNING, + BKW_MMAP_DATA_PENDING, + BKW_MMAP_EMPTY, +}; + struct perf_evlist { struct list_head entries; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; @@ -44,17 +78,18 @@ struct perf_evlist { bool overwrite; bool enabled; bool has_user_cpus; - bool backward; size_t mmap_len; int id_pos; int is_pos; u64 combined_sample_type; + enum bkw_mmap_state bkw_mmap_state; struct { int cork_fd; pid_t pid; } workload; struct fdarray pollfd; struct perf_mmap *mmap; + struct perf_mmap *backward_mmap; struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; @@ -129,6 +164,14 @@ struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); +void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state); + +union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); +union perf_event *perf_mmap__read_backward(struct perf_mmap *map); + +void perf_mmap__read_catchup(struct perf_mmap *md); +void perf_mmap__consume(struct perf_mmap *md, bool overwrite); + union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, @@ -139,8 +182,6 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, 
int idx); void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); -int perf_evlist__pause(struct perf_evlist *evlist); -int perf_evlist__resume(struct perf_evlist *evlist); int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__close(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ba0f59fa3d5d..8c54df61fe64 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -695,6 +695,9 @@ static void apply_config_terms(struct perf_evsel *evsel, */ attr->inherit = term->val.inherit ? 1 : 0; break; + case PERF_EVSEL__CONFIG_TERM_OVERWRITE: + attr->write_backward = term->val.overwrite ? 1 : 0; + break; default: break; } @@ -776,6 +779,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; attr->inherit = !opts->no_inherit; + attr->write_backward = opts->overwrite ? 1 : 0; perf_evsel__set_sample_bit(evsel, IP); perf_evsel__set_sample_bit(evsel, TID); @@ -1377,6 +1381,9 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, int pid = -1, err; enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE; + if (perf_missing_features.write_backward && evsel->attr.write_backward) + return -EINVAL; + if (evsel->system_wide) nthreads = 1; else @@ -1407,11 +1414,6 @@ fallback_missing_features: if (perf_missing_features.lbr_flags) evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS | PERF_SAMPLE_BRANCH_NO_CYCLES); - if (perf_missing_features.write_backward) { - if (evsel->overwrite) - return -EINVAL; - evsel->attr.write_backward = false; - } retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; @@ -1513,7 +1515,7 @@ try_fallback: */ if (!perf_missing_features.write_backward && evsel->attr.write_backward) { perf_missing_features.write_backward = true; - goto fallback_missing_features; + goto out_close; } else if 
(!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) { perf_missing_features.clockid_wrong = true; goto fallback_missing_features; @@ -2422,7 +2424,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "We found oprofile daemon running, please stop it and try again."); break; case EINVAL: - if (evsel->overwrite && perf_missing_features.write_backward) + if (evsel->attr.write_backward && perf_missing_features.write_backward) return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel."); if (perf_missing_features.clockid) return scnprintf(msg, size, "clockid feature not supported."); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d73391e8740e..8a4a6c9f1480 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -45,6 +45,7 @@ enum { PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, PERF_EVSEL__CONFIG_TERM_MAX_STACK, + PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -59,6 +60,7 @@ struct perf_evsel_config_term { u64 stack_user; int max_stack; bool inherit; + bool overwrite; } val; }; @@ -114,7 +116,6 @@ struct perf_evsel { bool tracking; bool per_pkg; bool precise_max; - bool overwrite; /* parse modifier helper */ int exclude_GH; int nr_members; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 375af0e02831..6c913c3914fb 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -902,6 +902,8 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", + [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", + [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", }; static bool config_term_shrinked; @@ -994,6 +996,12 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NOINHERIT: CHECK_TYPE_VAL(NUM); break; + case 
PARSE_EVENTS__TERM_TYPE_OVERWRITE: + CHECK_TYPE_VAL(NUM); + break; + case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: + CHECK_TYPE_VAL(NUM); + break; case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; @@ -1046,6 +1054,8 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_INHERIT: case PARSE_EVENTS__TERM_TYPE_NOINHERIT: case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + case PARSE_EVENTS__TERM_TYPE_OVERWRITE: + case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: return config_term_common(attr, term, err); default: if (err) { @@ -1118,6 +1128,12 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_STACK: ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); break; + case PARSE_EVENTS__TERM_TYPE_OVERWRITE: + ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0); + break; + case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: + ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1); + break; default: break; } @@ -2412,9 +2428,9 @@ static void config_terms_list(char *buf, size_t buf_sz) char *parse_events_formats_error_string(char *additional_terms) { char *str; - /* "branch_type" is the longest name */ + /* "no-overwrite" is the longest name */ char static_terms[__PARSE_EVENTS__TERM_TYPE_NR * - (sizeof("branch_type") - 1)]; + (sizeof("no-overwrite") - 1)]; config_terms_list(static_terms, sizeof(static_terms)); /* valid terms */ diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index b4aa7eb2df73..d1edbf8cc66a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -69,6 +69,8 @@ enum { PARSE_EVENTS__TERM_TYPE_NOINHERIT, PARSE_EVENTS__TERM_TYPE_INHERIT, PARSE_EVENTS__TERM_TYPE_MAX_STACK, + PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, + PARSE_EVENTS__TERM_TYPE_OVERWRITE, __PARSE_EVENTS__TERM_TYPE_NR, }; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 3c15b33b2e84..7a2519435da0 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -202,6 
+202,8 @@ stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); } inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } +overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } +no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 078d49626494..5d61242a6e64 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1499,10 +1499,27 @@ int perf_session__register_idle_thread(struct perf_session *session) return err; } +static void +perf_session__warn_order(const struct perf_session *session) +{ + const struct ordered_events *oe = &session->ordered_events; + struct perf_evsel *evsel; + bool should_warn = true; + + evlist__for_each_entry(session->evlist, evsel) { + if (evsel->attr.write_backward) + should_warn = false; + } + + if (!should_warn) + return; + if (oe->nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); +} + static void perf_session__warn_about_errors(const struct perf_session *session) { const struct events_stats *stats = &session->evlist->stats; - const struct ordered_events *oe = &session->ordered_events; if (session->tool->lost == perf_event__process_lost && stats->nr_events[PERF_RECORD_LOST] != 0) { @@ -1559,8 +1576,7 @@ static void perf_session__warn_about_errors(const struct perf_session *session) stats->nr_unprocessable_samples); } - if (oe->nr_unordered_events != 0) - ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); + perf_session__warn_order(session); events_stats__auxtrace_error_warn(stats); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 
5854b4660a49..947d21f38398 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2381,6 +2381,9 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; + if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0) + return -EINVAL; + if (sd->entry == &sort_mem_daddr_sym) list->sym = 1; @@ -2424,7 +2427,10 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, if (*tok) { ret = sort_dimension__add(list, tok, evlist, level); if (ret == -EINVAL) { - error("Invalid --sort key: `%s'", tok); + if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok))) + error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); + else + error("Invalid --sort key: `%s'", tok); break; } else if (ret == -ESRCH) { error("Unknown --sort key: `%s'", tok); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 6178cab82374..843cbba8f9d3 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -360,7 +360,7 @@ void print_binary(unsigned char *data, size_t len, size_t bytes_per_line, print_binary_t printer, void *extra); -#ifndef __GLIBC__ +#if !defined(__GLIBC__) && !defined(__ANDROID__) extern int sched_getcpu(void); #endif |