summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-07-16 22:36:42 +0200
committerIngo Molnar <mingo@kernel.org>2016-07-16 22:36:42 +0200
commit09211e2530ab4905ec16edecc27022d6b247419d (patch)
treeca894cb0a6b0b51dfc8c73316319e2a714aac256 /tools
parentb29c6574699dc475da5dbff8db19297b203aacce (diff)
parentb49364f36cfdb6d540ac961102d7ffaf84279bb6 (diff)
downloadlwn-09211e2530ab4905ec16edecc27022d6b247419d.tar.gz
lwn-09211e2530ab4905ec16edecc27022d6b247419d.zip
Merge tag 'perf-core-for-mingo-20160715' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - Allow reading from a backward ring buffer (one set up via sys_perf_event_open() with perf_event_attr.write_backward = 1) (Wang Nan) Infrastructure changes: - Fix the build on Android NDK r12b (initially just for ARM), which is now part of my perf-build container collection and will get tested prior to sending patches upstream (Arnaldo Carvalho de Melo) - Add correct header for IPv6 definitions - Fix bitsperlong.h fallout (Arnaldo Carvalho de Melo, Peter Zijlstra) - Use base 0 (auto) in filename__read_ull(), so that we can handle hex values too (Jiri Olsa) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/include/asm-generic/bitsperlong.h23
-rw-r--r--tools/include/linux/compiler.h11
-rw-r--r--tools/lib/api/fd/array.h1
-rw-r--r--tools/lib/api/fs/fs.c7
-rw-r--r--tools/lib/traceevent/event-parse.c3
-rw-r--r--tools/objtool/Makefile2
-rw-r--r--tools/objtool/builtin-check.c2
-rw-r--r--tools/perf/Documentation/perf-record.txt22
-rw-r--r--tools/perf/builtin-record.c113
-rw-r--r--tools/perf/perf.c2
-rw-r--r--tools/perf/perf.h2
-rw-r--r--tools/perf/tests/backward-ring-buffer.c14
-rw-r--r--tools/perf/util/evlist.c269
-rw-r--r--tools/perf/util/evlist.h47
-rw-r--r--tools/perf/util/evsel.c16
-rw-r--r--tools/perf/util/evsel.h3
-rw-r--r--tools/perf/util/parse-events.c20
-rw-r--r--tools/perf/util/parse-events.h2
-rw-r--r--tools/perf/util/parse-events.l2
-rw-r--r--tools/perf/util/session.c22
-rw-r--r--tools/perf/util/sort.c8
-rw-r--r--tools/perf/util/util.h2
22 files changed, 441 insertions, 152 deletions
diff --git a/tools/include/asm-generic/bitsperlong.h b/tools/include/asm-generic/bitsperlong.h
index cfd661c6fc17..45eca517efb3 100644
--- a/tools/include/asm-generic/bitsperlong.h
+++ b/tools/include/asm-generic/bitsperlong.h
@@ -3,31 +3,12 @@
#include <uapi/asm-generic/bitsperlong.h>
-/*
- * In the kernel, where this file comes from, we can rely on CONFIG_64BIT,
- * here we have to make amends with what the various compilers provides us
- * to figure out if we're on a 64-bit machine...
- */
#ifdef __SIZEOF_LONG__
-# if __SIZEOF_LONG__ == 8
-# define CONFIG_64BIT
-# endif
+#define BITS_PER_LONG (__CHAR_BIT__ * __SIZEOF_LONG__)
#else
-# ifdef __WORDSIZE
-# if __WORDSIZE == 64
-# define CONFIG_64BIT
-# endif
-# else
-# error Failed to determine BITS_PER_LONG value
-# endif
+#define BITS_PER_LONG __WORDSIZE
#endif
-#ifdef CONFIG_64BIT
-#define BITS_PER_LONG 64
-#else
-#define BITS_PER_LONG 32
-#endif /* CONFIG_64BIT */
-
#if BITS_PER_LONG != __BITS_PER_LONG
#error Inconsistent word size. Check asm/bitsperlong.h
#endif
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index fa7208a32d76..e33fc1df3935 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -9,6 +9,17 @@
# define __always_inline inline __attribute__((always_inline))
#endif
+#ifdef __ANDROID__
+/*
+ * FIXME: Big hammer to get rid of tons of:
+ * "warning: always_inline function might not be inlinable"
+ *
+ * At least on android-ndk-r12/platforms/android-24/arch-arm
+ */
+#undef __always_inline
+#define __always_inline inline
+#endif
+
#define __user
#ifndef __attribute_const__
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index e87fd800fa8d..71287dddc05f 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -22,6 +22,7 @@ struct fdarray {
struct pollfd *entries;
union {
int idx;
+ void *ptr;
} *priv;
};
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 08556cf2c70d..ba7094b945ff 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -283,6 +283,11 @@ int filename__read_int(const char *filename, int *value)
return err;
}
+/*
+ * Parses @value out of @filename with strtoull.
+ * By using 0 for base, strtoull detects the
+ * base automatically (see man strtoull).
+ */
int filename__read_ull(const char *filename, unsigned long long *value)
{
char line[64];
@@ -292,7 +297,7 @@ int filename__read_ull(const char *filename, unsigned long long *value)
return -1;
if (read(fd, line, sizeof(line)) > 0) {
- *value = strtoull(line, NULL, 10);
+ *value = strtoull(line, NULL, 0);
if (*value != ULLONG_MAX)
err = 0;
}
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 3a7bd175f73c..664c90c8e22b 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -23,6 +23,7 @@
* Frederic Weisbecker gave his permission to relicense the code to
* the Lesser General Public License.
*/
+#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -33,7 +34,7 @@
#include <limits.h>
#include <linux/string.h>
-#include <netinet/ip6.h>
+#include <netinet/in.h>
#include "event-parse.h"
#include "event-utils.h"
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 9a3110cac604..1f75b0a046cc 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -26,7 +26,7 @@ OBJTOOL_IN := $(OBJTOOL)-in.o
all: $(OBJTOOL)
-INCLUDES := -I$(srctree)/tools/include
+INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi
CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
LDFLAGS += -lelf $(LIBSUBCMD)
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 92d84b277032..4ed30f45c6da 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -664,7 +664,7 @@ static int add_func_switch_tables(struct objtool_file *file,
struct symbol *func)
{
struct instruction *insn, *prev_jump;
- struct rela *text_rela, *rodata_rela, *prev_rela;
+ struct rela *text_rela, *rodata_rela, *prev_rela = NULL;
int ret;
prev_jump = NULL;
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 5b46b1d1a37c..69966abf65d1 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -367,6 +367,28 @@ options.
'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
in config file is set to true.
+--tail-synthesize::
+Instead of collecting non-sample events (for example, fork, comm, mmap) at
+the beginning of record, collect them while finalizing the output file.
+The collected non-sample events reflect the status of the system when
+record is finished.
+
+--overwrite::
+Makes all events use an overwritable ring buffer. An overwritable ring
+buffer works like a flight recorder: when it gets full, the kernel will
+overwrite the oldest records, that thus will never make it to the
+perf.data file.
+
+When '--overwrite' and '--switch-output' are used, perf records and drops
+events until it receives a signal, meaning that something unusual was
+detected that warrants taking a snapshot of the most current events,
+those fitting in the ring buffer at that moment.
+
+The 'overwrite' attribute can also be set or cleared for an event using
+config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'.
+
+Implies --tail-synthesize.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d9f5cc3a3667..8f2c16d9275f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -119,11 +119,10 @@ backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
}
static int
-rb_find_range(struct perf_evlist *evlist,
- void *data, int mask, u64 head, u64 old,
- u64 *start, u64 *end)
+rb_find_range(void *data, int mask, u64 head, u64 old,
+ u64 *start, u64 *end, bool backward)
{
- if (!evlist->backward) {
+ if (!backward) {
*start = old;
*end = head;
return 0;
@@ -132,9 +131,10 @@ rb_find_range(struct perf_evlist *evlist,
return backward_rb_find_range(data, mask, head, start, end);
}
-static int record__mmap_read(struct record *rec, struct perf_evlist *evlist, int idx)
+static int
+record__mmap_read(struct record *rec, struct perf_mmap *md,
+ bool overwrite, bool backward)
{
- struct perf_mmap *md = &evlist->mmap[idx];
u64 head = perf_mmap__read_head(md);
u64 old = md->prev;
u64 end = head, start = old;
@@ -143,8 +143,8 @@ static int record__mmap_read(struct record *rec, struct perf_evlist *evlist, int
void *buf;
int rc = 0;
- if (rb_find_range(evlist, data, md->mask, head,
- old, &start, &end))
+ if (rb_find_range(data, md->mask, head,
+ old, &start, &end, backward))
return -1;
if (start == end)
@@ -157,7 +157,7 @@ static int record__mmap_read(struct record *rec, struct perf_evlist *evlist, int
WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
md->prev = head;
- perf_evlist__mmap_consume(evlist, idx);
+ perf_mmap__consume(md, overwrite || backward);
return 0;
}
@@ -182,7 +182,7 @@ static int record__mmap_read(struct record *rec, struct perf_evlist *evlist, int
}
md->prev = head;
- perf_evlist__mmap_consume(evlist, idx);
+ perf_mmap__consume(md, overwrite || backward);
out:
return rc;
}
@@ -498,20 +498,30 @@ static struct perf_event_header finished_round_event = {
.type = PERF_RECORD_FINISHED_ROUND,
};
-static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist)
+static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
+ bool backward)
{
u64 bytes_written = rec->bytes_written;
int i;
int rc = 0;
+ struct perf_mmap *maps;
if (!evlist)
return 0;
+ maps = backward ? evlist->backward_mmap : evlist->mmap;
+ if (!maps)
+ return 0;
+
+ if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
+ return 0;
+
for (i = 0; i < evlist->nr_mmaps; i++) {
- struct auxtrace_mmap *mm = &evlist->mmap[i].auxtrace_mmap;
+ struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
- if (evlist->mmap[i].base) {
- if (record__mmap_read(rec, evlist, i) != 0) {
+ if (maps[i].base) {
+ if (record__mmap_read(rec, &maps[i],
+ evlist->overwrite, backward) != 0) {
rc = -1;
goto out;
}
@@ -531,6 +541,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (bytes_written != rec->bytes_written)
rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
+ if (backward)
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
return rc;
}
@@ -539,11 +551,11 @@ static int record__mmap_read_all(struct record *rec)
{
int err;
- err = record__mmap_read_evlist(rec, rec->evlist);
+ err = record__mmap_read_evlist(rec, rec->evlist, false);
if (err)
return err;
- return err;
+ return record__mmap_read_evlist(rec, rec->evlist, true);
}
static void record__init_features(struct record *rec)
@@ -592,13 +604,16 @@ record__finish_output(struct record *rec)
return;
}
-static int record__synthesize_workload(struct record *rec)
+static int record__synthesize_workload(struct record *rec, bool tail)
{
struct {
struct thread_map map;
struct thread_map_data map_data;
} thread_map;
+ if (rec->opts.tail_synthesize != tail)
+ return 0;
+
thread_map.map.nr = 1;
thread_map.map.map[0].pid = rec->evlist->workload.pid;
thread_map.map.map[0].comm = NULL;
@@ -609,7 +624,7 @@ static int record__synthesize_workload(struct record *rec)
rec->opts.proc_map_timeout);
}
-static int record__synthesize(struct record *rec);
+static int record__synthesize(struct record *rec, bool tail);
static int
record__switch_output(struct record *rec, bool at_exit)
@@ -620,6 +635,10 @@ record__switch_output(struct record *rec, bool at_exit)
/* Same Size: "2015122520103046"*/
char timestamp[] = "InvalidTimestamp";
+ record__synthesize(rec, true);
+ if (target__none(&rec->opts.target))
+ record__synthesize_workload(rec, true);
+
rec->samples = 0;
record__finish_output(rec);
err = fetch_current_timestamp(timestamp, sizeof(timestamp));
@@ -642,7 +661,7 @@ record__switch_output(struct record *rec, bool at_exit)
/* Output tracking events */
if (!at_exit) {
- record__synthesize(rec);
+ record__synthesize(rec, false);
/*
* In 'perf record --switch-output' without -a,
@@ -654,7 +673,7 @@ record__switch_output(struct record *rec, bool at_exit)
* perf_event__synthesize_thread_map() for those events.
*/
if (target__none(&rec->opts.target))
- record__synthesize_workload(rec);
+ record__synthesize_workload(rec, false);
}
return fd;
}
@@ -689,8 +708,12 @@ perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
- if (evlist && evlist->mmap && evlist->mmap[0].base)
- return evlist->mmap[0].base;
+ if (evlist) {
+ if (evlist->mmap && evlist->mmap[0].base)
+ return evlist->mmap[0].base;
+ if (evlist->backward_mmap && evlist->backward_mmap[0].base)
+ return evlist->backward_mmap[0].base;
+ }
return NULL;
}
@@ -704,7 +727,7 @@ static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
return NULL;
}
-static int record__synthesize(struct record *rec)
+static int record__synthesize(struct record *rec, bool tail)
{
struct perf_session *session = rec->session;
struct machine *machine = &session->machines.host;
@@ -714,6 +737,9 @@ static int record__synthesize(struct record *rec)
int fd = perf_data_file__fd(file);
int err = 0;
+ if (rec->opts.tail_synthesize != tail)
+ return 0;
+
if (file->is_pipe) {
err = perf_event__synthesize_attrs(tool, session,
process_synthesized_event);
@@ -877,7 +903,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
machine = &session->machines.host;
- err = record__synthesize(rec);
+ err = record__synthesize(rec, false);
if (err < 0)
goto out_child;
@@ -937,6 +963,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
for (;;) {
unsigned long long hits = rec->samples;
+ /*
+ * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
+ * here: when done == true and hits != rec->samples
+ * in the previous round.
+ *
+ * perf_evlist__toggle_bkw_mmap() ensures we never
+ * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
+ */
+ if (trigger_is_hit(&switch_output_trigger) || done || draining)
+ perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
+
if (record__mmap_read_all(rec) < 0) {
trigger_error(&auxtrace_snapshot_trigger);
trigger_error(&switch_output_trigger);
@@ -956,8 +993,26 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
if (trigger_is_hit(&switch_output_trigger)) {
+ /*
+ * If switch_output_trigger is hit, the data in
+ * overwritable ring buffer should have been collected,
+ * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
+ *
+ * If SIGUSR2 is raised during or after record__mmap_read_all(),
+ * record__mmap_read_all() did not collect data from the
+ * overwritable ring buffer. Read again.
+ */
+ if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
+ continue;
trigger_ready(&switch_output_trigger);
+ /*
+ * Reenable events in overwrite ring buffer after
+ * record__mmap_read_all(): we should have collected
+ * data from it.
+ */
+ perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
+
if (!quiet)
fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
waking);
@@ -1012,6 +1067,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (!quiet)
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
+ if (target__none(&rec->opts.target))
+ record__synthesize_workload(rec, true);
+
out_child:
if (forks) {
int exit_status;
@@ -1030,6 +1088,7 @@ out_child:
} else
status = err;
+ record__synthesize(rec, true);
/* this will be recalculated during process_buildids() */
rec->samples = 0;
@@ -1354,6 +1413,9 @@ struct option __record_options[] = {
OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
&record.opts.no_inherit_set,
"child tasks do not inherit counters"),
+ OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
+ "synthesize non-sample events at the end of output"),
+ OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
"number of mmap data pages and AUX area tracing mmap pages",
@@ -1564,6 +1626,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
}
}
+ if (record.opts.overwrite)
+ record.opts.tail_synthesize = true;
+
if (rec->evlist->nr_entries == 0 &&
perf_evlist__add_default(rec->evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 4b2ff021434c..64c06961bfe4 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -503,7 +503,7 @@ void pthread__unblock_sigwinch(void)
static void cache_line_size(int *cacheline_sizep)
{
if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
- perror("cannot determine cache line size");
+ pr_debug("cannot determine cache line size");
}
#endif
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cd8f1b150f9e..a7e0f1497244 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -59,6 +59,8 @@ struct record_opts {
bool record_switch_events;
bool all_kernel;
bool all_user;
+ bool tail_synthesize;
+ bool overwrite;
unsigned int freq;
unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages;
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index f20ea4c0d0cb..615780cbfe1d 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -31,8 +31,8 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count,
for (i = 0; i < evlist->nr_mmaps; i++) {
union perf_event *event;
- perf_evlist__mmap_read_catchup(evlist, i);
- while ((event = perf_evlist__mmap_read_backward(evlist, i)) != NULL) {
+ perf_mmap__read_catchup(&evlist->backward_mmap[i]);
+ while ((event = perf_mmap__read_backward(&evlist->backward_mmap[i])) != NULL) {
const u32 type = event->header.type;
switch (type) {
@@ -108,7 +108,11 @@ int test__backward_ring_buffer(int subtest __maybe_unused)
}
bzero(&parse_error, sizeof(parse_error));
- err = parse_events(evlist, "syscalls:sys_enter_prctl", &parse_error);
+ /*
+ * Set backward bit, ring buffer should be writing from end. Record
+ * it in aux evlist
+ */
+ err = parse_events(evlist, "syscalls:sys_enter_prctl/overwrite/", &parse_error);
if (err) {
pr_debug("Failed to parse tracepoint event, try use root\n");
ret = TEST_SKIP;
@@ -117,10 +121,6 @@ int test__backward_ring_buffer(int subtest __maybe_unused)
perf_evlist__config(evlist, &opts, NULL);
- /* Set backward bit, ring buffer should be writing from end */
- evlist__for_each_entry(evlist, evsel)
- evsel->attr.write_backward = 1;
-
err = perf_evlist__open(evlist);
if (err < 0) {
pr_debug("perf_evlist__open: %s\n",
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 862e69c2690d..2a40b8e1def7 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -15,6 +15,7 @@
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
+#include "asm/bug.h"
#include <unistd.h>
#include "parse-events.h"
@@ -27,8 +28,8 @@
#include <linux/log2.h>
#include <linux/err.h>
-static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
-static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
+static void perf_mmap__munmap(struct perf_mmap *map);
+static void perf_mmap__put(struct perf_mmap *map);
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
@@ -44,7 +45,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
perf_evlist__set_maps(evlist, cpus, threads);
fdarray__init(&evlist->pollfd, 64);
evlist->workload.pid = -1;
- evlist->backward = false;
+ evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}
struct perf_evlist *perf_evlist__new(void)
@@ -122,6 +123,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
zfree(&evlist->mmap);
+ zfree(&evlist->backward_mmap);
fdarray__exit(&evlist->pollfd);
}
@@ -465,7 +467,8 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
return 0;
}
-static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx, short revent)
+static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
+ struct perf_mmap *map, short revent)
{
int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
/*
@@ -473,7 +476,7 @@ static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx
* close the associated evlist->mmap[] entry.
*/
if (pos >= 0) {
- evlist->pollfd.priv[pos].idx = idx;
+ evlist->pollfd.priv[pos].ptr = map;
fcntl(fd, F_SETFL, O_NONBLOCK);
}
@@ -483,15 +486,16 @@ static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
- return __perf_evlist__add_pollfd(evlist, fd, -1, POLLIN);
+ return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}
static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
void *arg __maybe_unused)
{
- struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
+ struct perf_mmap *map = fda->priv[fd].ptr;
- perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
+ if (map)
+ perf_mmap__put(map);
}
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
@@ -688,8 +692,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
int i;
+ if (!evlist->backward_mmap)
+ return 0;
+
for (i = 0; i < evlist->nr_mmaps; i++) {
- int fd = evlist->mmap[i].fd;
+ int fd = evlist->backward_mmap[i].fd;
int err;
if (fd < 0)
@@ -701,12 +708,12 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
return 0;
}
-int perf_evlist__pause(struct perf_evlist *evlist)
+static int perf_evlist__pause(struct perf_evlist *evlist)
{
return perf_evlist__set_paused(evlist, true);
}
-int perf_evlist__resume(struct perf_evlist *evlist)
+static int perf_evlist__resume(struct perf_evlist *evlist)
{
return perf_evlist__set_paused(evlist, false);
}
@@ -781,9 +788,8 @@ broken_event:
return event;
}
-union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
+union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup)
{
- struct perf_mmap *md = &evlist->mmap[idx];
u64 head;
u64 old = md->prev;
@@ -795,13 +801,12 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int
head = perf_mmap__read_head(md);
- return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev);
+ return perf_mmap__read(md, check_messup, old, head, &md->prev);
}
union perf_event *
-perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
+perf_mmap__read_backward(struct perf_mmap *md)
{
- struct perf_mmap *md = &evlist->mmap[idx];
u64 head, end;
u64 start = md->prev;
@@ -836,16 +841,38 @@ perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
return perf_mmap__read(md, false, start, end, &md->prev);
}
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
{
- if (!evlist->backward)
- return perf_evlist__mmap_read_forward(evlist, idx);
- return perf_evlist__mmap_read_backward(evlist, idx);
+ struct perf_mmap *md = &evlist->mmap[idx];
+
+ /*
+ * The messup check is required for a forward overwritable ring
+ * buffer: the memory pointed to by md->prev can be overwritten in
+ * this case. It is not needed for a read-write ring buffer: the
+ * kernel stops outputting when it hits md->prev (perf_mmap__consume()).
+ */
+ return perf_mmap__read_forward(md, evlist->overwrite);
}
-void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
+union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
{
struct perf_mmap *md = &evlist->mmap[idx];
+
+ /*
+ * No need to check for messup on a backward ring buffer:
+ * we can always read arbitrarily long data from a backward
+ * ring buffer unless we forget to pause it before reading.
+ */
+ return perf_mmap__read_backward(md);
+}
+
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+{
+ return perf_evlist__mmap_read_forward(evlist, idx);
+}
+
+void perf_mmap__read_catchup(struct perf_mmap *md)
+{
u64 head;
if (!atomic_read(&md->refcnt))
@@ -855,38 +882,44 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
md->prev = head;
}
+void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
+{
+ perf_mmap__read_catchup(&evlist->mmap[idx]);
+}
+
static bool perf_mmap__empty(struct perf_mmap *md)
{
return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}
-static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
+static void perf_mmap__get(struct perf_mmap *map)
{
- atomic_inc(&evlist->mmap[idx].refcnt);
+ atomic_inc(&map->refcnt);
}
-static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
+static void perf_mmap__put(struct perf_mmap *md)
{
- struct perf_mmap *md = &evlist->mmap[idx];
-
BUG_ON(md->base && atomic_read(&md->refcnt) == 0);
if (atomic_dec_and_test(&md->refcnt))
- __perf_evlist__munmap(evlist, idx);
+ perf_mmap__munmap(md);
}
-void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
+void perf_mmap__consume(struct perf_mmap *md, bool overwrite)
{
- struct perf_mmap *md = &evlist->mmap[idx];
-
- if (!evlist->overwrite) {
+ if (!overwrite) {
u64 old = md->prev;
perf_mmap__write_tail(md, old);
}
if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
- perf_evlist__mmap_put(evlist, idx);
+ perf_mmap__put(md);
+}
+
+void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
+{
+ perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite);
}
int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
@@ -917,44 +950,52 @@ void __weak auxtrace_mmap_params__set_idx(
{
}
-static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
+static void perf_mmap__munmap(struct perf_mmap *map)
{
- if (evlist->mmap[idx].base != NULL) {
- munmap(evlist->mmap[idx].base, evlist->mmap_len);
- evlist->mmap[idx].base = NULL;
- evlist->mmap[idx].fd = -1;
- atomic_set(&evlist->mmap[idx].refcnt, 0);
+ if (map->base != NULL) {
+ munmap(map->base, perf_mmap__mmap_len(map));
+ map->base = NULL;
+ map->fd = -1;
+ atomic_set(&map->refcnt, 0);
}
- auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
+ auxtrace_mmap__munmap(&map->auxtrace_mmap);
}
-void perf_evlist__munmap(struct perf_evlist *evlist)
+static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
{
int i;
- if (evlist->mmap == NULL)
- return;
+ if (evlist->mmap)
+ for (i = 0; i < evlist->nr_mmaps; i++)
+ perf_mmap__munmap(&evlist->mmap[i]);
- for (i = 0; i < evlist->nr_mmaps; i++)
- __perf_evlist__munmap(evlist, i);
+ if (evlist->backward_mmap)
+ for (i = 0; i < evlist->nr_mmaps; i++)
+ perf_mmap__munmap(&evlist->backward_mmap[i]);
+}
+void perf_evlist__munmap(struct perf_evlist *evlist)
+{
+ perf_evlist__munmap_nofree(evlist);
zfree(&evlist->mmap);
+ zfree(&evlist->backward_mmap);
}
-static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
+static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
int i;
+ struct perf_mmap *map;
evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
if (cpu_map__empty(evlist->cpus))
evlist->nr_mmaps = thread_map__nr(evlist->threads);
- evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
- if (!evlist->mmap)
- return -ENOMEM;
+ map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+ if (!map)
+ return NULL;
for (i = 0; i < evlist->nr_mmaps; i++)
- evlist->mmap[i].fd = -1;
- return 0;
+ map[i].fd = -1;
+ return map;
}
struct mmap_params {
@@ -963,8 +1004,8 @@ struct mmap_params {
struct auxtrace_mmap_params auxtrace_mp;
};
-static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
- struct mmap_params *mp, int fd)
+static int perf_mmap__mmap(struct perf_mmap *map,
+ struct mmap_params *mp, int fd)
{
/*
* The last one will be done at perf_evlist__mmap_consume(), so that we
@@ -979,21 +1020,21 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
* evlist layer can't just drop it when filtering events in
* perf_evlist__filter_pollfd().
*/
- atomic_set(&evlist->mmap[idx].refcnt, 2);
- evlist->mmap[idx].prev = 0;
- evlist->mmap[idx].mask = mp->mask;
- evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
- MAP_SHARED, fd, 0);
- if (evlist->mmap[idx].base == MAP_FAILED) {
+ atomic_set(&map->refcnt, 2);
+ map->prev = 0;
+ map->mask = mp->mask;
+ map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
+ MAP_SHARED, fd, 0);
+ if (map->base == MAP_FAILED) {
pr_debug2("failed to mmap perf event ring buffer, error %d\n",
errno);
- evlist->mmap[idx].base = NULL;
+ map->base = NULL;
return -1;
}
- evlist->mmap[idx].fd = fd;
+ map->fd = fd;
- if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
- &mp->auxtrace_mp, evlist->mmap[idx].base, fd))
+ if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
+ &mp->auxtrace_mp, map->base, fd))
return -1;
return 0;
@@ -1003,23 +1044,36 @@ static bool
perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
struct perf_evsel *evsel)
{
- if (evsel->overwrite)
+ if (evsel->attr.write_backward)
return false;
return true;
}
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int cpu,
- int thread, int *output)
+ int thread, int *_output, int *_output_backward)
{
struct perf_evsel *evsel;
int revent;
evlist__for_each_entry(evlist, evsel) {
+ struct perf_mmap *maps = evlist->mmap;
+ int *output = _output;
int fd;
- if (evsel->overwrite != (evlist->overwrite && evlist->backward))
- continue;
+ if (evsel->attr.write_backward) {
+ output = _output_backward;
+ maps = evlist->backward_mmap;
+
+ if (!maps) {
+ maps = perf_evlist__alloc_mmap(evlist);
+ if (!maps)
+ return -1;
+ evlist->backward_mmap = maps;
+ if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+ }
+ }
if (evsel->system_wide && thread)
continue;
@@ -1028,13 +1082,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
if (*output == -1) {
*output = fd;
- if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
+
+ if (perf_mmap__mmap(&maps[idx], mp, *output) < 0)
return -1;
} else {
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
return -1;
- perf_evlist__mmap_get(evlist, idx);
+ perf_mmap__get(&maps[idx]);
}
revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;
@@ -1047,8 +1102,8 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
* Therefore don't add it for polling.
*/
if (!evsel->system_wide &&
- __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) {
- perf_evlist__mmap_put(evlist, idx);
+ __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
+ perf_mmap__put(&maps[idx]);
return -1;
}
@@ -1074,13 +1129,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per cpu\n");
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
+ int output_backward = -1;
auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
true);
for (thread = 0; thread < nr_threads; thread++) {
if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
- thread, &output))
+ thread, &output, &output_backward))
goto out_unmap;
}
}
@@ -1088,8 +1144,7 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
return 0;
out_unmap:
- for (cpu = 0; cpu < nr_cpus; cpu++)
- __perf_evlist__munmap(evlist, cpu);
+ perf_evlist__munmap_nofree(evlist);
return -1;
}
@@ -1102,20 +1157,20 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
pr_debug2("perf event ring buffer mmapped per thread\n");
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;
+ int output_backward = -1;
auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
false);
if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
- &output))
+ &output, &output_backward))
goto out_unmap;
}
return 0;
out_unmap:
- for (thread = 0; thread < nr_threads; thread++)
- __perf_evlist__munmap(evlist, thread);
+ perf_evlist__munmap_nofree(evlist);
return -1;
}
@@ -1248,7 +1303,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
.prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
};
- if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
+ if (!evlist->mmap)
+ evlist->mmap = perf_evlist__alloc_mmap(evlist);
+ if (!evlist->mmap)
return -ENOMEM;
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
@@ -1919,3 +1976,61 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
return NULL;
}
+
+void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
+ enum bkw_mmap_state state)
+{
+ enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
+ enum action {
+ NONE,
+ PAUSE,
+ RESUME,
+ } action = NONE;
+
+ if (!evlist->backward_mmap)
+ return;
+
+ switch (old_state) {
+ case BKW_MMAP_NOTREADY: {
+ if (state != BKW_MMAP_RUNNING)
+ goto state_err;;
+ break;
+ }
+ case BKW_MMAP_RUNNING: {
+ if (state != BKW_MMAP_DATA_PENDING)
+ goto state_err;
+ action = PAUSE;
+ break;
+ }
+ case BKW_MMAP_DATA_PENDING: {
+ if (state != BKW_MMAP_EMPTY)
+ goto state_err;
+ break;
+ }
+ case BKW_MMAP_EMPTY: {
+ if (state != BKW_MMAP_RUNNING)
+ goto state_err;
+ action = RESUME;
+ break;
+ }
+ default:
+ WARN_ONCE(1, "Shouldn't get there\n");
+ }
+
+ evlist->bkw_mmap_state = state;
+
+ switch (action) {
+ case PAUSE:
+ perf_evlist__pause(evlist);
+ break;
+ case RESUME:
+ perf_evlist__resume(evlist);
+ break;
+ case NONE:
+ default:
+ break;
+ }
+
+state_err:
+ return;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index afd087761a47..4fd034f22d2f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -35,6 +35,40 @@ struct perf_mmap {
char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
};
+static inline size_t
+perf_mmap__mmap_len(struct perf_mmap *map)
+{
+ return map->mask + 1 + page_size;
+}
+
+/*
+ * State machine of bkw_mmap_state:
+ *
+ * .________________(forbid)_____________.
+ * | V
+ * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
+ * ^ ^ | ^ |
+ * | |__(forbid)____/ |___(forbid)___/|
+ * | |
+ * \_________________(3)_______________/
+ *
+ * NOTREADY : Backward ring buffers are not ready
+ * RUNNING : Backward ring buffers are recording
+ * DATA_PENDING : We are required to collect data from backward ring buffers
+ * EMPTY : We have collected data from backward ring buffers.
+ *
+ * (0): Setup backward ring buffer
+ * (1): Pause ring buffers for reading
+ * (2): Read from ring buffers
+ * (3): Resume ring buffers for recording
+ */
+enum bkw_mmap_state {
+ BKW_MMAP_NOTREADY,
+ BKW_MMAP_RUNNING,
+ BKW_MMAP_DATA_PENDING,
+ BKW_MMAP_EMPTY,
+};
+
struct perf_evlist {
struct list_head entries;
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
@@ -44,17 +78,18 @@ struct perf_evlist {
bool overwrite;
bool enabled;
bool has_user_cpus;
- bool backward;
size_t mmap_len;
int id_pos;
int is_pos;
u64 combined_sample_type;
+ enum bkw_mmap_state bkw_mmap_state;
struct {
int cork_fd;
pid_t pid;
} workload;
struct fdarray pollfd;
struct perf_mmap *mmap;
+ struct perf_mmap *backward_mmap;
struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evsel *selected;
@@ -129,6 +164,14 @@ struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
+void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state);
+
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup);
+union perf_event *perf_mmap__read_backward(struct perf_mmap *map);
+
+void perf_mmap__read_catchup(struct perf_mmap *md);
+void perf_mmap__consume(struct perf_mmap *md, bool overwrite);
+
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
@@ -139,8 +182,6 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
-int perf_evlist__pause(struct perf_evlist *evlist);
-int perf_evlist__resume(struct perf_evlist *evlist);
int perf_evlist__open(struct perf_evlist *evlist);
void perf_evlist__close(struct perf_evlist *evlist);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ba0f59fa3d5d..8c54df61fe64 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -695,6 +695,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
*/
attr->inherit = term->val.inherit ? 1 : 0;
break;
+ case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
+ attr->write_backward = term->val.overwrite ? 1 : 0;
+ break;
default:
break;
}
@@ -776,6 +779,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
attr->inherit = !opts->no_inherit;
+ attr->write_backward = opts->overwrite ? 1 : 0;
perf_evsel__set_sample_bit(evsel, IP);
perf_evsel__set_sample_bit(evsel, TID);
@@ -1377,6 +1381,9 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
int pid = -1, err;
enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
+ if (perf_missing_features.write_backward && evsel->attr.write_backward)
+ return -EINVAL;
+
if (evsel->system_wide)
nthreads = 1;
else
@@ -1407,11 +1414,6 @@ fallback_missing_features:
if (perf_missing_features.lbr_flags)
evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
PERF_SAMPLE_BRANCH_NO_CYCLES);
- if (perf_missing_features.write_backward) {
- if (evsel->overwrite)
- return -EINVAL;
- evsel->attr.write_backward = false;
- }
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
@@ -1513,7 +1515,7 @@ try_fallback:
*/
if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
perf_missing_features.write_backward = true;
- goto fallback_missing_features;
+ goto out_close;
} else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
perf_missing_features.clockid_wrong = true;
goto fallback_missing_features;
@@ -2422,7 +2424,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
"We found oprofile daemon running, please stop it and try again.");
break;
case EINVAL:
- if (evsel->overwrite && perf_missing_features.write_backward)
+ if (evsel->attr.write_backward && perf_missing_features.write_backward)
return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
if (perf_missing_features.clockid)
return scnprintf(msg, size, "clockid feature not supported.");
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d73391e8740e..8a4a6c9f1480 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -45,6 +45,7 @@ enum {
PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_INHERIT,
PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+ PERF_EVSEL__CONFIG_TERM_OVERWRITE,
PERF_EVSEL__CONFIG_TERM_MAX,
};
@@ -59,6 +60,7 @@ struct perf_evsel_config_term {
u64 stack_user;
int max_stack;
bool inherit;
+ bool overwrite;
} val;
};
@@ -114,7 +116,6 @@ struct perf_evsel {
bool tracking;
bool per_pkg;
bool precise_max;
- bool overwrite;
/* parse modifier helper */
int exclude_GH;
int nr_members;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 375af0e02831..6c913c3914fb 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -902,6 +902,8 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
[PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit",
[PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit",
[PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack",
+ [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite",
+ [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite",
};
static bool config_term_shrinked;
@@ -994,6 +996,12 @@ do { \
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
CHECK_TYPE_VAL(NUM);
break;
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ CHECK_TYPE_VAL(NUM);
+ break;
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
@@ -1046,6 +1054,8 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_INHERIT:
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
return config_term_common(attr, term, err);
default:
if (err) {
@@ -1118,6 +1128,12 @@ do { \
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
break;
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1);
+ break;
default:
break;
}
@@ -2412,9 +2428,9 @@ static void config_terms_list(char *buf, size_t buf_sz)
char *parse_events_formats_error_string(char *additional_terms)
{
char *str;
- /* "branch_type" is the longest name */
+ /* "no-overwrite" is the longest name */
char static_terms[__PARSE_EVENTS__TERM_TYPE_NR *
- (sizeof("branch_type") - 1)];
+ (sizeof("no-overwrite") - 1)];
config_terms_list(static_terms, sizeof(static_terms));
/* valid terms */
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index b4aa7eb2df73..d1edbf8cc66a 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -69,6 +69,8 @@ enum {
PARSE_EVENTS__TERM_TYPE_NOINHERIT,
PARSE_EVENTS__TERM_TYPE_INHERIT,
PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+ PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
+ PARSE_EVENTS__TERM_TYPE_OVERWRITE,
__PARSE_EVENTS__TERM_TYPE_NR,
};
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 3c15b33b2e84..7a2519435da0 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -202,6 +202,8 @@ stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
+overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
+no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 078d49626494..5d61242a6e64 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1499,10 +1499,27 @@ int perf_session__register_idle_thread(struct perf_session *session)
return err;
}
+static void
+perf_session__warn_order(const struct perf_session *session)
+{
+ const struct ordered_events *oe = &session->ordered_events;
+ struct perf_evsel *evsel;
+ bool should_warn = true;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->attr.write_backward)
+ should_warn = false;
+ }
+
+ if (!should_warn)
+ return;
+ if (oe->nr_unordered_events != 0)
+ ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
+}
+
static void perf_session__warn_about_errors(const struct perf_session *session)
{
const struct events_stats *stats = &session->evlist->stats;
- const struct ordered_events *oe = &session->ordered_events;
if (session->tool->lost == perf_event__process_lost &&
stats->nr_events[PERF_RECORD_LOST] != 0) {
@@ -1559,8 +1576,7 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
stats->nr_unprocessable_samples);
}
- if (oe->nr_unordered_events != 0)
- ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
+ perf_session__warn_order(session);
events_stats__auxtrace_error_warn(stats);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 5854b4660a49..947d21f38398 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2381,6 +2381,9 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
if (sort__mode != SORT_MODE__MEMORY)
return -EINVAL;
+ if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0)
+ return -EINVAL;
+
if (sd->entry == &sort_mem_daddr_sym)
list->sym = 1;
@@ -2424,7 +2427,10 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
if (*tok) {
ret = sort_dimension__add(list, tok, evlist, level);
if (ret == -EINVAL) {
- error("Invalid --sort key: `%s'", tok);
+ if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok)))
+ error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
+ else
+ error("Invalid --sort key: `%s'", tok);
break;
} else if (ret == -ESRCH) {
error("Unknown --sort key: `%s'", tok);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 6178cab82374..843cbba8f9d3 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -360,7 +360,7 @@ void print_binary(unsigned char *data, size_t len,
size_t bytes_per_line, print_binary_t printer,
void *extra);
-#ifndef __GLIBC__
+#if !defined(__GLIBC__) && !defined(__ANDROID__)
extern int sched_getcpu(void);
#endif