summary | refs | log | tree | commit | diff
path: root/kernel/perf_counter.c
diff options
context:
space:
mode:
author: Paul Mackerras <paulus@samba.org> 2009-02-11 14:35:35 +1100
committer: Paul Mackerras <paulus@samba.org> 2009-02-11 15:06:59 +1100
commit: 0475f9ea8e2cc030298908949e0d5da9f2fc2cfe (patch)
tree: eb2585d92e00ae4c7fc7e0654ffacde7e8a57e1c /kernel/perf_counter.c
parent: d278c48435625cb6b7edcf6a547620768b175709 (diff)
download: lwn-0475f9ea8e2cc030298908949e0d5da9f2fc2cfe.tar.gz
lwn-0475f9ea8e2cc030298908949e0d5da9f2fc2cfe.zip
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature

This extends the perf_counter_hw_event struct with bits that specify that events in user, kernel and/or hypervisor mode should not be counted (i.e. should be excluded), and adds code to program the PMU mode selection bits accordingly on x86 and powerpc.

For software counters, we don't currently have the infrastructure to distinguish which mode an event occurs in, so we currently fail the counter initialization if the setting of the hw_event.exclude_* bits would require us to distinguish. Context switches and CPU migrations are currently considered to occur in kernel mode.

On x86, this changes the previous policy that only root can count kernel events. Now non-root users can count kernel events or exclude them. Non-root users still can't use NMI events, though. On x86 we don't appear to have any way to control whether hypervisor events are counted or not, so hw_event.exclude_hv is ignored.

On powerpc, the selection of whether to count events in user, kernel and/or hypervisor mode is PMU-wide, not per-counter, so this adds a check that the hw_event.exclude_* settings are the same as other events on the PMU. Counters being added to a group have to have the same settings as the other hardware counters in the group. Counters and groups can only be enabled in hw_perf_group_sched_in or power_perf_enable if they have the same settings as any other counters already on the PMU. If we are not running on a hypervisor, the exclude_hv setting is ignored (by forcing it to 0) since we can't ever get any hypervisor events.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r-- kernel/perf_counter.c 26
1 files changed, 22 insertions, 4 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 544193cbc478..89d5e3fe9700 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1567,11 +1567,25 @@ sw_perf_counter_init(struct perf_counter *counter)
{
const struct hw_perf_counter_ops *hw_ops = NULL;
+ /*
+ * Software counters (currently) can't in general distinguish
+ * between user, kernel and hypervisor events.
+ * However, context switches and cpu migrations are considered
+ * to be kernel events, and page faults are never hypervisor
+ * events.
+ */
switch (counter->hw_event.type) {
case PERF_COUNT_CPU_CLOCK:
- hw_ops = &perf_ops_cpu_clock;
+ if (!(counter->hw_event.exclude_user ||
+ counter->hw_event.exclude_kernel ||
+ counter->hw_event.exclude_hv))
+ hw_ops = &perf_ops_cpu_clock;
break;
case PERF_COUNT_TASK_CLOCK:
+ if (counter->hw_event.exclude_user ||
+ counter->hw_event.exclude_kernel ||
+ counter->hw_event.exclude_hv)
+ break;
/*
* If the user instantiates this as a per-cpu counter,
* use the cpu_clock counter instead.
@@ -1582,13 +1596,17 @@ sw_perf_counter_init(struct perf_counter *counter)
hw_ops = &perf_ops_cpu_clock;
break;
case PERF_COUNT_PAGE_FAULTS:
- hw_ops = &perf_ops_page_faults;
+ if (!(counter->hw_event.exclude_user ||
+ counter->hw_event.exclude_kernel))
+ hw_ops = &perf_ops_page_faults;
break;
case PERF_COUNT_CONTEXT_SWITCHES:
- hw_ops = &perf_ops_context_switches;
+ if (!counter->hw_event.exclude_kernel)
+ hw_ops = &perf_ops_context_switches;
break;
case PERF_COUNT_CPU_MIGRATIONS:
- hw_ops = &perf_ops_cpu_migrations;
+ if (!counter->hw_event.exclude_kernel)
+ hw_ops = &perf_ops_cpu_migrations;
break;
default:
break;