diff options
author | Stephane Eranian <eranian@google.com> | 2011-06-06 16:57:03 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-07-01 11:06:36 +0200 |
commit | efc9f05df2dd171280dcb736a4d973ffefd5508e (patch) | |
tree | ccc1cee8f1cc0ad5391732eb3637b685b4b155a0 /arch/x86/kernel/cpu/perf_event.c | |
parent | a7ac67ea021b4603095d2aa458bc41641238f22c (diff) | |
download | lwn-efc9f05df2dd171280dcb736a4d973ffefd5508e.tar.gz lwn-efc9f05df2dd171280dcb736a4d973ffefd5508e.zip |
perf_events: Update Intel extra regs shared constraints management
This patch improves the code managing the extra shared registers
used for offcore_response events on Intel Nehalem/Westmere. The
idea is to use static allocation instead of dynamic allocation.
This simplifies greatly the get and put constraint routines for
those events.
The patch also renames per_core to shared_regs because the same
data structure gets used whether or not HT is on. When HT is
off, those events still need to coordination because they use
a extra MSR that has to be shared within an event group.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110606145703.GA7258@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 78 |
1 files changed, 61 insertions, 17 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5b86ec51534c..019fda7489e7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -45,6 +45,29 @@ do { \ #endif /* + * | NHM/WSM | SNB | + * register ------------------------------- + * | HT | no HT | HT | no HT | + *----------------------------------------- + * offcore | core | core | cpu | core | + * lbr_sel | core | core | cpu | core | + * ld_lat | cpu | core | cpu | core | + *----------------------------------------- + * + * Given that there is a small number of shared regs, + * we can pre-allocate their slot in the per-cpu + * per-core reg tables. + */ +enum extra_reg_type { + EXTRA_REG_NONE = -1, /* not used */ + + EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ + EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ + + EXTRA_REG_MAX /* number of entries needed */ +}; + +/* * best effort, GUP based copy_from_user() that assumes IRQ or NMI context */ static unsigned long @@ -132,11 +155,10 @@ struct cpu_hw_events { struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; /* - * Intel percore register state. - * Coordinate shared resources between HT threads. + * manage shared (per-core, per-cpu) registers + * used on Intel NHM/WSM/SNB */ - int percore_used; /* Used by this CPU? */ - struct intel_percore *per_core; + struct intel_shared_regs *shared_regs; /* * AMD specific bits @@ -187,26 +209,45 @@ struct cpu_hw_events { for ((e) = (c); (e)->weight; (e)++) /* + * Per register state. + */ +struct er_account { + raw_spinlock_t lock; /* per-core: protect structure */ + u64 config; /* extra MSR config */ + u64 reg; /* extra MSR number */ + atomic_t ref; /* reference count */ +}; + +/* * Extra registers for specific events. + * * Some events need large masks and require external MSRs. - * Define a mapping to these extra registers. + * Those extra MSRs end up being shared for all events on + * a PMU and sometimes between PMU of sibling HT threads. + * In either case, the kernel needs to handle conflicting + * accesses to those extra, shared, regs. The data structure + * to manage those registers is stored in cpu_hw_event. */ struct extra_reg { unsigned int event; unsigned int msr; u64 config_mask; u64 valid_mask; + int idx; /* per_xxx->regs[] reg index */ }; -#define EVENT_EXTRA_REG(e, ms, m, vm) { \ +#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ .event = (e), \ .msr = (ms), \ .config_mask = (m), \ .valid_mask = (vm), \ + .idx = EXTRA_REG_##i \ } -#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ - EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) -#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) + +#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) + +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) union perf_capabilities { struct { @@ -253,7 +294,6 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; - struct event_constraint *percore_constraints; void (*quirks)(void); int perfctr_second_write; @@ -400,10 +440,10 @@ static inline unsigned int x86_pmu_event_addr(int index) */ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) { + struct hw_perf_event_extra *reg; struct extra_reg *er; - event->hw.extra_reg = 0; - event->hw.extra_config = 0; + reg = &event->hw.extra_reg; if (!x86_pmu.extra_regs) return 0; @@ -413,8 +453,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) continue; if (event->attr.config1 & ~er->valid_mask) return -EINVAL; - event->hw.extra_reg = er->msr; - event->hw.extra_config = event->attr.config1; + + reg->idx = er->idx; + reg->config = event->attr.config1; + reg->reg = er->msr; break; } return 0; @@ -713,6 +755,9 @@ static int __x86_pmu_event_init(struct perf_event *event) event->hw.last_cpu = -1; event->hw.last_tag = ~0ULL; + /* mark unused */ + event->hw.extra_reg.idx = EXTRA_REG_NONE; + return x86_pmu.hw_config(event); } @@ -754,8 +799,8 @@ static void x86_pmu_disable(struct pmu *pmu) static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { - if (hwc->extra_reg) - wrmsrl(hwc->extra_reg, hwc->extra_config); + if (hwc->extra_reg.reg) + wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); wrmsrl(hwc->config_base, hwc->config | enable_mask); } @@ -1692,7 +1737,6 @@ static int validate_group(struct perf_event *event) fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); if (!fake_cpuc) goto out; - /* * the event is not yet connected with its * siblings therefore we must first collect |