summaryrefslogtreecommitdiff
path: root/arch/x86/events/intel/pt.c
diff options
context:
space:
mode:
authorAlexander Shishkin <alexander.shishkin@linux.intel.com>2019-11-05 10:27:01 +0200
committerIngo Molnar <mingo@kernel.org>2019-11-13 11:06:18 +0100
commit295c52ee1485e4dee660fc1a0e6ceed6c803c9d3 (patch)
treef7d128e0f95aa214fb7edd20b565eaae1577f16c /arch/x86/events/intel/pt.c
parent670638477aede0d7a355ced04b569214aa3feacd (diff)
downloadlwn-295c52ee1485e4dee660fc1a0e6ceed6c803c9d3.tar.gz
lwn-295c52ee1485e4dee660fc1a0e6ceed6c803c9d3.zip
perf/x86/intel/pt: Prevent redundant WRMSRs
With recent optimizations to AUX and PT buffer management code (high order AUX allocations, opportunistic Single Range Output), it is far more likely now that the output MSRs won't need reprogramming on every sched-in. To avoid needless WRMSRs of those registers, cache their values and only write them when needed. Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: https://lkml.kernel.org/r/20191105082701.78442-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/events/intel/pt.c')
-rw-r--r--arch/x86/events/intel/pt.c25
1 files changed, 16 insertions, 9 deletions
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index c87d163c2917..1db7a51d9792 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -606,6 +606,7 @@ static inline phys_addr_t topa_pfn(struct topa *topa)
static void pt_config_buffer(struct pt_buffer *buf)
{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 reg, mask;
void *base;
@@ -617,11 +618,17 @@ static void pt_config_buffer(struct pt_buffer *buf)
mask = (u64)buf->cur_idx;
}
- wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(base));
+ reg = virt_to_phys(base);
+ if (pt->output_base != reg) {
+ pt->output_base = reg;
+ wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
+ }
reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
-
- wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+ if (pt->output_mask != reg) {
+ pt->output_mask = reg;
+ wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+ }
}
/**
@@ -930,21 +937,21 @@ static void pt_handle_status(struct pt *pt)
*/
static void pt_read_offset(struct pt_buffer *buf)
{
- u64 offset, base;
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
struct topa_page *tp;
if (!buf->single) {
- rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base);
- tp = phys_to_virt(base);
+ rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
+ tp = phys_to_virt(pt->output_base);
buf->cur = &tp->topa;
}
- rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+ rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
/* offset within current output region */
- buf->output_off = offset >> 32;
+ buf->output_off = pt->output_mask >> 32;
/* index of current output region within this table */
if (!buf->single)
- buf->cur_idx = (offset & 0xffffff80) >> 7;
+ buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
}
static struct topa_entry *