diff options
author | Will Deacon <will.deacon@arm.com> | 2018-07-27 14:39:04 +0100 |
---|---|---|
committer | Will Deacon <will.deacon@arm.com> | 2018-07-27 14:39:04 +0100 |
commit | ba70ffa7d20d771ae47a1597799da84980aafe15 (patch) | |
tree | a25f393d7c97683673f12396369a8a6f95e136dc | |
parent | c5157101e7793b42a56e07368c7f4cb73fb58008 (diff) | |
parent | 809092dc3e606f3508b53baa624b27bfff8f0e7f (diff) | |
download | lwn-ba70ffa7d20d771ae47a1597799da84980aafe15.tar.gz lwn-ba70ffa7d20d771ae47a1597799da84980aafe15.zip |
Merge branch 'for-next/perf' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into aarch64/for-next/core
Pull in arm perf updates, including support for 64-bit (chained) event
counters and some non-critical fixes for some of the system PMU drivers.
Signed-off-by: Will Deacon <will.deacon@arm.com>
-rw-r--r-- | arch/arm/kernel/perf_event_v6.c | 14 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v7.c | 15 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_xscale.c | 18 | ||||
-rw-r--r-- | arch/arm64/kernel/perf_event.c | 251 | ||||
-rw-r--r-- | drivers/perf/arm-cci.c | 38 | ||||
-rw-r--r-- | drivers/perf/arm-ccn.c | 14 | ||||
-rw-r--r-- | drivers/perf/arm_pmu.c | 38 | ||||
-rw-r--r-- | drivers/perf/arm_pmu_platform.c | 2 | ||||
-rw-r--r-- | drivers/perf/hisilicon/hisi_uncore_pmu.c | 12 | ||||
-rw-r--r-- | include/linux/perf/arm_pmu.h | 11 |
10 files changed, 302 insertions, 111 deletions
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index be42c4f66a40..1ae99deeec54 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -233,7 +233,7 @@ armv6_pmcr_counter_has_overflowed(unsigned long pmcr, return ret; } -static inline u32 armv6pmu_read_counter(struct perf_event *event) +static inline u64 armv6pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -251,7 +251,7 @@ static inline u32 armv6pmu_read_counter(struct perf_event *event) return value; } -static inline void armv6pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv6pmu_write_counter(struct perf_event *event, u64 value) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -411,6 +411,12 @@ armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, } } +static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + static void armv6pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; @@ -491,11 +497,11 @@ static void armv6pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv6pmu_read_counter; cpu_pmu->write_counter = armv6pmu_write_counter; cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; cpu_pmu->start = armv6pmu_start; cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; } static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) @@ -542,11 +548,11 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv6pmu_read_counter; cpu_pmu->write_counter = armv6pmu_write_counter; cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; cpu_pmu->start = armv6pmu_start; cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6mpcore_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 57f01e059f39..a4fb0f8b8f84 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -743,7 +743,7 @@ static inline void armv7_pmnc_select_counter(int idx) isb(); } -static inline u32 armv7pmu_read_counter(struct perf_event *event) +static inline u64 armv7pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -763,7 +763,7 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event) return value; } -static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv7pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -1058,6 +1058,12 @@ static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + /* * Add an event filter to a given event. This will only work for PMUv2 PMUs. */ @@ -1167,10 +1173,10 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv7pmu_read_counter; cpu_pmu->write_counter = armv7pmu_write_counter; cpu_pmu->get_event_idx = armv7pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx; cpu_pmu->start = armv7pmu_start; cpu_pmu->stop = armv7pmu_stop; cpu_pmu->reset = armv7pmu_reset; - cpu_pmu->max_period = (1LLU << 32) - 1; }; static void armv7_read_num_pmnc_events(void *info) @@ -1638,6 +1644,7 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, bool venum_event = EVENT_VENUM(hwc->config_base); bool krait_event = EVENT_CPU(hwc->config_base); + armv7pmu_clear_event_idx(cpuc, event); if (venum_event || krait_event) { bit = krait_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); @@ -1967,6 +1974,7 @@ static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, bool venum_event = EVENT_VENUM(hwc->config_base); bool scorpion_event = EVENT_CPU(hwc->config_base); + armv7pmu_clear_event_idx(cpuc, event); if (venum_event || scorpion_event) { bit = scorpion_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); @@ -2030,6 +2038,7 @@ static struct platform_driver armv7_pmu_driver = { .driver = { .name = "armv7-pmu", .of_match_table = armv7_pmu_of_device_ids, + .suppress_bind_attrs = true, }, .probe = armv7_pmu_device_probe, }; diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 88d1a76f5367..f6cdcacfb96d 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -292,6 +292,12 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, } } +static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + static void xscale1pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; @@ -316,7 +322,7 @@ static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 xscale1pmu_read_counter(struct perf_event *event) +static inline u64 xscale1pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -337,7 +343,7 @@ static inline u32 xscale1pmu_read_counter(struct perf_event *event) return val; } -static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val) +static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -370,11 +376,11 @@ static int xscale1pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = xscale1pmu_read_counter; cpu_pmu->write_counter = xscale1pmu_write_counter; cpu_pmu->get_event_idx = xscale1pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; cpu_pmu->start = xscale1pmu_start; cpu_pmu->stop = xscale1pmu_stop; cpu_pmu->map_event = xscale_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } @@ -679,7 +685,7 @@ static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 xscale2pmu_read_counter(struct perf_event *event) +static inline u64 xscale2pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -706,7 +712,7 @@ static inline u32 xscale2pmu_read_counter(struct perf_event *event) return val; } -static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val) +static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -739,11 +745,11 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = xscale2pmu_read_counter; cpu_pmu->write_counter = xscale2pmu_write_counter; cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; cpu_pmu->start = xscale2pmu_start; cpu_pmu->stop = xscale2pmu_stop; cpu_pmu->map_event = xscale_map_event; cpu_pmu->num_events = 5; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 33147aacdafd..dfff5ed5c625 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -446,9 +446,16 @@ static struct attribute_group armv8_pmuv3_events_attr_group = { }; PMU_FORMAT_ATTR(event, "config:0-15"); +PMU_FORMAT_ATTR(long, "config1:0"); + +static inline bool armv8pmu_event_is_64bit(struct perf_event *event) +{ + return event->attr.config1 & 0x1; +} static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, + &format_attr_long.attr, NULL, }; @@ -466,6 +473,21 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) /* + * We must chain two programmable counters for 64 bit events, + * except when we have allocated the 64bit cycle counter (for CPU + * cycles event). This must be called only when the event has + * a counter allocated. + */ +static inline bool armv8pmu_event_is_chained(struct perf_event *event) +{ + int idx = event->hw.idx; + + return !WARN_ON(idx < 0) && + armv8pmu_event_is_64bit(event) && + (idx != ARMV8_IDX_CYCLE_COUNTER); +} + +/* * ARMv8 low level PMU access */ @@ -503,34 +525,68 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline int armv8pmu_select_counter(int idx) +static inline void armv8pmu_select_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); write_sysreg(counter, pmselr_el0); isb(); +} - return idx; +static inline u32 armv8pmu_read_evcntr(int idx) +{ + armv8pmu_select_counter(idx); + return read_sysreg(pmxevcntr_el0); +} + +static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + u64 val = 0; + + val = armv8pmu_read_evcntr(idx); + if (armv8pmu_event_is_chained(event)) + val = (val << 32) | armv8pmu_read_evcntr(idx - 1); + return val; } -static inline u32 armv8pmu_read_counter(struct perf_event *event) +static inline u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - u32 value = 0; + u64 value = 0; if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); - else if (armv8pmu_select_counter(idx) == idx) - value = read_sysreg(pmxevcntr_el0); + else + value = armv8pmu_read_hw_counter(event); return value; } -static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv8pmu_write_evcntr(int idx, u32 value) +{ + armv8pmu_select_counter(idx); + write_sysreg(value, pmxevcntr_el0); +} + +static inline void armv8pmu_write_hw_counter(struct perf_event *event, + u64 value) +{ + int idx = event->hw.idx; + + if (armv8pmu_event_is_chained(event)) { + armv8pmu_write_evcntr(idx, upper_32_bits(value)); + armv8pmu_write_evcntr(idx - 1, lower_32_bits(value)); + } else { + armv8pmu_write_evcntr(idx, value); + } +} + +static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -541,22 +597,43 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) { /* - * Set the upper 32bits as this is a 64bit counter but we only - * count using the lower 32bits and we want an interrupt when - * it overflows. + * The cycles counter is really a 64-bit counter. + * When treating it as a 32-bit counter, we only count + * the lower 32 bits, and set the upper 32-bits so that + * we get an interrupt upon 32-bit overflow. */ - u64 value64 = 0xffffffff00000000ULL | value; - - write_sysreg(value64, pmccntr_el0); - } else if (armv8pmu_select_counter(idx) == idx) - write_sysreg(value, pmxevcntr_el0); + if (!armv8pmu_event_is_64bit(event)) + value |= 0xffffffff00000000ULL; + write_sysreg(value, pmccntr_el0); + } else + armv8pmu_write_hw_counter(event, value); } static inline void armv8pmu_write_evtype(int idx, u32 val) { - if (armv8pmu_select_counter(idx) == idx) { - val &= ARMV8_PMU_EVTYPE_MASK; - write_sysreg(val, pmxevtyper_el0); + armv8pmu_select_counter(idx); + val &= ARMV8_PMU_EVTYPE_MASK; + write_sysreg(val, pmxevtyper_el0); +} + +static inline void armv8pmu_write_event_type(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * For chained events, the low counter is programmed to count + * the event of interest and the high counter is programmed + * with CHAIN event code with filters set to count at all ELs. + */ + if (armv8pmu_event_is_chained(event)) { + u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN | + ARMV8_PMU_INCLUDE_EL2; + + armv8pmu_write_evtype(idx - 1, hwc->config_base); + armv8pmu_write_evtype(idx, chain_evt); + } else { + armv8pmu_write_evtype(idx, hwc->config_base); } } @@ -567,6 +644,16 @@ static inline int armv8pmu_enable_counter(int idx) return idx; } +static inline void armv8pmu_enable_event_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + + armv8pmu_enable_counter(idx); + if (armv8pmu_event_is_chained(event)) + armv8pmu_enable_counter(idx - 1); + isb(); +} + static inline int armv8pmu_disable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -574,6 +661,16 @@ static inline int armv8pmu_disable_counter(int idx) return idx; } +static inline void armv8pmu_disable_event_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_chained(event)) + armv8pmu_disable_counter(idx - 1); + armv8pmu_disable_counter(idx); +} + static inline int armv8pmu_enable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -581,6 +678,11 @@ static inline int armv8pmu_enable_intens(int idx) return idx; } +static inline int armv8pmu_enable_event_irq(struct perf_event *event) +{ + return armv8pmu_enable_intens(event->hw.idx); +} + static inline int armv8pmu_disable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -593,6 +695,11 @@ static inline int armv8pmu_disable_intens(int idx) return idx; } +static inline int armv8pmu_disable_event_irq(struct perf_event *event) +{ + return armv8pmu_disable_intens(event->hw.idx); +} + static inline u32 armv8pmu_getreset_flags(void) { u32 value; @@ -610,10 +717,8 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Enable counter and interrupt, and set the counter to count @@ -624,22 +729,22 @@ static void armv8pmu_enable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Set event (if destined for PMNx counters). */ - armv8pmu_write_evtype(idx, hwc->config_base); + armv8pmu_write_event_type(event); /* * Enable interrupt for this counter */ - armv8pmu_enable_intens(idx); + armv8pmu_enable_event_irq(event); /* * Enable counter */ - armv8pmu_enable_counter(idx); + armv8pmu_enable_event_counter(event); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } @@ -647,10 +752,8 @@ static void armv8pmu_enable_event(struct perf_event *event) static void armv8pmu_disable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Disable counter and interrupt @@ -660,13 +763,35 @@ static void armv8pmu_disable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Disable interrupt for this counter */ - armv8pmu_disable_intens(idx); + armv8pmu_disable_event_irq(event); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Enable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Disable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } @@ -694,6 +819,11 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) */ regs = get_irq_regs(); + /* + * Stop the PMU while processing the counter overflows + * to prevent skews in group events. + */ + armv8pmu_stop(cpu_pmu); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -718,6 +848,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (perf_event_overflow(event, &data, regs)) cpu_pmu->disable(event); } + armv8pmu_start(cpu_pmu); /* * Handle the pending perf events. @@ -731,32 +862,42 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) return IRQ_HANDLED; } -static void armv8pmu_start(struct arm_pmu *cpu_pmu) +static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx; - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Enable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; } -static void armv8pmu_stop(struct arm_pmu *cpu_pmu) +static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx; - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Disable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + /* + * Chaining requires two consecutive event counters, where + * the lower idx must be even. + */ + for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { + if (!test_and_set_bit(idx, cpuc->used_mask)) { + /* Check if the preceding even counter is available */ + if (!test_and_set_bit(idx - 1, cpuc->used_mask)) + return idx; + /* Release the Odd counter */ + clear_bit(idx, cpuc->used_mask); + } + } + return -EAGAIN; } static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { - int idx; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; @@ -770,13 +911,20 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Otherwise use events counters */ - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } + if (armv8pmu_event_is_64bit(event)) + return armv8pmu_get_chain_idx(cpuc, cpu_pmu); + else + return armv8pmu_get_single_idx(cpuc, cpu_pmu); +} - /* The counters are all in use. */ - return -EAGAIN; +static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx = event->hw.idx; + + clear_bit(idx, cpuc->used_mask); + if (armv8pmu_event_is_chained(event)) + clear_bit(idx - 1, cpuc->used_mask); } /* @@ -851,6 +999,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); + if (armv8pmu_event_is_64bit(event)) + event->hw.flags |= ARMPMU_EVT_64BIT; + /* Onl expose micro/arch events supported by this PMU */ if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { @@ -957,10 +1108,10 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv8pmu_read_counter, cpu_pmu->write_counter = armv8pmu_write_counter, cpu_pmu->get_event_idx = armv8pmu_get_event_idx, + cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx, cpu_pmu->start = armv8pmu_start, cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, - cpu_pmu->max_period = (1LLU << 32) - 1, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; return 0; diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 0d09d8e669cd..1bfeb160c5b1 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -53,6 +53,16 @@ enum { CCI_IF_MAX, }; +#define NUM_HW_CNTRS_CII_4XX 4 +#define NUM_HW_CNTRS_CII_5XX 8 +#define NUM_HW_CNTRS_MAX NUM_HW_CNTRS_CII_5XX + +#define FIXED_HW_CNTRS_CII_4XX 1 +#define FIXED_HW_CNTRS_CII_5XX 0 +#define FIXED_HW_CNTRS_MAX FIXED_HW_CNTRS_CII_4XX + +#define HW_CNTRS_MAX (NUM_HW_CNTRS_MAX + FIXED_HW_CNTRS_MAX) + struct event_range { u32 min; u32 max; @@ -633,8 +643,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) { int i; struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; - - DECLARE_BITMAP(mask, cci_pmu->num_cntrs); + DECLARE_BITMAP(mask, HW_CNTRS_MAX); bitmap_zero(mask, cci_pmu->num_cntrs); for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { @@ -940,7 +949,7 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) { int i; - DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs); + DECLARE_BITMAP(saved_mask, HW_CNTRS_MAX); bitmap_zero(saved_mask, cci_pmu->num_cntrs); pmu_save_counters(cci_pmu, saved_mask); @@ -1245,7 +1254,7 @@ static int validate_group(struct perf_event *event) { struct perf_event *sibling, *leader = event->group_leader; struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)]; + unsigned long mask[BITS_TO_LONGS(HW_CNTRS_MAX)]; struct cci_pmu_hw_events fake_pmu = { /* * Initialise the fake PMU. We only need to populate the @@ -1403,6 +1412,11 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) char *name = model->name; u32 num_cntrs; + if (WARN_ON(model->num_hw_cntrs > NUM_HW_CNTRS_MAX)) + return -EINVAL; + if (WARN_ON(model->fixed_hw_cntrs > FIXED_HW_CNTRS_MAX)) + return -EINVAL; + pmu_event_attr_group.attrs = model->event_attrs; pmu_format_attr_group.attrs = model->format_attrs; @@ -1455,8 +1469,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { #ifdef CONFIG_ARM_CCI400_PMU [CCI400_R0] = { .name = "CCI_400", - .fixed_hw_cntrs = 1, /* Cycle counter */ - .num_hw_cntrs = 4, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */ + .num_hw_cntrs = NUM_HW_CNTRS_CII_4XX, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, .event_attrs = cci400_r0_pmu_event_attrs, @@ -1475,8 +1489,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { }, [CCI400_R1] = { .name = "CCI_400_r1", - .fixed_hw_cntrs = 1, /* Cycle counter */ - .num_hw_cntrs = 4, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */ + .num_hw_cntrs = NUM_HW_CNTRS_CII_4XX, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, .event_attrs = cci400_r1_pmu_event_attrs, @@ -1497,8 +1511,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { #ifdef CONFIG_ARM_CCI5xx_PMU [CCI500_R0] = { .name = "CCI_500", - .fixed_hw_cntrs = 0, - .num_hw_cntrs = 8, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX, + .num_hw_cntrs = NUM_HW_CNTRS_CII_5XX, .cntr_size = SZ_64K, .format_attrs = cci5xx_pmu_format_attrs, .event_attrs = cci5xx_pmu_event_attrs, @@ -1521,8 +1535,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { }, [CCI550_R0] = { .name = "CCI_550", - .fixed_hw_cntrs = 0, - .num_hw_cntrs = 8, + .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX, + .num_hw_cntrs = NUM_HW_CNTRS_CII_5XX, .cntr_size = SZ_64K, .format_attrs = cci5xx_pmu_format_attrs, .event_attrs = cci5xx_pmu_event_attrs, diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index b416ee18e6bb..4b15c36f4631 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1485,17 +1485,9 @@ static int arm_ccn_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ccn); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -EINVAL; - - if (!devm_request_mem_region(ccn->dev, res->start, - resource_size(res), pdev->name)) - return -EBUSY; - - ccn->base = devm_ioremap(ccn->dev, res->start, - resource_size(res)); - if (!ccn->base) - return -EFAULT; + ccn->base = devm_ioremap_resource(ccn->dev, res); + if (IS_ERR(ccn->base)) + return PTR_ERR(ccn->base); res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!res) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index a6347d487635..7f01f6f60b87 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -28,6 +28,14 @@ static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); +static inline u64 arm_pmu_event_max_period(struct perf_event *event) +{ + if (event->hw.flags & ARMPMU_EVT_64BIT) + return GENMASK_ULL(63, 0); + else + return GENMASK_ULL(31, 0); +} + static int armpmu_map_cache_event(const unsigned (*cache_map) [PERF_COUNT_HW_CACHE_MAX] @@ -114,8 +122,10 @@ int armpmu_event_set_period(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; + u64 max_period; int ret = 0; + max_period = arm_pmu_event_max_period(event); if (unlikely(left <= -period)) { left = period; local64_set(&hwc->period_left, left); @@ -136,12 +146,12 @@ int armpmu_event_set_period(struct perf_event *event) * effect we are reducing max_period to account for * interrupt latency (and we are being very conservative). */ - if (left > (armpmu->max_period >> 1)) - left = armpmu->max_period >> 1; + if (left > (max_period >> 1)) + left = (max_period >> 1); local64_set(&hwc->prev_count, (u64)-left); - armpmu->write_counter(event, (u64)(-left) & 0xffffffff); + armpmu->write_counter(event, (u64)(-left) & max_period); perf_event_update_userpage(event); @@ -153,6 +163,7 @@ u64 armpmu_event_update(struct perf_event *event) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u64 delta, prev_raw_count, new_raw_count; + u64 max_period = arm_pmu_event_max_period(event); again: prev_raw_count = local64_read(&hwc->prev_count); @@ -162,7 +173,7 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - prev_raw_count) & armpmu->max_period; + delta = (new_raw_count - prev_raw_count) & max_period; local64_add(delta, &event->count); local64_sub(delta, &hwc->period_left); @@ -227,11 +238,10 @@ armpmu_del(struct perf_event *event, int flags) armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); - if (armpmu->clear_event_idx) - armpmu->clear_event_idx(hw_events, event); - + armpmu->clear_event_idx(hw_events, event); perf_event_update_userpage(event); + /* Clear the allocated counter */ + hwc->idx = -1; } static int @@ -360,6 +370,7 @@ __hw_perf_event_init(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int mapping; + hwc->flags = 0; mapping = armpmu->map_event(event); if (mapping < 0) { @@ -402,7 +413,7 @@ __hw_perf_event_init(struct perf_event *event) * is far less likely to overtake the previous one unless * you have some serious IRQ latency issues. */ - hwc->sample_period = armpmu->max_period >> 1; + hwc->sample_period = arm_pmu_event_max_period(event) >> 1; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); } @@ -654,14 +665,9 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) int idx; for (idx = 0; idx < armpmu->num_events; idx++) { - /* - * If the counter is not used skip it, there is no - * need of stopping/restarting it. - */ - if (!test_bit(idx, hw_events->used_mask)) - continue; - event = hw_events->events[idx]; + if (!event) + continue; switch (cmd) { case CPU_PM_ENTER: diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 971ff336494a..96075cecb0ae 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -160,7 +160,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) static int armpmu_request_irqs(struct arm_pmu *armpmu) { struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - int cpu, err; + int cpu, err = 0; for_each_cpu(cpu, &armpmu->supported_cpus) { int irq = per_cpu(hw_events->irq, cpu); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 44df61397a38..9efd2413240c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -350,19 +350,21 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) /* * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. - * If multi-threading is supported, SCCL_ID is in MPIDR[aff3] and CCL_ID - * is in MPIDR[aff2]; if not, SCCL_ID is in MPIDR[aff2] and CCL_ID is - * in MPIDR[aff1]. If this changes in future, this shall be updated. + * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2] + * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID + * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1]. */ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) { u64 mpidr = read_cpuid_mpidr(); if (mpidr & MPIDR_MT_BITMASK) { + int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + if (sccl_id) - *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3); + *sccl_id = aff2 >> 3; if (ccl_id) - *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + *ccl_id = aff2 & 0x7; } else { if (sccl_id) *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index ad5444491975..10f92e1d8e7b 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -25,6 +25,12 @@ */ #define ARMPMU_MAX_HWEVENTS 32 +/* + * ARM PMU hw_event flags + */ +/* Event uses a 64bit counter */ +#define ARMPMU_EVT_64BIT 1 + #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF @@ -87,14 +93,13 @@ struct arm_pmu { struct perf_event *event); int (*set_event_filter)(struct hw_perf_event *evt, struct perf_event_attr *attr); - u32 (*read_counter)(struct perf_event *event); - void (*write_counter)(struct perf_event *event, u32 val); + u64 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u64 val); void (*start)(struct arm_pmu *); void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); int num_events; - u64 max_period; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); |