From 8d1a0ae724ad74ef7946a45e3b2d3e01f39df02b Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Wed, 13 Jan 2016 23:36:26 -0500 Subject: ARM: perf: Set ARMv7 SDER SUNIDEN bit ARMv7 counters other than the CPU cycle counter only work if the Secure Debug Enable Register (SDER) SUNIDEN bit is set. Since access to the SDER is only possible in secure state, it will only be done if the device tree property "secure-reg-access" is set. Without this: Performance counter stats for 'sleep 1': 14606094 cycles # 0.000 GHz 0 instructions # 0.00 insns per cycle After applying: Performance counter stats for 'sleep 1': 5843809 cycles 2566484 instructions # 0.44 insns per cycle 1.020144000 seconds time elapsed Some platforms (eg i.MX53) may also need additional platform specific setup. Acked-by: Rob Herring Signed-off-by: Martin Fuzzey Signed-off-by: Pooya Keshavarzi Signed-off-by: George G. Davis [will: add warning if property is found on arm64] Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 10 ++++++++++ arch/arm/kernel/perf_event_v7.c | 13 ++++++++++++- drivers/perf/arm_pmu.c | 9 +++++++++ include/linux/perf/arm_pmu.h | 1 + 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 56518839f52a..b6056d3bca06 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -46,6 +46,16 @@ Optional properties: - qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd events. +- secure-reg-access : Indicates that the ARMv7 Secure Debug Enable Register + (SDER) is accessible. This will cause the driver to do + any setup required that is only possible in ARMv7 secure + state. If not present the ARMv7 SDER will not be touched, + which means the PMU may fail to operate unless external + code (bootloader or security monitor) has performed the + appropriate initialisation. 
Note that this property is + not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux + in Non-secure state. + Example: pmu { diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 4152158f6e6a..15063851cd10 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -712,6 +712,11 @@ static const struct attribute_group *armv7_pmuv2_attr_groups[] = { #define ARMV7_EXCLUDE_USER (1 << 30) #define ARMV7_INCLUDE_HYP (1 << 27) +/* + * Secure debug enable reg + */ +#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */ + static inline u32 armv7_pmnc_read(void) { u32 val; @@ -1094,7 +1099,13 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event, static void armv7pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 idx, nb_cnt = cpu_pmu->num_events; + u32 idx, nb_cnt = cpu_pmu->num_events, val; + + if (cpu_pmu->secure_access) { + asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val)); + val |= ARMV7_SDER_SUNIDEN; + asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val)); + } /* The counter and interrupt enable registers are unknown at reset. 
*/ for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 166637f2917c..eb5bee07526b 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -889,6 +889,15 @@ int arm_pmu_device_probe(struct platform_device *pdev, if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { init_fn = of_id->data; + pmu->secure_access = of_property_read_bool(pdev->dev.of_node, + "secure-reg-access"); + + /* arm64 systems boot only as non-secure */ + if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) { + pr_warn("ignoring \"secure-reg-access\" property for arm64\n"); + pmu->secure_access = false; + } + ret = of_pmu_irq_cfg(pmu); if (!ret) ret = init_fn(pmu); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 83b5e34c6580..2d5eaaa90078 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -104,6 +104,7 @@ struct arm_pmu { atomic_t active_events; struct mutex reserve_mutex; u64 max_period; + bool secure_access; /* 32-bit ARM only */ struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct notifier_block hotplug_nb; -- cgit v1.2.3 From c6b90653f1f7ea383734f8ce9e8df285a0c23f5b Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Thu, 4 Feb 2016 18:06:10 +0100 Subject: drivers/perf: arm_pmu: make info messages more verbose On a big.LITTLE system e.g. with Cortex A57 and A53 in case not all cores are online at PMU probe time we might get hw perfevents: failed to probe PMU! hw perfevents: failed to register PMU devices! making it unclear which cores failed, here. Add the device tree full name which failed and the error value resulting in a more verbose and helpful message like hw perfevents: /soc/pmu_a53: failed to probe PMU! Error -6 hw perfevents: /soc/pmu_a53: failed to register PMU devices! 
Error -6 Signed-off-by: Dirk Behme Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index eb5bee07526b..ca63a452393a 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -907,7 +907,8 @@ int arm_pmu_device_probe(struct platform_device *pdev, } if (ret) { - pr_info("failed to probe PMU!\n"); + pr_info("%s: failed to probe PMU! Error %i\n", + node->full_name, ret); goto out_free; } @@ -927,7 +928,8 @@ int arm_pmu_device_probe(struct platform_device *pdev, out_destroy: cpu_pmu_destroy(pmu); out_free: - pr_info("failed to register PMU devices!\n"); + pr_info("%s: failed to register PMU devices! Error %i\n", + node->full_name, ret); kfree(pmu); return ret; } -- cgit v1.2.3 From 5f140ccef3e1f15873c8e2c47d15b03099623ec0 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 18 Feb 2016 17:50:10 +0100 Subject: arm64: perf: Rename Cortex A57 events The implemented Cortex A57 events are strictly-speaking not A57 specific. They are ARM recommended implementation defined events and can be found on other ARMv8 SOCs like Cavium ThunderX too. Therefore rename these events to allow using them in other implementations too. Signed-off-by: Jan Glauber [will: capitalisation and ordering] Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f7ab14c4d5df..82f04b09a2f0 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -87,17 +87,17 @@ #define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F #define ARMV8_PMUV3_PERFCTR_L21_TLB 0x30 +/* ARMv8 implementation defined event types. 
*/ +#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD 0x40 +#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST 0x41 +#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD 0x42 +#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST 0x43 +#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD 0x4C +#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST 0x4D + /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL 0xC2 -/* ARMv8 Cortex-A57 and Cortex-A72 specific event types. */ -#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD 0x40 -#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST 0x41 -#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD 0x42 -#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST 0x43 -#define ARMV8_A57_PERFCTR_DTLB_REFILL_LD 0x4c -#define ARMV8_A57_PERFCTR_DTLB_REFILL_ST 0x4d - /* PMUv3 HW events mapping. */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, @@ -174,16 +174,16 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { PERF_CACHE_MAP_ALL_UNSUPPORTED, - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST, + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST, [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD, - 
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST, [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, -- cgit v1.2.3 From d0aa2bffcf9847133fd75b9c829da4faded81977 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 18 Feb 2016 17:50:11 +0100 Subject: arm64/perf: Add Cavium ThunderX PMU support Support PMU events on Caviums ThunderX SOC. ThunderX supports some additional counters compared to the default ARMv8 PMUv3: - branch instructions counter - stall frontend & backend counters - L1 dcache load & store counters - L1 icache counters - iTLB & dTLB counters - L1 dcache & icache prefetch counters Signed-off-by: Jan Glauber [will: capitalisation] Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 69 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 82f04b09a2f0..ab504f72bc63 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -94,10 +94,19 @@ #define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST 0x43 #define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD 0x4C #define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST 0x4D +#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD 0x4E +#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST 0x4F /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL 0xC2 +/* ARMv8 Cavium ThunderX specific event types. */ +#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST 0xE9 +#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS 0xEA +#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS 0xEB +#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS 0xEC +#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS 0xED + /* PMUv3 HW events mapping. 
*/ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, @@ -131,6 +140,18 @@ static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, }; +static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, +}; + static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -193,6 +214,36 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, }; +static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = 
ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, +}; + #define ARMV8_EVENT_ATTR_RESOLVE(m) #m #define ARMV8_EVENT_ATTR(name, config) \ PMU_EVENT_ATTR_STRING(name, armv8_event_attr_##name, \ @@ -324,7 +375,6 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = { NULL, }; - /* * Perf Events' indices */ @@ -743,6 +793,13 @@ static int armv8_a57_map_event(struct perf_event *event) ARMV8_EVTYPE_EVENT); } +static int armv8_thunder_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv8_thunder_perf_map, + &armv8_thunder_perf_cache_map, + ARMV8_EVTYPE_EVENT); +} + static void armv8pmu_read_num_pmnc_events(void *info) { int *nb_cnt = info; @@ -811,11 +868,21 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) return armv8pmu_probe_num_events(cpu_pmu); } +static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv8_pmu_init(cpu_pmu); + cpu_pmu->name = "armv8_cavium_thunder"; + cpu_pmu->map_event = armv8_thunder_map_event; + cpu_pmu->pmu.attr_groups = 
armv8_pmuv3_attr_groups; + return armv8pmu_probe_num_events(cpu_pmu); +} + static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, + {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, {}, }; -- cgit v1.2.3 From 7175f0591eb9714fa71d499c59c35bcbd030931a Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 18 Feb 2016 17:50:13 +0100 Subject: arm64: perf: Enable PMCR long cycle counter bit With the long cycle counter bit (LC) disabled the cycle counter is not working on ThunderX SOC (ThunderX only implements Aarch64). Also, according to documentation LC == 0 is deprecated. To keep the code simple the patch does not introduce 64 bit wide counter functions. Instead writing the cycle counter always sets the upper 32 bits so overflow interrupts are generated as before. 
Original patch from Andrew Pinski Signed-off-by: Jan Glauber Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index ab504f72bc63..1cb61c9fbb21 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -405,6 +405,7 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = { #define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ #define ARMV8_PMCR_X (1 << 4) /* Export to ETM */ #define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV8_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ #define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ #define ARMV8_PMCR_N_MASK 0x1f #define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ @@ -494,9 +495,16 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) - asm volatile("msr pmccntr_el0, %0" :: "r" (value)); - else if (armv8pmu_select_counter(idx) == idx) + else if (idx == ARMV8_IDX_CYCLE_COUNTER) { + /* + * Set the upper 32bits as this is a 64bit counter but we only + * count using the lower 32bits and we want an interrupt when + * it overflows. + */ + u64 value64 = 0xffffffff00000000ULL | value; + + asm volatile("msr pmccntr_el0, %0" :: "r" (value64)); + } else if (armv8pmu_select_counter(idx) == idx) asm volatile("msr pmxevcntr_el0, %0" :: "r" (value)); } @@ -768,8 +776,11 @@ static void armv8pmu_reset(void *info) armv8pmu_disable_intens(idx); } - /* Initialize & Reset PMNC: C and P bits. */ - armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); + /* + * Initialize & Reset PMNC. Request overflow interrupt for + * 64 bit cycle counter but cheat in armv8pmu_write_counter(). 
+ */ + armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C | ARMV8_PMCR_LC); } static int armv8_pmuv3_map_event(struct perf_event *event) -- cgit v1.2.3 From c210ae80e4e7083e03bee13535d11fc2c991cf71 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 18 Feb 2016 17:50:14 +0100 Subject: arm64: perf: Extend event mask for ARMv8.1 ARMv8.1 increases the PMU event number space to 16 bit so increase the EVTYPE mask. Signed-off-by: Jan Glauber Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 1cb61c9fbb21..1cc61fc321d9 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -419,8 +419,8 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = { /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */ -#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */ +#define ARMV8_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ +#define ARMV8_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ /* * Event filters for PMUv3 -- cgit v1.2.3 From 94085fe570e7b87597d4695e6fa77d4256efd29e Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 18 Feb 2016 17:50:12 +0100 Subject: arm64: dts: Add Cavium ThunderX specific PMU Add a compatible string for the Cavium ThunderX PMU. 
Signed-off-by: Jan Glauber Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 1 + arch/arm64/boot/dts/cavium/thunder-88xx.dtsi | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index b6056d3bca06..6eb73be9433e 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -25,6 +25,7 @@ Required properties: "qcom,scorpion-pmu" "qcom,scorpion-mp-pmu" "qcom,krait-pmu" + "cavium,thunder-pmu" - interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu interrupt (PPI) then 1 interrupt should be specified. diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi index 9cb7cf94284a..2eb9b225f0bc 100644 --- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi @@ -360,6 +360,11 @@ <1 10 0xff01>; }; + pmu { + compatible = "cavium,thunder-pmu", "arm,armv8-pmuv3"; + interrupts = <1 7 4>; + }; + soc { compatible = "simple-bus"; #address-cells = <2>; -- cgit v1.2.3 From da4e4f18afe0f3729d68f3785c5802f786d36e34 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 23 Feb 2016 18:22:39 +0000 Subject: drivers/perf: arm_pmu: implement CPU_PM notifier When a CPU is suspended (either through suspend-to-RAM or CPUidle), its PMU registers content can be lost, which means that counters registers values that were initialized on power down entry have to be reprogrammed on power-up to make sure the counters set-up is preserved (ie on power-up registers take the reset values on Cold or Warm reset, which can be architecturally UNKNOWN). 
To guarantee seamless profiling conditions across a core power down this patch adds a CPU PM notifier to ARM pmus, that upon CPU PM entry/exit from low-power states saves/restores the pmu registers set-up (by using the ARM perf API), so that the power-down/up cycle does not affect the perf behaviour (apart from a black-out period between power-up/down CPU PM notifications that is unavoidable). Cc: Will Deacon Cc: Sudeep Holla Cc: Daniel Lezcano Cc: Mathieu Poirier Cc: Mark Rutland Acked-by: Ashwin Chaugule Acked-by: Kevin Hilman Signed-off-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 95 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/perf/arm_pmu.h | 1 + 2 files changed, 96 insertions(+) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index ca63a452393a..11bacc7220a1 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -710,6 +711,93 @@ static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, return NOTIFY_OK; } +#ifdef CONFIG_CPU_PM +static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) +{ + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + struct perf_event *event; + int idx; + + for (idx = 0; idx < armpmu->num_events; idx++) { + /* + * If the counter is not used skip it, there is no + * need of stopping/restarting it. 
+ */ + if (!test_bit(idx, hw_events->used_mask)) + continue; + + event = hw_events->events[idx]; + + switch (cmd) { + case CPU_PM_ENTER: + /* + * Stop and update the counter + */ + armpmu_stop(event, PERF_EF_UPDATE); + break; + case CPU_PM_EXIT: + case CPU_PM_ENTER_FAILED: + /* Restore and enable the counter */ + armpmu_start(event, PERF_EF_RELOAD); + break; + default: + break; + } + } +} + +static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, + void *v) +{ + struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return NOTIFY_DONE; + + /* + * Always reset the PMU registers on power-up even if + * there are no events running. + */ + if (cmd == CPU_PM_EXIT && armpmu->reset) + armpmu->reset(armpmu); + + if (!enabled) + return NOTIFY_OK; + + switch (cmd) { + case CPU_PM_ENTER: + armpmu->stop(armpmu); + cpu_pm_pmu_setup(armpmu, cmd); + break; + case CPU_PM_EXIT: + cpu_pm_pmu_setup(armpmu, cmd); + case CPU_PM_ENTER_FAILED: + armpmu->start(armpmu); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify; + return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb); +} + +static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) +{ + cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb); +} +#else +static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; } +static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { } +#endif + static int cpu_pmu_init(struct arm_pmu *cpu_pmu) { int err; @@ -725,6 +813,10 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) if (err) goto out_hw_events; + err = cpu_pm_pmu_register(cpu_pmu); + if (err) + goto out_unregister; + 
for_each_possible_cpu(cpu) { struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); raw_spin_lock_init(&events->pmu_lock); @@ -746,6 +838,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) return 0; +out_unregister: + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); out_hw_events: free_percpu(cpu_hw_events); return err; @@ -753,6 +847,7 @@ out_hw_events: static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) { + cpu_pm_pmu_unregister(cpu_pmu); unregister_cpu_notifier(&cpu_pmu->hotplug_nb); free_percpu(cpu_pmu->hw_events); } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 2d5eaaa90078..4196c90a3c88 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -108,6 +108,7 @@ struct arm_pmu { struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct notifier_block hotplug_nb; + struct notifier_block cpu_pm_nb; }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) -- cgit v1.2.3 From 5e442eba342e567e2b3f1a39a24f81559f8370f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 23 Feb 2016 10:49:43 +0000 Subject: arm-cci: simplify sysfs attr handling There's no need to dynamically initialise attribute pointers when we can get the compiler to do it for us. We also don't need a dev_ext_attribute for the cpumask, as the drvdata for a PMU device is a pointer to struct pmu. 
Cc: Punit Agrawal Acked-by: Olof Johansson Signed-off-by: Mark Rutland Tested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 100 +++++++++++++------------------------------------- 1 file changed, 25 insertions(+), 75 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 577cc4bf6a9d..ee47e6be9ab9 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -121,10 +121,8 @@ struct cci_pmu_model { u32 fixed_hw_cntrs; u32 num_hw_cntrs; u32 cntr_size; - u64 nformat_attrs; - u64 nevent_attrs; - struct dev_ext_attribute *format_attrs; - struct dev_ext_attribute *event_attrs; + struct attribute **format_attrs; + struct attribute **event_attrs; struct event_range event_ranges[CCI_IF_MAX]; int (*validate_hw_event)(struct cci_pmu *, unsigned long); int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long); @@ -166,8 +164,10 @@ static ssize_t cci_pmu_format_show(struct device *dev, static ssize_t cci_pmu_event_show(struct device *dev, struct device_attribute *attr, char *buf); -#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \ - { __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config } +#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \ + &((struct dev_ext_attribute[]) { \ + { __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config } \ + })[0].attr.attr #define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \ CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config) @@ -242,12 +242,13 @@ enum cci400_perf_events { static ssize_t cci400_pmu_cycle_event_show(struct device *dev, struct device_attribute *attr, char *buf); -static struct dev_ext_attribute cci400_pmu_format_attrs[] = { +static struct attribute *cci400_pmu_format_attrs[] = { CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"), CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"), + NULL }; -static struct dev_ext_attribute cci400_r0_pmu_event_attrs[] = { +static struct attribute 
*cci400_r0_pmu_event_attrs[] = { /* Slave events */ CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0), CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01), @@ -279,9 +280,10 @@ static struct dev_ext_attribute cci400_r0_pmu_event_attrs[] = { CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A), /* Special event for cycles counter */ CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff), + NULL }; -static struct dev_ext_attribute cci400_r1_pmu_event_attrs[] = { +static struct attribute *cci400_r1_pmu_event_attrs[] = { /* Slave events */ CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0), CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01), @@ -325,6 +327,7 @@ static struct dev_ext_attribute cci400_r1_pmu_event_attrs[] = { CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11), /* Special event for cycles counter */ CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff), + NULL }; static ssize_t cci400_pmu_cycle_event_show(struct device *dev, @@ -480,12 +483,13 @@ static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev static ssize_t cci500_pmu_global_event_show(struct device *dev, struct device_attribute *attr, char *buf); -static struct dev_ext_attribute cci500_pmu_format_attrs[] = { +static struct attribute *cci500_pmu_format_attrs[] = { CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"), CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"), + NULL, }; -static struct dev_ext_attribute cci500_pmu_event_attrs[] = { +static struct attribute *cci500_pmu_event_attrs[] = { /* Slave events */ CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0), CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1), @@ -546,6 +550,7 @@ static struct dev_ext_attribute cci500_pmu_event_attrs[] = { CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD), CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE), CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF), + NULL }; static ssize_t cci500_pmu_global_event_show(struct device *dev, @@ -1176,9 +1181,8 @@ static int 
cci_pmu_event_init(struct perf_event *event) static ssize_t pmu_cpumask_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dev_ext_attribute *eattr = container_of(attr, - struct dev_ext_attribute, attr); - struct cci_pmu *cci_pmu = eattr->var; + struct pmu *pmu = dev_get_drvdata(dev); + struct cci_pmu *cci_pmu = to_cci_pmu(pmu); int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", cpumask_pr_args(&cci_pmu->cpus)); @@ -1187,13 +1191,11 @@ static ssize_t pmu_cpumask_attr_show(struct device *dev, return n; } -static struct dev_ext_attribute pmu_cpumask_attr = { - __ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL), - NULL, /* Populated in cci_pmu_init */ -}; +static struct device_attribute pmu_cpumask_attr = + __ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL); static struct attribute *pmu_attrs[] = { - &pmu_cpumask_attr.attr.attr, + &pmu_cpumask_attr.attr, NULL, }; @@ -1218,60 +1220,14 @@ static const struct attribute_group *pmu_attr_groups[] = { NULL }; -static struct attribute **alloc_attrs(struct platform_device *pdev, - int n, struct dev_ext_attribute *source) -{ - int i; - struct attribute **attrs; - - /* Alloc n + 1 (for terminating NULL) */ - attrs = devm_kcalloc(&pdev->dev, n + 1, sizeof(struct attribute *), - GFP_KERNEL); - if (!attrs) - return attrs; - for(i = 0; i < n; i++) - attrs[i] = &source[i].attr.attr; - return attrs; -} - -static int cci_pmu_init_attrs(struct cci_pmu *cci_pmu, struct platform_device *pdev) -{ - const struct cci_pmu_model *model = cci_pmu->model; - struct attribute **attrs; - - /* - * All allocations below are managed, hence doesn't need to be - * free'd explicitly in case of an error. 
- */ - - if (model->nevent_attrs) { - attrs = alloc_attrs(pdev, model->nevent_attrs, - model->event_attrs); - if (!attrs) - return -ENOMEM; - pmu_event_attr_group.attrs = attrs; - } - if (model->nformat_attrs) { - attrs = alloc_attrs(pdev, model->nformat_attrs, - model->format_attrs); - if (!attrs) - return -ENOMEM; - pmu_format_attr_group.attrs = attrs; - } - pmu_cpumask_attr.var = cci_pmu; - - return 0; -} - static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) { - char *name = cci_pmu->model->name; + const struct cci_pmu_model *model = cci_pmu->model; + char *name = model->name; u32 num_cntrs; - int rc; - rc = cci_pmu_init_attrs(cci_pmu, pdev); - if (rc) - return rc; + pmu_event_attr_group.attrs = model->event_attrs; + pmu_format_attr_group.attrs = model->format_attrs; cci_pmu->pmu = (struct pmu) { .name = cci_pmu->model->name, @@ -1336,9 +1292,7 @@ static struct cci_pmu_model cci_pmu_models[] = { .num_hw_cntrs = 4, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, - .nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs), .event_attrs = cci400_r0_pmu_event_attrs, - .nevent_attrs = ARRAY_SIZE(cci400_r0_pmu_event_attrs), .event_ranges = { [CCI_IF_SLAVE] = { CCI400_R0_SLAVE_PORT_MIN_EV, @@ -1358,9 +1312,7 @@ static struct cci_pmu_model cci_pmu_models[] = { .num_hw_cntrs = 4, .cntr_size = SZ_4K, .format_attrs = cci400_pmu_format_attrs, - .nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs), .event_attrs = cci400_r1_pmu_event_attrs, - .nevent_attrs = ARRAY_SIZE(cci400_r1_pmu_event_attrs), .event_ranges = { [CCI_IF_SLAVE] = { CCI400_R1_SLAVE_PORT_MIN_EV, @@ -1382,9 +1334,7 @@ static struct cci_pmu_model cci_pmu_models[] = { .num_hw_cntrs = 8, .cntr_size = SZ_64K, .format_attrs = cci500_pmu_format_attrs, - .nformat_attrs = ARRAY_SIZE(cci500_pmu_format_attrs), .event_attrs = cci500_pmu_event_attrs, - .nevent_attrs = ARRAY_SIZE(cci500_pmu_event_attrs), .event_ranges = { [CCI_IF_SLAVE] = { CCI500_SLAVE_PORT_MIN_EV, -- cgit v1.2.3 From 
0f17380c1aae80fe0f2fa495cb627ba750b693de Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 23 Feb 2016 10:49:44 +0000 Subject: arm-cci: fix handling cpumask_any_but return value cpumask_any_but returns value >= nr_cpu_ids if there are no more CPUs. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/unsigned_lesser_than_zero.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2038576 Acked-by: Olof Johansson Signed-off-by: Andrzej Hajda Acked-by: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index ee47e6be9ab9..5fb1c06e0020 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -1270,7 +1270,7 @@ static int cci_pmu_cpu_notifier(struct notifier_block *self, if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus)) break; target = cpumask_any_but(cpu_online_mask, cpu); - if (target < 0) // UP, last CPU + if (target >= nr_cpu_ids) // UP, last CPU break; /* * TODO: migrate context once core races on event->ctx have -- cgit v1.2.3 From a53eb5c62f214a72e00775126a3bbd11579e2969 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:45 +0000 Subject: arm-cci: Group writes to counter Add a helper to group the writes to PMU counter, this will be used to delay setting the event period to pmu::pmu_enable() Cc: Punit Agrawal Acked-by: Olof Johansson Acked-by: Mark Rutland Signed-off-by: Suzuki K. 
Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 5fb1c06e0020..6a76ac76fb8b 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -771,6 +771,21 @@ static void pmu_write_counter(struct perf_event *event, u32 value) pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR); } +static void __maybe_unused +pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; + + for_each_set_bit(i, mask, cci_pmu->num_cntrs) { + struct perf_event *event = cci_hw->events[i]; + + if (WARN_ON(!event)) + continue; + pmu_write_counter(event, local64_read(&event->hw.prev_count)); + } +} + static u64 pmu_event_update(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; -- cgit v1.2.3 From a077c52f349fdeeccc3f9f3590d63fe2b116b9a5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:46 +0000 Subject: arm-cci: Refactor CCI PMU enable/disable methods This patch refactors the CCI PMU driver code a little bit to make it easier to share the code for enabling/disabling the CCI PMU. This will be used by the hooks to work around the special cases where writing to a counter is not always that easy (e.g., CCI-500). No functional changes.
Cc: Punit Agrawal Acked-by: Olof Johansson Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 6a76ac76fb8b..e42842be8edc 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -606,6 +606,26 @@ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu, } #endif /* CONFIG_ARM_CCI500_PMU */ +/* Should be called with cci_pmu->hw_events->pmu_lock held */ +static void __cci_pmu_enable(void) +{ + u32 val; + + /* Enable all the PMU counters. */ + val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; + writel(val, cci_ctrl_base + CCI_PMCR); +} + +/* Should be called with cci_pmu->hw_events->pmu_lock held */ +static void __cci_pmu_disable(void) +{ + u32 val; + + /* Disable all the PMU counters. */ + val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; + writel(val, cci_ctrl_base + CCI_PMCR); +} + static ssize_t cci_pmu_format_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -895,16 +915,12 @@ static void cci_pmu_enable(struct pmu *pmu) struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs); unsigned long flags; - u32 val; if (!enabled) return; raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - - /* Enable all the PMU counters. */ - val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; - writel(val, cci_ctrl_base + CCI_PMCR); + __cci_pmu_enable(); raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); } @@ -914,13 +930,9 @@ static void cci_pmu_disable(struct pmu *pmu) struct cci_pmu *cci_pmu = to_cci_pmu(pmu); struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; unsigned long flags; - u32 val; raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - - /* Disable all the PMU counters. 
*/ - val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; - writel(val, cci_ctrl_base + CCI_PMCR); + __cci_pmu_disable(); raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); } -- cgit v1.2.3 From c66eea5f9f55f2ddf742670d35e4c9e30dc42a0a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:47 +0000 Subject: arm-cci: Delay PMU counter writes to pmu::pmu_enable CCI PMU driver always reprograms the counters to a safe value (half of the counter max, = 2^31) before starting the profiling to account for extreme interrupt latencies. Also, writing to a PMU counter could be very costly on some PMUs (e.g., CCI-500). In order to amortise the cost of programming the counters, this patch delays the counter writes to pmu::pmu_enable(). We use the PERF_HES_ARCH flag to keep track of the counters which need to be programmed. Before turning on the PMU, we go through the counters that were marked for write, and perform the operation in a batch. To unify all the counter writes to pmu_enable(), this patch also makes sure that we disable-and-enable the PMU in the irq handler to program any counters that overflowed.
Cc: Punit Agrawal Cc: Peter Zijlstra Acked-by: Olof Johansson Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index e42842be8edc..629c9e069c3b 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -159,6 +159,8 @@ enum cci_models { CCI_MODEL_MAX }; +static void pmu_write_counters(struct cci_pmu *cci_pmu, + unsigned long *mask); static ssize_t cci_pmu_format_show(struct device *dev, struct device_attribute *attr, char *buf); static ssize_t cci_pmu_event_show(struct device *dev, @@ -606,11 +608,44 @@ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu, } #endif /* CONFIG_ARM_CCI500_PMU */ +/* + * Program the CCI PMU counters which have PERF_HES_ARCH set + * with the event period and mark them ready before we enable + * PMU. + */ +void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) +{ + int i; + struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; + + DECLARE_BITMAP(mask, cci_pmu->num_cntrs); + + bitmap_zero(mask, cci_pmu->num_cntrs); + for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { + struct perf_event *event = cci_hw->events[i]; + + if (WARN_ON(!event)) + continue; + + /* Leave the events which are not counting */ + if (event->hw.state & PERF_HES_STOPPED) + continue; + if (event->hw.state & PERF_HES_ARCH) { + set_bit(i, mask); + event->hw.state &= ~PERF_HES_ARCH; + } + } + + pmu_write_counters(cci_pmu, mask); +} + /* Should be called with cci_pmu->hw_events->pmu_lock held */ -static void __cci_pmu_enable(void) +static void __cci_pmu_enable(struct cci_pmu *cci_pmu) { u32 val; + cci_pmu_sync_counters(cci_pmu); + /* Enable all the PMU counters. 
*/ val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; writel(val, cci_ctrl_base + CCI_PMCR); @@ -791,8 +826,7 @@ static void pmu_write_counter(struct perf_event *event, u32 value) pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR); } -static void __maybe_unused -pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) { int i; struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; @@ -840,7 +874,14 @@ void pmu_event_set_period(struct perf_event *event) */ u64 val = 1ULL << 31; local64_set(&hwc->prev_count, val); - pmu_write_counter(event, val); + + /* + * CCI PMU uses PERF_HES_ARCH to keep track of the counters, whose + * values needs to be sync-ed with the s/w state before the PMU is + * enabled. + * Mark this counter for sync. + */ + hwc->state |= PERF_HES_ARCH; } static irqreturn_t pmu_handle_irq(int irq_num, void *dev) @@ -851,6 +892,9 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev) int idx, handled = IRQ_NONE; raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable the PMU while we walk through the counters */ + __cci_pmu_disable(); /* * Iterate over counters and update the corresponding perf events. 
* This should work regardless of whether we have per-counter overflow @@ -877,6 +921,9 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev) pmu_event_set_period(event); handled = IRQ_HANDLED; } + + /* Enable the PMU and sync possibly overflowed counters */ + __cci_pmu_enable(cci_pmu); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); return IRQ_RETVAL(handled); @@ -920,7 +967,7 @@ static void cci_pmu_enable(struct pmu *pmu) return; raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - __cci_pmu_enable(); + __cci_pmu_enable(cci_pmu); raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); } -- cgit v1.2.3 From c8bc2b11405652792a9348c8de7d46637b459b4e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:48 +0000 Subject: arm-cci: write_counter: Remove redundant check pmu_write_counter() is now only called from pmu_write_counters(), which does so for each set index in the given mask, bounded by cci_pmu->num_cntrs. So, there is no need for an extra check to make sure the given counter is valid inside pmu_write_counter. This patch gets rid of that. 
Cc: Punit Agrawal Acked-by: Olof Johansson Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 629c9e069c3b..420c30c15419 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -814,16 +814,9 @@ static u32 pmu_read_counter(struct perf_event *event) return value; } -static void pmu_write_counter(struct perf_event *event, u32 value) +static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx) { - struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); - struct hw_perf_event *hw_counter = &event->hw; - int idx = hw_counter->idx; - - if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) - dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); - else - pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR); + pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR); } static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) @@ -836,7 +829,7 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) if (WARN_ON(!event)) continue; - pmu_write_counter(event, local64_read(&event->hw.prev_count)); + pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i); } } -- cgit v1.2.3 From 1ce6311b99fe809530e8e2181bb73855f85041d1 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:49 +0000 Subject: arm-cci: Get the status of a counter Add helper routines to check if the counter is enabled or not. 
Cc: Punit Agrawal Acked-by: Olof Johansson Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 420c30c15419..d28903712eb8 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -707,6 +707,12 @@ static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx) pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL); } +static bool __maybe_unused +pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx) +{ + return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0; +} + static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event) { pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL); -- cgit v1.2.3 From cea16f8ba783b7d2ef15230c52f62eb6a134b417 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:50 +0000 Subject: arm-cci: Add routines to save/restore all counters Adds helper routines to disable the counter controls for all the counters on the CCI PMU and restore it back, by preserving the original state in caller provided mask. Cc: Punit Agrawal Acked-by: Olof Johansson Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index d28903712eb8..7ce7da0a17c7 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -718,6 +718,44 @@ static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event) pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL); } +/* + * For all counters on the CCI-PMU, disable any 'enabled' counters, + * saving the changed counters in the mask, so that we can restore + * it later using pmu_restore_counters. The mask is private to the + * caller. 
We cannot rely on the used_mask maintained by the CCI_PMU + * as it only tells us if the counter is assigned to perf_event or not. + * The state of the perf_event cannot be locked by the PMU layer, hence + * we check the individual counter status (which can be locked by + * cci_pm->hw_events->pmu_lock). + * + * @mask should be initialised to empty by the caller. + */ +static void __maybe_unused +pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + + for (i = 0; i < cci_pmu->num_cntrs; i++) { + if (pmu_counter_is_enabled(cci_pmu, i)) { + set_bit(i, mask); + pmu_disable_counter(cci_pmu, i); + } + } +} + +/* + * Restore the status of the counters. Reversal of the pmu_save_counters(). + * For each counter set in the mask, enable the counter back. + */ +static void __maybe_unused +pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + + for_each_set_bit(i, mask, cci_pmu->num_cntrs) + pmu_enable_counter(cci_pmu, i); +} + /* * Returns the number of programmable counters actually implemented * by the cci -- cgit v1.2.3 From 11300027b985af524d216c0ca1cf2b834610b636 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:51 +0000 Subject: arm-cci: Add helper to enable PMU without synchronising counters On CCI-500 writing to a counter requires turning the PMU on. So, synchronising the counter state should not be performed for such special cases, while turning the PMU on. This patch adds a helper, __cci_pmu_enable_nosync(), which enables the PMU without flushing the counter states.
Cc: Punit Agrawal Acked-by: Olof Johansson Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 7ce7da0a17c7..bcc4c59c1e39 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -640,17 +640,22 @@ void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) } /* Should be called with cci_pmu->hw_events->pmu_lock held */ -static void __cci_pmu_enable(struct cci_pmu *cci_pmu) +static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu) { u32 val; - cci_pmu_sync_counters(cci_pmu); - /* Enable all the PMU counters. */ val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; writel(val, cci_ctrl_base + CCI_PMCR); } +/* Should be called with cci_pmu->hw_events->pmu_lock held */ +static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu) +{ + cci_pmu_sync_counters(cci_pmu); + __cci_pmu_enable_nosync(cci_pmu); +} + /* Should be called with cci_pmu->hw_events->pmu_lock held */ static void __cci_pmu_disable(void) { @@ -960,7 +965,7 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev) } /* Enable the PMU and sync possibly overflowed counters */ - __cci_pmu_enable(cci_pmu); + __cci_pmu_enable_sync(cci_pmu); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); return IRQ_RETVAL(handled); @@ -1004,7 +1009,7 @@ static void cci_pmu_enable(struct pmu *pmu) return; raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); - __cci_pmu_enable(cci_pmu); + __cci_pmu_enable_sync(cci_pmu); raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); } -- cgit v1.2.3 From fff3f1a03ac071e5b1c1de84fb1da0b6377b25f2 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:52 +0000 Subject: arm-cci: Provide hook for writing to PMU counters Add a hook for writing to CCI PMU counters. 
This callback can be used for CCI models which requires some extra work to program the PMU counter values. To accommodate group writes and single counter writes, the call back accepts a bitmask of the counter indices which need to be programmed with the given value. Cc: Punit Agrawal Acked-by: Olof Johansson Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index bcc4c59c1e39..f1d3f47fc583 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -126,6 +126,7 @@ struct cci_pmu_model { struct event_range event_ranges[CCI_IF_MAX]; int (*validate_hw_event)(struct cci_pmu *, unsigned long); int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long); + void (*write_counters)(struct cci_pmu *, unsigned long *); }; static struct cci_pmu_model cci_pmu_models[]; @@ -868,7 +869,7 @@ static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx) pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR); } -static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) { int i; struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; @@ -882,6 +883,14 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) } } +static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + if (cci_pmu->model->write_counters) + cci_pmu->model->write_counters(cci_pmu, mask); + else + __pmu_write_counters(cci_pmu, mask); +} + static u64 pmu_event_update(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; -- cgit v1.2.3 From a445fcc9979a837bdec897f0d8c90904e1b64ccf Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:53 +0000 Subject: arm-cci: CCI-500: Work around PMU counter writes The CCI PMU driver sets 
the event counter to the half of the maximum value(2^31) it can count before we start the counters via pmu_event_set_period(). This is done to give us the best chance to handle the overflow interrupt, taking care of extreme interrupt latencies. However, CCI-500 comes with advanced power saving schemes, which disables the clock to the event counters unless the counters are enabled to count (PMCR.CEN). This prevents the driver from writing the period to the counters before starting them. Also, there is no way we can reset the individual event counter to 0 (PMCR.RST resets all the counters, losing their current readings). However the value of the counter is preserved and could be read back, when the counters are not enabled. So we cannot reliably use the counters and compute the number of events generated during the sampling period since we don't have the value of the counter at start. This patch works around this issue by changing writes to the counter with the following steps. 1) Disable all the counters (remembering any counters which were enabled) 2) Enable the PMU, now that all the counters are disabled. For each counter to be programmed, repeat steps 3-7 3) Save the current event and program the target counter to count an invalid event, which by spec is guaranteed to not-generate any events. 4) Enable the target counter. 5) Write to the target counter. 6) Disable the target counter 7) Restore the event back on the target counter. 
8) Disable the PMU 9) Restore the status of the all the counters Cc: Punit Agrawal Acked-by: Olof Johansson Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index f1d3f47fc583..ebb2f1efa12c 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -891,6 +891,71 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) __pmu_write_counters(cci_pmu, mask); } +#ifdef CONFIG_ARM_CCI500_PMU + +/* + * CCI-500 has advanced power saving policies, which could gate the + * clocks to the PMU counters, which makes the writes to them ineffective. + * The only way to write to those counters is when the global counters + * are enabled and the particular counter is enabled. + * + * So we do the following : + * + * 1) Disable all the PMU counters, saving their current state + * 2) Enable the global PMU profiling, now that all counters are + * disabled. + * + * For each counter to be programmed, repeat steps 3-7: + * + * 3) Write an invalid event code to the event control register for the + counter, so that the counters are not modified. + * 4) Enable the counter control for the counter. + * 5) Set the counter value + * 6) Disable the counter + * 7) Restore the event in the target counter + * + * 8) Disable the global PMU. + * 9) Restore the status of the rest of the counters. + * + * We choose an event which for CCI-500 is guaranteed not to count. + * We use the highest possible event code (0x1f) for the master interface 0. 
+ */ +#define CCI500_INVALID_EVENT ((CCI500_PORT_M0 << CCI500_PMU_EVENT_SOURCE_SHIFT) | \ + (CCI500_PMU_EVENT_CODE_MASK << CCI500_PMU_EVENT_CODE_SHIFT)) +static void cci500_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +{ + int i; + DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs); + + bitmap_zero(saved_mask, cci_pmu->num_cntrs); + pmu_save_counters(cci_pmu, saved_mask); + + /* + * Now that all the counters are disabled, we can safely turn the PMU on, + * without syncing the status of the counters + */ + __cci_pmu_enable_nosync(cci_pmu); + + for_each_set_bit(i, mask, cci_pmu->num_cntrs) { + struct perf_event *event = cci_pmu->hw_events.events[i]; + + if (WARN_ON(!event)) + continue; + + pmu_set_event(cci_pmu, i, CCI500_INVALID_EVENT); + pmu_enable_counter(cci_pmu, i); + pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i); + pmu_disable_counter(cci_pmu, i); + pmu_set_event(cci_pmu, i, event->hw.config_base); + } + + __cci_pmu_disable(); + + pmu_restore_counters(cci_pmu, saved_mask); +} + +#endif /* CONFIG_ARM_CCI500_PMU */ + static u64 pmu_event_update(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -1475,6 +1540,7 @@ static struct cci_pmu_model cci_pmu_models[] = { }, }, .validate_hw_event = cci500_validate_hw_event, + .write_counters = cci500_pmu_write_counters, }, #endif }; -- cgit v1.2.3 From 3d2e870137c0434f1de125062bcf6f292d816585 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:54 +0000 Subject: arm-cci500: Rearrange PMU driver for code sharing with CCI-550 PMU CCI-550 PMU shares most of the CCI-500 PMU attributes including the event format, PMU event codes. The only difference is an additional master interface (MI6 - 0xe). Hence we share the driver code for both, except for a model specific event validate method. This patch renames the common CCI500 symbols to CCI5xx, including the Kconfig symbol. No functional changes to the PMU driver. 
Acked-by: Olof Johansson Acked-by: Punit Agrawal Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- drivers/bus/Kconfig | 2 +- drivers/bus/arm-cci.c | 220 +++++++++++++++++++++++++------------------------- 2 files changed, 113 insertions(+), 109 deletions(-) diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 129d47bcc5fc..e25a850799b3 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -34,7 +34,7 @@ config ARM_CCI400_PORT_CTRL Low level power management driver for CCI400 cache coherent interconnect for ARM platforms. -config ARM_CCI500_PMU +config ARM_CCI5xx_PMU bool "ARM CCI500 PMU support" depends on (ARM && CPU_V7) || ARM64 depends on PERF_EVENTS diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index ebb2f1efa12c..683e289145d7 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -52,7 +52,7 @@ static const struct of_device_id arm_cci_matches[] = { #ifdef CONFIG_ARM_CCI400_COMMON {.compatible = "arm,cci-400", .data = CCI400_PORTS_DATA }, #endif -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU { .compatible = "arm,cci-500", }, #endif {}, @@ -92,7 +92,7 @@ static const struct of_device_id arm_cci_matches[] = { enum { CCI_IF_SLAVE, CCI_IF_MASTER, -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU CCI_IF_GLOBAL, #endif CCI_IF_MAX, @@ -154,7 +154,7 @@ enum cci_models { CCI400_R0, CCI400_R1, #endif -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU CCI500_R0, #endif CCI_MODEL_MAX @@ -426,73 +426,67 @@ static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev } #endif /* CONFIG_ARM_CCI400_PMU */ -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU /* - * CCI500 provides 8 independent event counters that can count - * any of the events available. - * - * CCI500 PMU event id is an 9-bit value made of two parts. + * CCI5xx PMU event id is an 9-bit value made of two parts. 
* bits [8:5] - Source for the event - * 0x0-0x6 - Slave interfaces - * 0x8-0xD - Master interfaces - * 0xf - Global Events - * 0x7,0xe - Reserved - * * bits [4:0] - Event code (specific to type of interface) + * + * */ /* Port ids */ -#define CCI500_PORT_S0 0x0 -#define CCI500_PORT_S1 0x1 -#define CCI500_PORT_S2 0x2 -#define CCI500_PORT_S3 0x3 -#define CCI500_PORT_S4 0x4 -#define CCI500_PORT_S5 0x5 -#define CCI500_PORT_S6 0x6 - -#define CCI500_PORT_M0 0x8 -#define CCI500_PORT_M1 0x9 -#define CCI500_PORT_M2 0xa -#define CCI500_PORT_M3 0xb -#define CCI500_PORT_M4 0xc -#define CCI500_PORT_M5 0xd - -#define CCI500_PORT_GLOBAL 0xf - -#define CCI500_PMU_EVENT_MASK 0x1ffUL -#define CCI500_PMU_EVENT_SOURCE_SHIFT 0x5 -#define CCI500_PMU_EVENT_SOURCE_MASK 0xf -#define CCI500_PMU_EVENT_CODE_SHIFT 0x0 -#define CCI500_PMU_EVENT_CODE_MASK 0x1f - -#define CCI500_PMU_EVENT_SOURCE(event) \ - ((event >> CCI500_PMU_EVENT_SOURCE_SHIFT) & CCI500_PMU_EVENT_SOURCE_MASK) -#define CCI500_PMU_EVENT_CODE(event) \ - ((event >> CCI500_PMU_EVENT_CODE_SHIFT) & CCI500_PMU_EVENT_CODE_MASK) - -#define CCI500_SLAVE_PORT_MIN_EV 0x00 -#define CCI500_SLAVE_PORT_MAX_EV 0x1f -#define CCI500_MASTER_PORT_MIN_EV 0x00 -#define CCI500_MASTER_PORT_MAX_EV 0x06 -#define CCI500_GLOBAL_PORT_MIN_EV 0x00 -#define CCI500_GLOBAL_PORT_MAX_EV 0x0f - - -#define CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \ - CCI_EXT_ATTR_ENTRY(_name, cci500_pmu_global_event_show, \ +#define CCI5xx_PORT_S0 0x0 +#define CCI5xx_PORT_S1 0x1 +#define CCI5xx_PORT_S2 0x2 +#define CCI5xx_PORT_S3 0x3 +#define CCI5xx_PORT_S4 0x4 +#define CCI5xx_PORT_S5 0x5 +#define CCI5xx_PORT_S6 0x6 + +#define CCI5xx_PORT_M0 0x8 +#define CCI5xx_PORT_M1 0x9 +#define CCI5xx_PORT_M2 0xa +#define CCI5xx_PORT_M3 0xb +#define CCI5xx_PORT_M4 0xc +#define CCI5xx_PORT_M5 0xd + +#define CCI5xx_PORT_GLOBAL 0xf + +#define CCI5xx_PMU_EVENT_MASK 0x1ffUL +#define CCI5xx_PMU_EVENT_SOURCE_SHIFT 0x5 +#define CCI5xx_PMU_EVENT_SOURCE_MASK 0xf +#define 
CCI5xx_PMU_EVENT_CODE_SHIFT 0x0 +#define CCI5xx_PMU_EVENT_CODE_MASK 0x1f + +#define CCI5xx_PMU_EVENT_SOURCE(event) \ + ((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK) +#define CCI5xx_PMU_EVENT_CODE(event) \ + ((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK) + +#define CCI5xx_SLAVE_PORT_MIN_EV 0x00 +#define CCI5xx_SLAVE_PORT_MAX_EV 0x1f +#define CCI5xx_MASTER_PORT_MIN_EV 0x00 +#define CCI5xx_MASTER_PORT_MAX_EV 0x06 +#define CCI5xx_GLOBAL_PORT_MIN_EV 0x00 +#define CCI5xx_GLOBAL_PORT_MAX_EV 0x0f + + +#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \ + CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \ (unsigned long) _config) -static ssize_t cci500_pmu_global_event_show(struct device *dev, +static ssize_t cci5xx_pmu_global_event_show(struct device *dev, struct device_attribute *attr, char *buf); -static struct attribute *cci500_pmu_format_attrs[] = { +static struct attribute *cci5xx_pmu_format_attrs[] = { CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"), CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"), NULL, }; -static struct attribute *cci500_pmu_event_attrs[] = { +static struct attribute *cci5xx_pmu_event_attrs[] = { /* Slave events */ CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0), CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1), @@ -537,64 +531,73 @@ static struct attribute *cci500_pmu_event_attrs[] = { CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6), /* Global events */ - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5), - 
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE), - CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE), + CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF), NULL }; -static ssize_t cci500_pmu_global_event_show(struct device *dev, +static ssize_t cci5xx_pmu_global_event_show(struct device *dev, 
struct device_attribute *attr, char *buf) { struct dev_ext_attribute *eattr = container_of(attr, struct dev_ext_attribute, attr); /* Global events have single fixed source code */ return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n", - (unsigned long)eattr->var, CCI500_PORT_GLOBAL); + (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL); } +/* + * CCI500 provides 8 independent event counters that can count + * any of the events available. + * CCI500 PMU event source ids + * 0x0-0x6 - Slave interfaces + * 0x8-0xD - Master interfaces + * 0xf - Global Events + * 0x7,0xe - Reserved + */ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event) { - u32 ev_source = CCI500_PMU_EVENT_SOURCE(hw_event); - u32 ev_code = CCI500_PMU_EVENT_CODE(hw_event); + u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event); + u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event); int if_type; - if (hw_event & ~CCI500_PMU_EVENT_MASK) + if (hw_event & ~CCI5xx_PMU_EVENT_MASK) return -ENOENT; switch (ev_source) { - case CCI500_PORT_S0: - case CCI500_PORT_S1: - case CCI500_PORT_S2: - case CCI500_PORT_S3: - case CCI500_PORT_S4: - case CCI500_PORT_S5: - case CCI500_PORT_S6: + case CCI5xx_PORT_S0: + case CCI5xx_PORT_S1: + case CCI5xx_PORT_S2: + case CCI5xx_PORT_S3: + case CCI5xx_PORT_S4: + case CCI5xx_PORT_S5: + case CCI5xx_PORT_S6: if_type = CCI_IF_SLAVE; break; - case CCI500_PORT_M0: - case CCI500_PORT_M1: - case CCI500_PORT_M2: - case CCI500_PORT_M3: - case CCI500_PORT_M4: - case CCI500_PORT_M5: + case CCI5xx_PORT_M0: + case CCI5xx_PORT_M1: + case CCI5xx_PORT_M2: + case CCI5xx_PORT_M3: + case CCI5xx_PORT_M4: + case CCI5xx_PORT_M5: if_type = CCI_IF_MASTER; break; - case CCI500_PORT_GLOBAL: + case CCI5xx_PORT_GLOBAL: if_type = CCI_IF_GLOBAL; break; default: @@ -607,7 +610,8 @@ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu, return -ENOENT; } -#endif /* CONFIG_ARM_CCI500_PMU */ + +#endif /* CONFIG_ARM_CCI5xx_PMU */ /* * Program the CCI PMU counters which have 
PERF_HES_ARCH set @@ -891,7 +895,7 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) __pmu_write_counters(cci_pmu, mask); } -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU /* * CCI-500 has advanced power saving policies, which could gate the @@ -917,12 +921,12 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) * 8) Disable the global PMU. * 9) Restore the status of the rest of the counters. * - * We choose an event which for CCI-500 is guaranteed not to count. + * We choose an event which for CCI-5xx is guaranteed not to count. * We use the highest possible event code (0x1f) for the master interface 0. */ -#define CCI500_INVALID_EVENT ((CCI500_PORT_M0 << CCI500_PMU_EVENT_SOURCE_SHIFT) | \ - (CCI500_PMU_EVENT_CODE_MASK << CCI500_PMU_EVENT_CODE_SHIFT)) -static void cci500_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) +#define CCI5xx_INVALID_EVENT ((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \ + (CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT)) +static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) { int i; DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs); @@ -942,7 +946,7 @@ static void cci500_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *ma if (WARN_ON(!event)) continue; - pmu_set_event(cci_pmu, i, CCI500_INVALID_EVENT); + pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT); pmu_enable_counter(cci_pmu, i); pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i); pmu_disable_counter(cci_pmu, i); @@ -954,7 +958,7 @@ static void cci500_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *ma pmu_restore_counters(cci_pmu, saved_mask); } -#endif /* CONFIG_ARM_CCI500_PMU */ +#endif /* CONFIG_ARM_CCI5xx_PMU */ static u64 pmu_event_update(struct perf_event *event) { @@ -1517,30 +1521,30 @@ static struct cci_pmu_model cci_pmu_models[] = { .get_event_idx = cci400_get_event_idx, }, #endif -#ifdef CONFIG_ARM_CCI500_PMU 
+#ifdef CONFIG_ARM_CCI5xx_PMU [CCI500_R0] = { .name = "CCI_500", .fixed_hw_cntrs = 0, .num_hw_cntrs = 8, .cntr_size = SZ_64K, - .format_attrs = cci500_pmu_format_attrs, - .event_attrs = cci500_pmu_event_attrs, + .format_attrs = cci5xx_pmu_format_attrs, + .event_attrs = cci5xx_pmu_event_attrs, .event_ranges = { [CCI_IF_SLAVE] = { - CCI500_SLAVE_PORT_MIN_EV, - CCI500_SLAVE_PORT_MAX_EV, + CCI5xx_SLAVE_PORT_MIN_EV, + CCI5xx_SLAVE_PORT_MAX_EV, }, [CCI_IF_MASTER] = { - CCI500_MASTER_PORT_MIN_EV, - CCI500_MASTER_PORT_MAX_EV, + CCI5xx_MASTER_PORT_MIN_EV, + CCI5xx_MASTER_PORT_MAX_EV, }, [CCI_IF_GLOBAL] = { - CCI500_GLOBAL_PORT_MIN_EV, - CCI500_GLOBAL_PORT_MAX_EV, + CCI5xx_GLOBAL_PORT_MIN_EV, + CCI5xx_GLOBAL_PORT_MAX_EV, }, }, .validate_hw_event = cci500_validate_hw_event, - .write_counters = cci500_pmu_write_counters, + .write_counters = cci5xx_pmu_write_counters, }, #endif }; @@ -1560,7 +1564,7 @@ static const struct of_device_id arm_cci_pmu_matches[] = { .data = &cci_pmu_models[CCI400_R1], }, #endif -#ifdef CONFIG_ARM_CCI500_PMU +#ifdef CONFIG_ARM_CCI5xx_PMU { .compatible = "arm,cci-500-pmu,r0", .data = &cci_pmu_models[CCI500_R0], -- cgit v1.2.3 From d7dd5fd773497d7b63e7a35fcaa270c9f859432b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 23 Feb 2016 10:49:55 +0000 Subject: arm-cci: CoreLink CCI-550 PMU driver Add ARM CoreLink CCI-550 cache coherent interconnect PMU driver support. The CCI-550 PMU shares all the attributes of CCI-500 PMU, except for an additional master interface (MI-6 - 0xe). CCI-550 requires the same work around as for CCI-500 to write to the PMU counter. 
Acked-by: Olof Johansson Acked-by: Punit Agrawal Acked-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/cci.txt | 2 + drivers/bus/Kconfig | 8 +-- drivers/bus/arm-cci.c | 85 ++++++++++++++++++++++++++- 3 files changed, 90 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt index aef1d200a9b2..a1a5a7ecc2fb 100644 --- a/Documentation/devicetree/bindings/arm/cci.txt +++ b/Documentation/devicetree/bindings/arm/cci.txt @@ -34,6 +34,7 @@ specific to ARM. Definition: must contain one of the following: "arm,cci-400" "arm,cci-500" + "arm,cci-550" - reg Usage: required @@ -101,6 +102,7 @@ specific to ARM. "arm,cci-400-pmu" - DEPRECATED, permitted only where OS has secure acces to CCI registers "arm,cci-500-pmu,r0" + "arm,cci-550-pmu,r0" - reg: Usage: required Value type: Integer cells. A register entry, expressed diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index e25a850799b3..6eff571731b1 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -35,14 +35,14 @@ config ARM_CCI400_PORT_CTRL interconnect for ARM platforms. config ARM_CCI5xx_PMU - bool "ARM CCI500 PMU support" + bool "ARM CCI-500/CCI-550 PMU support" depends on (ARM && CPU_V7) || ARM64 depends on PERF_EVENTS select ARM_CCI_PMU help - Support for PMU events monitoring on the ARM CCI-500 cache coherent - interconnect. CCI-500 provides 8 independent event counters, which - can count events pertaining to the slave/master interfaces as well + Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache + coherent interconnects. Both of them provide 8 independent event counters, + which can count events pertaining to the slave/master interfaces as well as the internal events to the CCI. 
If unsure, say Y diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 683e289145d7..494bd39f8434 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -54,6 +54,7 @@ static const struct of_device_id arm_cci_matches[] = { #endif #ifdef CONFIG_ARM_CCI5xx_PMU { .compatible = "arm,cci-500", }, + { .compatible = "arm,cci-550", }, #endif {}, }; @@ -156,6 +157,7 @@ enum cci_models { #endif #ifdef CONFIG_ARM_CCI5xx_PMU CCI500_R0, + CCI550_R0, #endif CCI_MODEL_MAX }; @@ -451,6 +453,7 @@ static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev #define CCI5xx_PORT_M3 0xb #define CCI5xx_PORT_M4 0xc #define CCI5xx_PORT_M5 0xd +#define CCI5xx_PORT_M6 0xe #define CCI5xx_PORT_GLOBAL 0xf @@ -611,6 +614,58 @@ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu, return -ENOENT; } +/* + * CCI550 provides 8 independent event counters that can count + * any of the events available. + * CCI550 PMU event source ids + * 0x0-0x6 - Slave interfaces + * 0x8-0xe - Master interfaces + * 0xf - Global Events + * 0x7 - Reserved + */ +static int cci550_validate_hw_event(struct cci_pmu *cci_pmu, + unsigned long hw_event) +{ + u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event); + u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event); + int if_type; + + if (hw_event & ~CCI5xx_PMU_EVENT_MASK) + return -ENOENT; + + switch (ev_source) { + case CCI5xx_PORT_S0: + case CCI5xx_PORT_S1: + case CCI5xx_PORT_S2: + case CCI5xx_PORT_S3: + case CCI5xx_PORT_S4: + case CCI5xx_PORT_S5: + case CCI5xx_PORT_S6: + if_type = CCI_IF_SLAVE; + break; + case CCI5xx_PORT_M0: + case CCI5xx_PORT_M1: + case CCI5xx_PORT_M2: + case CCI5xx_PORT_M3: + case CCI5xx_PORT_M4: + case CCI5xx_PORT_M5: + case CCI5xx_PORT_M6: + if_type = CCI_IF_MASTER; + break; + case CCI5xx_PORT_GLOBAL: + if_type = CCI_IF_GLOBAL; + break; + default: + return -ENOENT; + } + + if (ev_code >= cci_pmu->model->event_ranges[if_type].min && + ev_code <= cci_pmu->model->event_ranges[if_type].max) + return hw_event; + + 
return -ENOENT; +} + #endif /* CONFIG_ARM_CCI5xx_PMU */ /* @@ -898,7 +953,7 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) #ifdef CONFIG_ARM_CCI5xx_PMU /* - * CCI-500 has advanced power saving policies, which could gate the + * CCI-500/CCI-550 has advanced power saving policies, which could gate the * clocks to the PMU counters, which makes the writes to them ineffective. * The only way to write to those counters is when the global counters * are enabled and the particular counter is enabled. @@ -1546,6 +1601,30 @@ static struct cci_pmu_model cci_pmu_models[] = { .validate_hw_event = cci500_validate_hw_event, .write_counters = cci5xx_pmu_write_counters, }, + [CCI550_R0] = { + .name = "CCI_550", + .fixed_hw_cntrs = 0, + .num_hw_cntrs = 8, + .cntr_size = SZ_64K, + .format_attrs = cci5xx_pmu_format_attrs, + .event_attrs = cci5xx_pmu_event_attrs, + .event_ranges = { + [CCI_IF_SLAVE] = { + CCI5xx_SLAVE_PORT_MIN_EV, + CCI5xx_SLAVE_PORT_MAX_EV, + }, + [CCI_IF_MASTER] = { + CCI5xx_MASTER_PORT_MIN_EV, + CCI5xx_MASTER_PORT_MAX_EV, + }, + [CCI_IF_GLOBAL] = { + CCI5xx_GLOBAL_PORT_MIN_EV, + CCI5xx_GLOBAL_PORT_MAX_EV, + }, + }, + .validate_hw_event = cci550_validate_hw_event, + .write_counters = cci5xx_pmu_write_counters, + }, #endif }; @@ -1569,6 +1648,10 @@ static const struct of_device_id arm_cci_pmu_matches[] = { .compatible = "arm,cci-500-pmu,r0", .data = &cci_pmu_models[CCI500_R0], }, + { + .compatible = "arm,cci-550-pmu,r0", + .data = &cci_pmu_models[CCI550_R0], + }, #endif {}, }; -- cgit v1.2.3 From ceb495124417a8aada7c6ee917dff8f1328e8480 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 26 Feb 2016 16:12:43 +0000 Subject: arm-cci: make private functions static cci_pmu_sync_counters and pmu_event_set_period are internal functions to the CCI PMU driver, so make them static to avoid polluting the kernel namespace. 
Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 494bd39f8434..d951371a74d4 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -673,7 +673,7 @@ static int cci550_validate_hw_event(struct cci_pmu *cci_pmu, * with the event period and mark them ready before we enable * PMU. */ -void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) +static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) { int i; struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; @@ -1038,7 +1038,7 @@ static void pmu_read(struct perf_event *event) pmu_event_update(event); } -void pmu_event_set_period(struct perf_event *event) +static void pmu_event_set_period(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; /* -- cgit v1.2.3 From 6ec3070298ab8ea56aa694c6160c51d47a8d0830 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 26 Feb 2016 16:13:37 +0000 Subject: arm-cci: don't return value from void function pmu_write_register has a void return type, so remove the useless return statement. 
Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index d951371a74d4..afe64ab2c06b 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -758,8 +758,8 @@ static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offs static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value, int idx, unsigned int offset) { - return writel_relaxed(value, cci_pmu->base + - CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset); + writel_relaxed(value, cci_pmu->base + + CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset); } static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx) -- cgit v1.2.3 From 3b23d77b4258558a317157b18cf418eeea4ca788 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 26 Feb 2016 16:14:25 +0000 Subject: arm-cci: remove unused variable hw_counter is unused in the PMU IRQ handler, so remove it. Signed-off-by: Will Deacon --- drivers/bus/arm-cci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index afe64ab2c06b..a49b28378d59 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -1077,13 +1077,10 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev) */ for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) { struct perf_event *event = events->events[idx]; - struct hw_perf_event *hw_counter; if (!event) continue; - hw_counter = &event->hw; - /* Did this counter overflow? */ if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) & CCI_PMU_OVRFLW_FLAG)) -- cgit v1.2.3 From fe638401a091f4473e9e30942ea3cecc0c0b2b94 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Feb 2016 23:15:44 +0000 Subject: arm64: perf: Extend ARMV8_PMCR_MASK to include PMCR.LC Commit 7175f0591eb9 ("arm64: perf: Enable PMCR long cycle counter bit") added initial support for a 64-bit cycle counter enabled using PMCR.LC. 
Unfortunately, that patch doesn't extend ARMV8_PMCR_MASK, so any attempts to set the enable bit are ignored by armv8pmu_pmcr_write. This patch extends the mask to include the new bit. Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 1cc61fc321d9..c4c97655662e 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -408,7 +408,7 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = { #define ARMV8_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ #define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ #define ARMV8_PMCR_N_MASK 0x1f -#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ +#define ARMV8_PMCR_MASK 0x7f /* Mask for writable bits */ /* * PMOVSR: counters overflow flag status reg -- cgit v1.2.3 From 357b565d5d52b2dc2a51390eb8f887a9caa8597f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Mar 2016 11:07:15 +0000 Subject: drivers/perf: arm_pmu: avoid NULL dereference when not using devicetree Commit c6b90653f1f7 ("drivers/perf: arm_pmu: make info messages more verbose") breaks booting on systems where the PMU is probed without devicetree (e.g. by inspecting the MIDR of the current CPU). In this case, pdev->dev.of_node is NULL and we shouldn't try to access its ->full_name field when printing probe error messages. This patch fixes the probing code to use of_node_full_name, which safely handles NULL nodes and removes the "Error %i" part of the string, since it's not terribly useful. 
Reported-by: Guenter Roeck Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 11bacc7220a1..32346b5a8a11 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -1002,8 +1002,7 @@ int arm_pmu_device_probe(struct platform_device *pdev, } if (ret) { - pr_info("%s: failed to probe PMU! Error %i\n", - node->full_name, ret); + pr_info("%s: failed to probe PMU!\n", of_node_full_name(node)); goto out_free; } @@ -1023,8 +1022,8 @@ int arm_pmu_device_probe(struct platform_device *pdev, out_destroy: cpu_pmu_destroy(pmu); out_free: - pr_info("%s: failed to register PMU devices! Error %i\n", - node->full_name, ret); + pr_info("%s: failed to register PMU devices!\n", + of_node_full_name(node)); kfree(pmu); return ret; } -- cgit v1.2.3