From 7e5705c635ecfccde559ebbbe1eaf05b5cc60529 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 27 Sep 2019 09:26:42 -0700 Subject: tools/power/cpupower: Fix initializer override in hsw_ext_cstates When building cpupower with clang, the following warning appears: utils/idle_monitor/hsw_ext_idle.c:42:16: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] .desc = N_("Processor Package C2"), ^~~~~~~~~~~~~~~~~~~~~~ ./utils/helpers/helpers.h:25:33: note: expanded from macro 'N_' #define N_(String) gettext_noop(String) ^~~~~~ ./utils/helpers/helpers.h:23:30: note: expanded from macro 'gettext_noop' #define gettext_noop(String) String ^~~~~~ utils/idle_monitor/hsw_ext_idle.c:41:16: note: previous initialization is here .desc = N_("Processor Package C9"), ^~~~~~~~~~~~~~~~~~~~~~ ./utils/helpers/helpers.h:25:33: note: expanded from macro 'N_' #define N_(String) gettext_noop(String) ^~~~~~ ./utils/helpers/helpers.h:23:30: note: expanded from macro 'gettext_noop' #define gettext_noop(String) String ^~~~~~ 1 warning generated. This appears to be a copy and paste or merge mistake because the name and id fields both have PC9 in them, not PC2. Remove the second assignment to fix the warning. 
Fixes: 7ee767b69b68 ("cpupower: Add Haswell family 0x45 specific idle monitor to show PC8,9,10 states") Link: https://github.com/ClangBuiltLinux/linux/issues/718 Signed-off-by: Nathan Chancellor Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c index 7c7451d3f494..58dbdfd4fa13 100644 --- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -39,7 +39,6 @@ static cstate_t hsw_ext_cstates[HSW_EXT_CSTATE_COUNT] = { { .name = "PC9", .desc = N_("Processor Package C9"), - .desc = N_("Processor Package C2"), .id = PC9, .range = RANGE_PACKAGE, .get_count_percent = hsw_ext_get_count_percent, -- cgit v1.2.3 From d80a4ac20800035c46a3868ad9e11ebda0049c7d Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Thu, 17 Oct 2019 00:56:39 -0500 Subject: cpupower : Handle set and info subcommands correctly The cpupower tool has set and info options which are used only by x86 machines. This patch removes support for these two subcommands from the cpupower utility for POWER. Thus, these two subcommands will now be available only for Intel. This removes the ambiguous error message shown when using the set option on non-Intel systems. Without this patch on a POWER system: root@ubuntu:~# cpupower info System does not support Intel's performance bias setting root@ubuntu:~# cpupower set -b 10 Error setting perf-bias value on CPU With this patch on a POWER box: root@ubuntu:~# cpupower info Subcommand not supported on POWER Same result for set subcommand. This patch does not affect results on an Intel box. 
Signed-off-by: Abhishek Goel Acked-by: Thomas Renninger Reviewed-by: Shuah Khan Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpupower-info.c | 9 +++++++++ tools/power/cpupower/utils/cpupower-set.c | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'tools') diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 4c9d342b70ff..d3755ea70d4d 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "helpers/helpers.h" #include "helpers/sysfs.h" @@ -30,6 +31,7 @@ int cmd_info(int argc, char **argv) extern char *optarg; extern int optind, opterr, optopt; unsigned int cpu; + struct utsname uts; union { struct { @@ -39,6 +41,13 @@ int cmd_info(int argc, char **argv) } params = {}; int ret = 0; + ret = uname(&uts); + if (!ret && (!strcmp(uts.machine, "ppc64le") || + !strcmp(uts.machine, "ppc64"))) { + fprintf(stderr, _("Subcommand not supported on POWER.\n")); + return ret; + } + setlocale(LC_ALL, ""); textdomain(PACKAGE); diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 3cd95c6cb974..3cca6f715dd9 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "helpers/helpers.h" #include "helpers/sysfs.h" @@ -31,6 +32,7 @@ int cmd_set(int argc, char **argv) extern char *optarg; extern int optind, opterr, optopt; unsigned int cpu; + struct utsname uts; union { struct { @@ -41,6 +43,13 @@ int cmd_set(int argc, char **argv) int perf_bias = 0; int ret = 0; + ret = uname(&uts); + if (!ret && (!strcmp(uts.machine, "ppc64le") || + !strcmp(uts.machine, "ppc64"))) { + fprintf(stderr, _("Subcommand not supported on POWER.\n")); + return ret; + } + setlocale(LC_ALL, ""); textdomain(PACKAGE); -- cgit v1.2.3 From 
d3f5d2a192a299f56579ae6e6283f9011b00208f Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Tue, 5 Nov 2019 17:16:52 +0000 Subject: cpupower: Move needs_root variable into a sub-struct Move the needs_root variable into a sub-struct. This is in preparation for adding a new flag for cpuidle_monitor. Update all uses of the needs_root variable to reflect this change. Signed-off-by: Janakarajan Natarajan Acked-by: Thomas Renninger Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c | 2 +- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 2 +- tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 2 +- tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h | 4 +++- tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c | 2 +- tools/power/cpupower/utils/idle_monitor/mperf_monitor.c | 2 +- tools/power/cpupower/utils/idle_monitor/nhm_idle.c | 2 +- tools/power/cpupower/utils/idle_monitor/snb_idle.c | 2 +- 8 files changed, 10 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c index 3f893b99b337..33dc34db4f3c 100644 --- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -328,7 +328,7 @@ struct cpuidle_monitor amd_fam14h_monitor = { .stop = amd_fam14h_stop, .do_register = amd_fam14h_register, .unregister = amd_fam14h_unregister, - .needs_root = 1, + .flags.needs_root = 1, .overflow_s = OVERFLOW_MS / 1000, }; #endif /* #if defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index f634aeb65c5f..3c4cee160b0e 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -207,6 +207,6 @@ struct cpuidle_monitor cpuidle_sysfs_monitor 
= { .stop = cpuidle_stop, .do_register = cpuidle_register, .unregister = cpuidle_unregister, - .needs_root = 0, + .flags.needs_root = 0, .overflow_s = UINT_MAX, }; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index d3c3e6e7aa26..6d44fec55ad5 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -408,7 +408,7 @@ int cmd_monitor(int argc, char **argv) dprint("Try to register: %s\n", all_monitors[num]->name); test_mon = all_monitors[num]->do_register(); if (test_mon) { - if (test_mon->needs_root && !run_as_root) { + if (test_mon->flags.needs_root && !run_as_root) { fprintf(stderr, _("Available monitor %s needs " "root access\n"), test_mon->name); continue; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h index a2d901d3bfaf..9b612d999660 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -60,7 +60,9 @@ struct cpuidle_monitor { struct cpuidle_monitor* (*do_register) (void); void (*unregister)(void); unsigned int overflow_s; - int needs_root; + struct { + unsigned int needs_root:1; + } flags; }; extern long long timespec_diff_us(struct timespec start, struct timespec end); diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c index 58dbdfd4fa13..97ad3233a521 100644 --- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -187,7 +187,7 @@ struct cpuidle_monitor intel_hsw_ext_monitor = { .stop = hsw_ext_stop, .do_register = hsw_ext_register, .unregister = hsw_ext_unregister, - .needs_root = 1, + .flags.needs_root = 1, .overflow_s = 922000000 /* 922337203 seconds TSC overflow at 20GHz */ }; diff 
--git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 44806a6dae11..7cae74202a4d 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -333,7 +333,7 @@ struct cpuidle_monitor mperf_monitor = { .stop = mperf_stop, .do_register = mperf_register, .unregister = mperf_unregister, - .needs_root = 1, + .flags.needs_root = 1, .overflow_s = 922000000 /* 922337203 seconds TSC overflow at 20GHz */ }; diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c index be7256696a37..114271165182 100644 --- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c @@ -208,7 +208,7 @@ struct cpuidle_monitor intel_nhm_monitor = { .stop = nhm_stop, .do_register = intel_nhm_register, .unregister = intel_nhm_unregister, - .needs_root = 1, + .flags.needs_root = 1, .overflow_s = 922000000 /* 922337203 seconds TSC overflow at 20GHz */ }; diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c index 968333571cad..df8b223cc096 100644 --- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -192,7 +192,7 @@ struct cpuidle_monitor intel_snb_monitor = { .stop = snb_stop, .do_register = snb_register, .unregister = snb_unregister, - .needs_root = 1, + .flags.needs_root = 1, .overflow_s = 922000000 /* 922337203 seconds TSC overflow at 20GHz */ }; -- cgit v1.2.3 From 7adafe541fe5e015261a92d39db8b163db477337 Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Tue, 5 Nov 2019 17:16:54 +0000 Subject: cpupower: mperf_monitor: Introduce per_cpu_schedule flag The per_cpu_schedule flag is used to move the cpupower process to the cpu on which we are looking to read the APERF/MPERF registers. 
This prevents IPIs from being generated by read_msr()s as we are already on the cpu of interest. Ex: If cpupower is running on CPU 0 and we execute read_msr(20, MSR_APERF, val) and then read_msr(20, MSR_MPERF, val), the msr module will generate an IPI from CPU 0 to CPU 20 to query for the MSR_APERF and then the MSR_MPERF in separate IPIs. This delay, caused by IPI latency, between reading the APERF and MPERF registers may cause both of them to go out of sync. The use of the per_cpu_schedule flag reduces the probability of this happening. It comes at the cost of a negligible increase in cpu consumption caused by the migration of cpupower across each of the cpus of the system. Signed-off-by: Janakarajan Natarajan Acked-by: Thomas Renninger Signed-off-by: Shuah Khan --- .../cpupower/utils/idle_monitor/cpupower-monitor.h | 1 + .../cpupower/utils/idle_monitor/mperf_monitor.c | 42 ++++++++++++++++------ 2 files changed, 33 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h index 9b612d999660..5b5eb1da0cce 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -62,6 +62,7 @@ struct cpuidle_monitor { unsigned int overflow_s; struct { unsigned int needs_root:1; + unsigned int per_cpu_schedule:1; } flags; }; diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 7cae74202a4d..afb2e6f8edd3 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -86,15 +86,35 @@ static int mperf_get_tsc(unsigned long long *tsc) return ret; } +static int get_aperf_mperf(int cpu, unsigned long long *aval, + unsigned long long *mval) +{ + int ret; + + /* + * Running on the cpu from which we read the registers will + * prevent 
APERF/MPERF from going out of sync because of IPI + * latency introduced by read_msr()s. + */ + if (mperf_monitor.flags.per_cpu_schedule) { + if (bind_cpu(cpu)) + return 1; + } + + ret = read_msr(cpu, MSR_APERF, aval); + ret |= read_msr(cpu, MSR_MPERF, mval); + + return ret; +} + static int mperf_init_stats(unsigned int cpu) { - unsigned long long val; + unsigned long long aval, mval; int ret; - ret = read_msr(cpu, MSR_APERF, &val); - aperf_previous_count[cpu] = val; - ret |= read_msr(cpu, MSR_MPERF, &val); - mperf_previous_count[cpu] = val; + ret = get_aperf_mperf(cpu, &aval, &mval); + aperf_previous_count[cpu] = aval; + mperf_previous_count[cpu] = mval; is_valid[cpu] = !ret; return 0; @@ -102,13 +122,12 @@ static int mperf_init_stats(unsigned int cpu) static int mperf_measure_stats(unsigned int cpu) { - unsigned long long val; + unsigned long long aval, mval; int ret; - ret = read_msr(cpu, MSR_APERF, &val); - aperf_current_count[cpu] = val; - ret |= read_msr(cpu, MSR_MPERF, &val); - mperf_current_count[cpu] = val; + ret = get_aperf_mperf(cpu, &aval, &mval); + aperf_current_count[cpu] = aval; + mperf_current_count[cpu] = mval; is_valid[cpu] = !ret; return 0; @@ -305,6 +324,9 @@ struct cpuidle_monitor *mperf_register(void) if (init_maxfreq_mode()) return NULL; + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) + mperf_monitor.flags.per_cpu_schedule = 1; + /* Free this at program termination */ is_valid = calloc(cpu_count, sizeof(int)); mperf_previous_count = calloc(cpu_count, sizeof(unsigned long long)); -- cgit v1.2.3 From 6af2ed53f0402c09b36d2b38698e18a25ca732a7 Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Tue, 5 Nov 2019 17:16:55 +0000 Subject: cpupower: mperf_monitor: Update cpupower to use the RDPRU instruction AMD Zen 2 introduces the RDPRU instruction which can be used to access some processor registers which are typically only accessible in privilege level 0. ECX specifies the register to read and EDX:EAX will contain the value read. 
ECX: 0 - Register MPERF 1 - Register APERF This has the added advantage of not having to use the msr module, since the userspace to kernel transitions which occur during each read_msr() might cause APERF and MPERF to go out of sync. Signed-off-by: Janakarajan Natarajan Acked-by: Thomas Renninger Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/cpuid.c | 4 ++++ tools/power/cpupower/utils/helpers/helpers.h | 1 + .../cpupower/utils/idle_monitor/mperf_monitor.c | 20 ++++++++++++++++++++ 3 files changed, 25 insertions(+) (limited to 'tools') diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c index 5cc39d4e23ed..73bfafc60e9b 100644 --- a/tools/power/cpupower/utils/helpers/cpuid.c +++ b/tools/power/cpupower/utils/helpers/cpuid.c @@ -131,6 +131,10 @@ out: if (ext_cpuid_level >= 0x80000007 && (cpuid_edx(0x80000007) & (1 << 9))) cpu_info->caps |= CPUPOWER_CAP_AMD_CBP; + + if (ext_cpuid_level >= 0x80000008 && + cpuid_ebx(0x80000008) & (1 << 4)) + cpu_info->caps |= CPUPOWER_CAP_AMD_RDPRU; } if (cpu_info->vendor == X86_VENDOR_INTEL) { diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 357b19bb136e..c258eeccd05f 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -69,6 +69,7 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, #define CPUPOWER_CAP_HAS_TURBO_RATIO 0x00000010 #define CPUPOWER_CAP_IS_SNB 0x00000020 #define CPUPOWER_CAP_INTEL_IDA 0x00000040 +#define CPUPOWER_CAP_AMD_RDPRU 0x00000080 #define CPUPOWER_AMD_CPBDIS 0x02000000 diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index afb2e6f8edd3..e7d48cb563c0 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -19,6 +19,10 @@ #define MSR_APERF 0xE8 #define MSR_MPERF 
0xE7 +#define RDPRU ".byte 0x0f, 0x01, 0xfd" +#define RDPRU_ECX_MPERF 0 +#define RDPRU_ECX_APERF 1 + #define MSR_TSC 0x10 #define MSR_AMD_HWCR 0xc0010015 @@ -89,6 +93,8 @@ static int mperf_get_tsc(unsigned long long *tsc) static int get_aperf_mperf(int cpu, unsigned long long *aval, unsigned long long *mval) { + unsigned long low_a, high_a; + unsigned long low_m, high_m; int ret; /* @@ -101,6 +107,20 @@ static int get_aperf_mperf(int cpu, unsigned long long *aval, return 1; } + if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_RDPRU) { + asm volatile(RDPRU + : "=a" (low_a), "=d" (high_a) + : "c" (RDPRU_ECX_APERF)); + asm volatile(RDPRU + : "=a" (low_m), "=d" (high_m) + : "c" (RDPRU_ECX_MPERF)); + + *aval = ((low_a) | (high_a) << 32); + *mval = ((low_m) | (high_m) << 32); + + return 0; + } + ret = read_msr(cpu, MSR_APERF, aval); ret |= read_msr(cpu, MSR_MPERF, mval); -- cgit v1.2.3 From 4611a4fb0cce3973dce8c9d74e5d6261ffa4210f Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Tue, 5 Nov 2019 17:16:57 +0000 Subject: cpupower: ToDo: Update ToDo with ideas for per_cpu_schedule handling Based on Thomas Renninger's feedback/ideas. Re-structure the code to better handle the per_cpu_schedule mechanism which was introduced when adding support for AMD Zen based processors. Signed-off-by: Janakarajan Natarajan Acked-by: Thomas Renninger Signed-off-by: Shuah Khan --- tools/power/cpupower/ToDo | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/power/cpupower/ToDo b/tools/power/cpupower/ToDo index 6e8b89f282e6..b196a139a3e4 100644 --- a/tools/power/cpupower/ToDo +++ b/tools/power/cpupower/ToDo @@ -8,3 +8,17 @@ ToDos sorted by priority: - Add another c1e debug idle monitor -> Is by design racy with BIOS, but could be added with a --force option and some "be careful" messages +- Add cpu_start()/cpu_stop() callbacks for monitor + -> This is to move the per_cpu logic from inside the + monitor to outside it. 
This can be given higher + priority in fork_it. +- Fork as many processes as there are CPUs in case the + per_cpu_schedule flag is set. + -> Bind forked process to each cpu. + -> Execute start measures via the forked processes on + each cpu. + -> Run test executable in a forked process. + -> Execute stop measures via the forked processes on + each cpu. + This would be ideal as it will not introduce noise in the + tested executable. -- cgit v1.2.3